From 1147436d60d19c882109915c417a45e383d52e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Wed, 29 Jan 2020 18:50:18 +0300 Subject: [PATCH 001/752] Raw add of direct dictionaries (doesnt work) --- dbms/src/Dictionaries/DirectDictionary.cpp | 582 ++++++++++++++++++ dbms/src/Dictionaries/DirectDictionary.h | 294 +++++++++ dbms/src/Dictionaries/DirectDictionary.inc.h | 406 ++++++++++++ .../src/Dictionaries/registerDictionaries.cpp | 3 + 4 files changed, 1285 insertions(+) create mode 100644 dbms/src/Dictionaries/DirectDictionary.cpp create mode 100644 dbms/src/Dictionaries/DirectDictionary.h create mode 100644 dbms/src/Dictionaries/DirectDictionary.inc.h diff --git a/dbms/src/Dictionaries/DirectDictionary.cpp b/dbms/src/Dictionaries/DirectDictionary.cpp new file mode 100644 index 00000000000..9e0a77ebc91 --- /dev/null +++ b/dbms/src/Dictionaries/DirectDictionary.cpp @@ -0,0 +1,582 @@ +#include "DirectDictionary.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "DirectDictionary.inc.h" +#include "DictionaryBlockInputStream.h" +#include "DictionaryFactory.h" + + +/* + * + * TODO: CHANGE EVENTS TO DIRECT DICTIONARY EVENTS (WTF? WHERE R THEY DECLARED????) + * +*/ + +namespace ProfileEvents +{ + extern const Event DictCacheKeysRequested; + extern const Event DictCacheKeysRequestedMiss; + extern const Event DictCacheKeysRequestedFound; + extern const Event DictCacheKeysExpired; + extern const Event DictCacheKeysNotFound; + extern const Event DictCacheKeysHit; + extern const Event DictCacheRequestTimeNs; + extern const Event DictCacheRequests; + extern const Event DictCacheLockWriteNs; + extern const Event DictCacheLockReadNs; +} + +namespace CurrentMetrics +{ + extern const Metric DictCacheRequests; +} + + +namespace DB +{ + namespace ErrorCodes + { + extern const int TYPE_MISMATCH; + extern const int BAD_ARGUMENTS; + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; + extern const int TOO_SMALL_BUFFER_SIZE; + } + + /* + * deleted inline size_t DirectDictionary::getCellIdx(const Key id) const + * + */ + + + DirectDictionary::DirectDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_ + ) + : name{name_} + , dict_struct(dict_struct_) + , source_ptr{std::move(source_ptr_)} + , dict_lifetime(dict_lifetime_) + , log(&Logger::get("ExternalDictionaries")) + , rnd_engine(randomSeed()) + { + if (!this->source_ptr->supportsSelectiveLoad()) + throw Exception{name + ": source cannot be used with DirectDictionary", ErrorCodes::UNSUPPORTED_METHOD}; + + createAttributes(); + } + + + void DirectDictionary::toParent(const PaddedPODArray & ids, PaddedPODArray & out) const + { + const auto null_value = std::get(hierarchical_attribute->null_values); + + getItemsNumberImpl(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; }); + } + + +/// Allow to use single value in same way as array. 
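+    /// getAt(arr, i) returns arr[i], while the scalar overload ignores the index and
+    /// returns the same value for every row. This lets isInImpl below be instantiated
+    /// both for a vector of ancestor ids and for a single constant ancestor id.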
+ static inline DirectDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) + { + return arr[idx]; + } + static inline DirectDictionary::Key getAt(const DirectDictionary::Key & value, const size_t) + { + return value; + } + + + template + void DirectDictionary::isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const + { + /// Transform all children to parents until ancestor id or null_value will be reached. + + size_t out_size = out.size(); + memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated" + + const auto null_value = std::get(hierarchical_attribute->null_values); + + PaddedPODArray children(out_size, 0); + PaddedPODArray parents(child_ids.begin(), child_ids.end()); + + while (true) + { + size_t out_idx = 0; + size_t parents_idx = 0; + size_t new_children_idx = 0; + + while (out_idx < out_size) + { + /// Already calculated + if (out[out_idx] != 0xFF) + { + ++out_idx; + continue; + } + + /// No parent + if (parents[parents_idx] == null_value) + { + out[out_idx] = 0; + } + /// Found ancestor + else if (parents[parents_idx] == getAt(ancestor_ids, parents_idx)) + { + out[out_idx] = 1; + } + /// Loop detected + else if (children[new_children_idx] == parents[parents_idx]) + { + out[out_idx] = 1; + } + /// Found intermediate parent, add this value to search at next loop iteration + else + { + children[new_children_idx] = parents[parents_idx]; + ++new_children_idx; + } + + ++out_idx; + ++parents_idx; + } + + if (new_children_idx == 0) + break; + + /// Transform all children to its parents. + children.resize(new_children_idx); + parents.resize(new_children_idx); + + toParent(children, parents); + } + } + + void DirectDictionary::isInVectorVector( + const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const + { + isInImpl(child_ids, ancestor_ids, out); + } + + void DirectDictionary::isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const + { + isInImpl(child_ids, ancestor_id, out); + } + + void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const + { + /// Special case with single child value. + + const auto null_value = std::get(hierarchical_attribute->null_values); + + PaddedPODArray child(1, child_id); + PaddedPODArray parent(1); + std::vector ancestors(1, child_id); + + /// Iteratively find all ancestors for child. + while (true) + { + toParent(child, parent); + + if (parent[0] == null_value) + break; + + child[0] = parent[0]; + ancestors.push_back(parent[0]); + } + + /// Assuming short hierarchy, so linear search is Ok. 
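+        /// (If hierarchies were deep, collecting `ancestors` into a hash set would be
+        /// the safer choice; for the short chains expected here std::find is cheaper.)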
+ for (size_t i = 0, out_size = out.size(); i < out_size; ++i) + out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end(); + } + + void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const + { + auto & attribute = getAttribute(attribute_name); + checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + + const auto null_value = StringRef{std::get(attribute.null_values)}; + + getItemsString(attribute, ids, out, [&](const size_t) { return null_value; }); + } + + void DirectDictionary::getString( + const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const + { + auto & attribute = getAttribute(attribute_name); + checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + + getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); }); + } + + void DirectDictionary::getString( + const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const + { + auto & attribute = getAttribute(attribute_name); + checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + + getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; }); + } + + +/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag +/// true false found and valid +/// false true not found (something outdated, maybe our cell) +/// false false not found (other id stored with valid data) +/// true true impossible +/// +/// todo: split this func to two: find_for_get and find_for_set + DirectDictionary::FindResult DirectDictionary::findCellIdx(const Key & id, const CellMetadata::time_point_t now) const + { + auto pos = getCellIdx(id); + auto oldest_id = pos; + auto oldest_time = CellMetadata::time_point_t::max(); + const auto stop = pos + max_collision_length; + for (; pos < stop; ++pos) + { + const auto cell_idx = pos & size_overlap_mask; + const auto & cell = cells[cell_idx]; + + if (cell.id != id) + { + /// maybe we already found nearest expired cell (try minimize collision_length on insert) + if (oldest_time > now && oldest_time > cell.expiresAt()) + { + oldest_time = cell.expiresAt(); + oldest_id = cell_idx; + } + continue; + } + + if (cell.expiresAt() < now) + { + return {cell_idx, false, true}; + } + + return {cell_idx, true, false}; + } + + return {oldest_id, false, false}; + } + + + /* + * deleted most part of has, that stood for + * looking for a key in cache + * + * TODO: check whether we need last two arguments + * in update function (seems like no) + * + */ + + void DirectDictionary::has(const PaddedPODArray & ids, PaddedPODArray & out) const + { + std::vector required_ids(ids.size()); + std::copy(std::begin(ids), std::end(ids), std::begin(required_ids)); + + /// request new values + update( + required_ids, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + out[row] = true; + }, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + out[row] = false; + }); + } + + + void DirectDictionary::createAttributes() + { + const auto attributes_size = dict_struct.attributes.size(); + attributes.reserve(attributes_size); + + bytes_allocated += size * sizeof(CellMetadata); + bytes_allocated += attributes_size * sizeof(attributes.front()); + + for (const auto & attribute : 
dict_struct.attributes) + { + attribute_index_by_name.emplace(attribute.name, attributes.size()); + attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); + + if (attribute.hierarchical) + { + hierarchical_attribute = &attributes.back(); + + if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64) + throw Exception{name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH}; + } + } + } + + DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) + { + Attribute attr{type, {}, {}}; + + switch (type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + attr.null_values = TYPE(null_value.get>()); \ + attr.arrays = std::make_unique>(size); \ + bytes_allocated += size * sizeof(TYPE); \ + break; + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + case AttributeUnderlyingType::utString: + attr.null_values = null_value.get(); + attr.arrays = std::make_unique>(size); + bytes_allocated += size * sizeof(StringRef); + if (!string_arena) + string_arena = std::make_unique(); + break; + } + + return attr; + } + + void DirectDictionary::setDefaultAttributeValue(Attribute & attribute, const Key idx) const + { + switch (attribute.type) + { + case AttributeUnderlyingType::utUInt8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utUInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utUInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utUInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utUInt128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utInt8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utFloat32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utFloat64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + + case AttributeUnderlyingType::utDecimal32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utDecimal64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::utDecimal128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + + case AttributeUnderlyingType::utString: + { + const auto & null_value_ref = std::get(attribute.null_values); + auto & string_ref = std::get>(attribute.arrays)[idx]; + + if (string_ref.data != null_value_ref.data()) + { + if (string_ref.data) + string_arena->free(const_cast(string_ref.data), 
string_ref.size); + + string_ref = StringRef{null_value_ref}; + } + + break; + } + } + } + + void DirectDictionary::setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const + { + switch (attribute.type) + { + case AttributeUnderlyingType::utUInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utUInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utUInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utUInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utUInt128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utFloat32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utFloat64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + + case AttributeUnderlyingType::utDecimal32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utDecimal64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::utDecimal128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + + case AttributeUnderlyingType::utString: + { + const auto & string = value.get(); + auto & string_ref = std::get>(attribute.arrays)[idx]; + const auto & null_value_ref = std::get(attribute.null_values); + + /// free memory unless it points to a null_value + if (string_ref.data && string_ref.data != null_value_ref.data()) + string_arena->free(const_cast(string_ref.data), string_ref.size); + + const auto str_size = string.size(); + if (str_size != 0) + { + auto string_ptr = string_arena->alloc(str_size + 1); + std::copy(string.data(), string.data() + str_size + 1, string_ptr); + string_ref = StringRef{string_ptr, str_size}; + } + else + string_ref = {}; + + break; + } + } + } + + DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) const + { + const auto it = attribute_index_by_name.find(attribute_name); + if (it == std::end(attribute_index_by_name)) + throw Exception{name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS}; + + return attributes[it->second]; + } + + /* + * I've deleted: + * bool CacheDictionary::isEmptyCell(const UInt64 idx) const + * and + * PaddedPODArray CacheDictionary::getCachedIds() const + */ + + BlockInputStreamPtr DirectDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const + { + using BlockInputStreamType = DictionaryBlockInputStream; + + /* deleted pre-last argument getCachedIds() from this return (will something break then?) 
*/ + return std::make_shared(shared_from_this(), max_block_size, column_names); + } + + std::exception_ptr DirectDictionary::getLastException() const + { + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + return last_exception; + } + + void registerDictionaryDirect(DictionaryFactory & factory) + { + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr + { + if (dict_struct.key) + throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; + + if (dict_struct.range_min || dict_struct.range_max) + throw Exception{name + + ": elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; + const auto & layout_prefix = config_prefix + ".layout"; + + /* + * + * seems like this stands only for cache dictionaries + * + const auto size = config.getInt(layout_prefix + ".cache.size_in_cells"); + if (size == 0) + throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + + */ + + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + if (require_nonempty) + throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; + + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + + /* deleted last argument (size) in this return */ + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + }; + factory.registerLayout("direct", create_layout, false); + } + + +} diff --git a/dbms/src/Dictionaries/DirectDictionary.h b/dbms/src/Dictionaries/DirectDictionary.h new file mode 100644 index 00000000000..1431adccf02 --- /dev/null +++ b/dbms/src/Dictionaries/DirectDictionary.h @@ -0,0 +1,294 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" + + +namespace DB +{ + class DirectDictionary final : public IDictionary + { + public: + /* Removed last argument (size_) */ + DirectDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_ + ); + + std::string getName() const override { return name; } + + std::string getTypeName() const override { return "Cache"; } + + size_t getBytesAllocated() const override { return bytes_allocated + (string_arena ? 
string_arena->size() : 0); } + + size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } + + double getHitRate() const override + { + return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); + } + + size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } + + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } + + bool isCached() const override { return false; } + + std::shared_ptr clone() const override + { + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, size); + } + + const IDictionarySource * getSource() const override { return source_ptr.get(); } + + const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + + const DictionaryStructure & getStructure() const override { return dict_struct; } + + bool isInjective(const std::string & attribute_name) const override + { + return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; + } + + bool hasHierarchy() const override { return hierarchical_attribute; } + + void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override; + + void isInVectorVector( + const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; + void isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const override; + void isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; + + std::exception_ptr getLastException() const override; + + template + using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + +#define DECLARE(TYPE) \ + void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void + getString(const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) + const; + +#define DECLARE(TYPE) \ + void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const; + + void has(const 
PaddedPODArray & ids, PaddedPODArray & out) const override; + + BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; + + private: + template + using ContainerType = Value[]; + template + using ContainerPtrType = std::unique_ptr>; + + struct CellMetadata final + { + using time_point_t = std::chrono::system_clock::time_point; + using time_point_rep_t = time_point_t::rep; + using time_point_urep_t = std::make_unsigned_t; + + static constexpr UInt64 EXPIRES_AT_MASK = std::numeric_limits::max(); + static constexpr UInt64 IS_DEFAULT_MASK = ~EXPIRES_AT_MASK; + + UInt64 id; + /// Stores both expiration time and `is_default` flag in the most significant bit + time_point_urep_t data; + + /// Sets expiration time, resets `is_default` flag to false + time_point_t expiresAt() const { return ext::safe_bit_cast(data & EXPIRES_AT_MASK); } + void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } + + bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } + void setDefault() { data |= IS_DEFAULT_MASK; } + }; + + struct Attribute final + { + AttributeUnderlyingType type; + std::variant< + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; + std::variant< + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType> + arrays; + }; + + void createAttributes(); + + Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); + + template + void getItemsNumberImpl( + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; + + template + void getItemsString(Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; + + template + void update(const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const; + + PaddedPODArray getCachedIds() const; + + bool isEmptyCell(const UInt64 idx) const; + + size_t getCellIdx(const Key id) const; + + void setDefaultAttributeValue(Attribute & attribute, const Key idx) const; + + void setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const; + + Attribute & getAttribute(const std::string & attribute_name) const; + + struct FindResult + { + const size_t cell_idx; + const bool valid; + const bool outdated; + }; + + FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; + + template + void isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; + + const std::string name; + const DictionaryStructure dict_struct; + mutable DictionarySourcePtr source_ptr; + const DictionaryLifetime dict_lifetime; + Logger * const log; + + mutable std::shared_mutex rw_lock; + + /// Actual size will be increased to match power of 2 + const size_t size; + + /// all bits to 1 mask (size - 1) (0b1000 - 1 = 0b111) + const size_t size_overlap_mask; + + /// Max tries to find cell, overlaped with mask: if size = 16 and start_cell=10: will try cells: 10,11,12,13,14,15,0,1,2,3 + static constexpr size_t max_collision_length = 10; + + const size_t zero_cell_idx{getCellIdx(0)}; 
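+        /// A cell with id == 0 normally means "unused"; the one cell that key 0 itself
+        /// maps to is the exception, so its index is precomputed here and checked
+        /// against in update() before bumping element_count.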
+ std::map attribute_index_by_name; + mutable std::vector attributes; + mutable std::vector cells; + Attribute * hierarchical_attribute = nullptr; + std::unique_ptr string_arena; + + mutable std::exception_ptr last_exception; + mutable size_t error_count = 0; + mutable std::chrono::system_clock::time_point backoff_end_time; + + mutable pcg64 rnd_engine; + + mutable size_t bytes_allocated = 0; + mutable std::atomic element_count{0}; + mutable std::atomic hit_count{0}; + mutable std::atomic query_count{0}; + }; + +} diff --git a/dbms/src/Dictionaries/DirectDictionary.inc.h b/dbms/src/Dictionaries/DirectDictionary.inc.h new file mode 100644 index 00000000000..68010b0fe19 --- /dev/null +++ b/dbms/src/Dictionaries/DirectDictionary.inc.h @@ -0,0 +1,406 @@ +#include "CacheDictionary.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event DictCacheKeysRequested; + extern const Event DictCacheKeysRequestedMiss; + extern const Event DictCacheKeysRequestedFound; + extern const Event DictCacheKeysExpired; + extern const Event DictCacheKeysNotFound; + extern const Event DictCacheKeysHit; + extern const Event DictCacheRequestTimeNs; + extern const Event DictCacheRequests; + extern const Event DictCacheLockWriteNs; + extern const Event DictCacheLockReadNs; +} + +namespace CurrentMetrics +{ + extern const Metric DictCacheRequests; +} + +namespace DB +{ + namespace ErrorCodes + { + extern const int TYPE_MISMATCH; + } + + template + void CacheDictionary::getItemsNumberImpl( + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const + { + /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } + std::unordered_map> outdated_ids; + auto & attribute_array = std::get>(attribute.arrays); + const auto rows = ext::size(ids); + + size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; + + { + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + + const auto now = std::chrono::system_clock::now(); + /// fetch up-to-date values, decide which ones require update + for (const auto row : ext::range(0, rows)) + { + const auto id = ids[row]; + + /** cell should be updated if either: + * 1. ids do not match, + * 2. cell has expired, + * 3. explicit defaults were specified and cell was set default. */ + + const auto find_result = findCellIdx(id, now); + if (!find_result.valid) + { + outdated_ids[id].push_back(row); + if (find_result.outdated) + ++cache_expired; + else + ++cache_not_found; + } + else + { + ++cache_hit; + const auto & cell_idx = find_result.cell_idx; + const auto & cell = cells[cell_idx]; + out[row] = cell.isDefault() ? 
get_default(row) : static_cast(attribute_array[cell_idx]); + } + } + } + + ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); + ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); + ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); + + query_count.fetch_add(rows, std::memory_order_relaxed); + hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); + + if (outdated_ids.empty()) + return; + + std::vector required_ids(outdated_ids.size()); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); + + /// request new values + update( + required_ids, + [&](const auto id, const auto cell_idx) + { + const auto attribute_value = attribute_array[cell_idx]; + + for (const size_t row : outdated_ids[id]) + out[row] = static_cast(attribute_value); + }, + [&](const auto id, const auto) + { + for (const size_t row : outdated_ids[id]) + out[row] = get_default(row); + }); + } + + template + void CacheDictionary::getItemsString( + Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const + { + const auto rows = ext::size(ids); + + /// save on some allocations + out->getOffsets().reserve(rows); + + auto & attribute_array = std::get>(attribute.arrays); + + auto found_outdated_values = false; + + /// perform optimistic version, fallback to pessimistic if failed + { + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + + const auto now = std::chrono::system_clock::now(); + /// fetch up-to-date values, discard on fail + for (const auto row : ext::range(0, rows)) + { + const auto id = ids[row]; + + const auto find_result = findCellIdx(id, now); + if (!find_result.valid) + { + found_outdated_values = true; + break; + } + else + { + const auto & cell_idx = find_result.cell_idx; + const auto & cell = cells[cell_idx]; + const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx]; + out->insertData(string_ref.data, string_ref.size); + } + } + } + + /// optimistic code completed successfully + if (!found_outdated_values) + { + query_count.fetch_add(rows, std::memory_order_relaxed); + hit_count.fetch_add(rows, std::memory_order_release); + return; + } + + /// now onto the pessimistic one, discard possible partial results from the optimistic path + out->getChars().resize_assume_reserved(0); + out->getOffsets().resize_assume_reserved(0); + + /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } + std::unordered_map> outdated_ids; + /// we are going to store every string separately + std::unordered_map map; + + size_t total_length = 0; + size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; + { + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + + const auto now = std::chrono::system_clock::now(); + for (const auto row : ext::range(0, ids.size())) + { + const auto id = ids[row]; + + const auto find_result = findCellIdx(id, now); + if (!find_result.valid) + { + outdated_ids[id].push_back(row); + if (find_result.outdated) + ++cache_expired; + else + ++cache_not_found; + } + else + { + ++cache_hit; + const auto & cell_idx = find_result.cell_idx; + const auto & cell = cells[cell_idx]; + const auto string_ref = cell.isDefault() ? 
get_default(row) : attribute_array[cell_idx]; + + if (!cell.isDefault()) + map[id] = String{string_ref}; + + total_length += string_ref.size + 1; + } + } + } + + ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); + ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); + ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); + + query_count.fetch_add(rows, std::memory_order_relaxed); + hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); + + /// request new values + if (!outdated_ids.empty()) + { + std::vector required_ids(outdated_ids.size()); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); + + update( + required_ids, + [&](const auto id, const auto cell_idx) + { + const auto attribute_value = attribute_array[cell_idx]; + + map[id] = String{attribute_value}; + total_length += (attribute_value.size + 1) * outdated_ids[id].size(); + }, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + total_length += get_default(row).size + 1; + }); + } + + out->getChars().reserve(total_length); + + for (const auto row : ext::range(0, ext::size(ids))) + { + const auto id = ids[row]; + const auto it = map.find(id); + + const auto string_ref = it != std::end(map) ? StringRef{it->second} : get_default(row); + out->insertData(string_ref.data, string_ref.size); + } + } + + template + void CacheDictionary::update( + const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const + { + CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); + + std::unordered_map remaining_ids{requested_ids.size()}; + for (const auto id : requested_ids) + remaining_ids.insert({id, 0}); + + const auto now = std::chrono::system_clock::now(); + + const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; + + if (now > backoff_end_time) + { + try + { + if (error_count) + { + /// Recover after error: we have to clone the source here because + /// it could keep connections which should be reset after error. 
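+                    /// (clone() yields a fresh source object, so e.g. a source holding
+                    /// a broken network connection does not keep reusing it.)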
+ source_ptr = source_ptr->clone(); + } + + Stopwatch watch; + auto stream = source_ptr->loadIds(requested_ids); + stream->readPrefix(); + + while (const auto block = stream->read()) + { + const auto id_column = typeid_cast(block.safeGetByPosition(0).column.get()); + if (!id_column) + throw Exception{name + ": id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; + + const auto & ids = id_column->getData(); + + /// cache column pointers + const auto column_ptrs = ext::map( + ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); }); + + for (const auto i : ext::range(0, ids.size())) + { + const auto id = ids[i]; + + const auto find_result = findCellIdx(id, now); + const auto & cell_idx = find_result.cell_idx; + + auto & cell = cells[cell_idx]; + + for (const auto attribute_idx : ext::range(0, attributes.size())) + { + const auto & attribute_column = *column_ptrs[attribute_idx]; + auto & attribute = attributes[attribute_idx]; + + setAttributeValue(attribute, cell_idx, attribute_column[i]); + } + + /// if cell id is zero and zero does not map to this cell, then the cell is unused + if (cell.id == 0 && cell_idx != zero_cell_idx) + element_count.fetch_add(1, std::memory_order_relaxed); + + cell.id = id; + if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) + { + std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; + cell.setExpiresAt(now + std::chrono::seconds{distribution(rnd_engine)}); + } + else + cell.setExpiresAt(std::chrono::time_point::max()); + + /// inform caller + on_cell_updated(id, cell_idx); + /// mark corresponding id as found + remaining_ids[id] = 1; + } + } + + stream->readSuffix(); + + error_count = 0; + last_exception = std::exception_ptr{}; + backoff_end_time = std::chrono::system_clock::time_point{}; + + ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); + } + catch (...) + { + ++error_count; + last_exception = std::current_exception(); + backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count)); + + tryLogException(last_exception, log, "Could not update cache dictionary '" + getName() + + "', next update is scheduled at " + DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(backoff_end_time))); + } + } + + size_t not_found_num = 0, found_num = 0; + + /// Check which ids have not been found and require setting null_value + for (const auto & id_found_pair : remaining_ids) + { + if (id_found_pair.second) + { + ++found_num; + continue; + } + ++not_found_num; + + const auto id = id_found_pair.first; + + const auto find_result = findCellIdx(id, now); + const auto & cell_idx = find_result.cell_idx; + auto & cell = cells[cell_idx]; + + if (error_count) + { + if (find_result.outdated) + { + /// We have expired data for that `id` so we can continue using it. + bool was_default = cell.isDefault(); + cell.setExpiresAt(backoff_end_time); + if (was_default) + cell.setDefault(); + if (was_default) + on_id_not_found(id, cell_idx); + else + on_cell_updated(id, cell_idx); + continue; + } + /// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`. 
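+                /// (Rethrowing propagates the original source failure to the caller
+                /// instead of silently returning defaults for keys that never loaded.)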
+                std::rethrow_exception(last_exception);
+            }
+
+            /// Check if cell had not been occupied before and increment element counter if it hadn't
+            if (cell.id == 0 && cell_idx != zero_cell_idx)
+                element_count.fetch_add(1, std::memory_order_relaxed);
+
+            cell.id = id;
+
+            if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0)
+            {
+                std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec};
+                cell.setExpiresAt(now + std::chrono::seconds{distribution(rnd_engine)});
+            }
+            else
+                cell.setExpiresAt(std::chrono::time_point::max());
+
+            /// Set null_value for each attribute
+            cell.setDefault();
+            for (auto & attribute : attributes)
+                setDefaultAttributeValue(attribute, cell_idx);
+
+            /// Inform the caller that the cell has not been found.
+            on_id_not_found(id, cell_idx);
+        }
+
+        ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num);
+        ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num);
+        ProfileEvents::increment(ProfileEvents::DictCacheRequests);
+    }
+
+}
diff --git a/dbms/src/Dictionaries/registerDictionaries.cpp b/dbms/src/Dictionaries/registerDictionaries.cpp
index ee320d7177b..44cb1f2858f 100644
--- a/dbms/src/Dictionaries/registerDictionaries.cpp
+++ b/dbms/src/Dictionaries/registerDictionaries.cpp
@@ -21,6 +21,7 @@ void registerDictionaryTrie(DictionaryFactory & factory);
 void registerDictionaryFlat(DictionaryFactory & factory);
 void registerDictionaryHashed(DictionaryFactory & factory);
 void registerDictionaryCache(DictionaryFactory & factory);
+void registerDictionaryDirect(DictionaryFactory & factory);


 void registerDictionaries()
@@ -48,6 +49,8 @@ void registerDictionaries()
         registerDictionaryFlat(factory);
         registerDictionaryHashed(factory);
         registerDictionaryCache(factory);
+        /// Register the new 'direct' dictionary layout.
+        registerDictionaryDirect(factory);
     }
 }

From 783a898b9dac0f09e609ffbdd47b50109a7171e8 Mon Sep 17 00:00:00 2001
From: Avogar
Date: Thu, 12 Mar 2020 20:19:40 +0300
Subject: [PATCH 002/752] Add MsgPackRowOutputFormat.
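
The format writes each row as a flat sequence of MessagePack values, one value
per column, with no field names or row envelope. A quick way to exercise it once
the format is registered (illustrative query, not part of this patch):

    SELECT number, toString(number) AS s
    FROM system.numbers
    LIMIT 3
    FORMAT MsgPack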
--- dbms/programs/server/metadata/test_n2zcw0.sql | 2 + .../server/metadata/test_n2zcw0/t.sql | 8 + dbms/src/Formats/FormatFactory.cpp | 1 + dbms/src/Formats/FormatFactory.h | 4 +- .../Formats/Impl/MsgPackRowOutputFormat.cpp | 164 ++++++++++++++++++ .../Formats/Impl/MsgPackRowOutputFormat.h | 29 ++++ 6 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 dbms/programs/server/metadata/test_n2zcw0.sql create mode 100644 dbms/programs/server/metadata/test_n2zcw0/t.sql create mode 100644 dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp create mode 100644 dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h diff --git a/dbms/programs/server/metadata/test_n2zcw0.sql b/dbms/programs/server/metadata/test_n2zcw0.sql new file mode 100644 index 00000000000..80046cd585c --- /dev/null +++ b/dbms/programs/server/metadata/test_n2zcw0.sql @@ -0,0 +1,2 @@ +ATTACH DATABASE test_n2zcw0 +ENGINE = Ordinary diff --git a/dbms/programs/server/metadata/test_n2zcw0/t.sql b/dbms/programs/server/metadata/test_n2zcw0/t.sql new file mode 100644 index 00000000000..64e3abcfe34 --- /dev/null +++ b/dbms/programs/server/metadata/test_n2zcw0/t.sql @@ -0,0 +1,8 @@ +ATTACH TABLE t +( + `a` Int, + `b` Int +) +ENGINE = MergeTree +ORDER BY (a, b) +SETTINGS index_granularity = 400 diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index a8e27054704..8b6034dad9d 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -352,6 +352,7 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorAvro(*this); registerInputFormatProcessorTemplate(*this); registerOutputFormatProcessorTemplate(*this); + registerOutputFormatProcessorMsgPack(*this); registerFileSegmentationEngineTabSeparated(*this); registerFileSegmentationEngineCSV(*this); diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index 7c18971e0eb..68ba2155642 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -171,7 +171,9 @@ void registerOutputFormatProcessorProtobuf(FormatFactory & factory); void registerInputFormatProcessorAvro(FormatFactory & factory); void registerOutputFormatProcessorAvro(FormatFactory & factory); void registerInputFormatProcessorTemplate(FormatFactory & factory); -void registerOutputFormatProcessorTemplate(FormatFactory &factory); +void registerOutputFormatProcessorTemplate(FormatFactory & factory); +void registerOutputFormatProcessorMsgPack(FormatFactory &factory); + /// File Segmentation Engines for parallel reading diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp new file mode 100644 index 00000000000..061f4228158 --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -0,0 +1,164 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_) + : IRowOutputFormat(header_, out_, callback), settings(settings_), packer(out_) {} + +void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num) +{ + switch (data_type->getTypeId()) + { + case TypeIndex::UInt8: + { + packer.pack_uint8(assert_cast(column).getElement(row_num)); + return; + } + 
case TypeIndex::UInt16: + { + packer.pack_uint16(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::UInt32: + { + packer.pack_uint32(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::UInt64: + { + packer.pack_uint64(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::Int8: + { + packer.pack_int8(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::Int16: + { + packer.pack_int16(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::Int32: + { + packer.pack_int32(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::Int64: + { + packer.pack_int64(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::Float32: + { + packer.pack_float(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::Float64: + { + packer.pack_double(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::Date: + { + packer.pack_uint16(assert_cast(column).getElement(row_num)); + return; + } + case TypeIndex::DateTime: + { + UInt32 datetime = assert_cast(column).getElement(row_num); + // Timestamp extension type in MsgPack is -1. + packer.pack_ext(sizeof(datetime), -1); + packer.pack_ext_body(reinterpret_cast(&datetime), sizeof(datetime)); + return; + } + case TypeIndex::String: + { + const StringRef & string = assert_cast(column).getDataAt(row_num); + packer.pack_str(string.size); + packer.pack_str_body(string.data, string.size); + return; + } + case TypeIndex::FixedString: + { + const StringRef & string = assert_cast(column).getDataAt(row_num); + packer.pack_str(string.size); + packer.pack_str_body(string.data, string.size); + return; + } + case TypeIndex::Array: + { + auto nested_type = assert_cast(*data_type).getNestedType(); + const ColumnArray & column_array = assert_cast(column); + const IColumn & nested_column = column_array.getData(); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + size_t offset = offsets[row_num - 1]; + size_t size = offsets[row_num] - offset; + packer.pack_array(size); + for (size_t i = 0; i < size; ++i) + { + serializeField(nested_column, nested_type, offset + i); + } + return; + } + case TypeIndex::Nullable: + { + auto nested_type = removeNullable(data_type); + const ColumnNullable & column_nullable = assert_cast(column); + if (!column_nullable.isNullAt(row_num)) + serializeField(column_nullable.getNestedColumn(), nested_type, row_num); + else + packer.pack_nil(); + return; + } + case TypeIndex::Nothing: + { + packer.pack_nil(); + return; + } + default: + break; + } + throw Exception("Type " + data_type->getName() + " is not supported for MsgPack output format", ErrorCodes::ILLEGAL_COLUMN); +} + +void MsgPackRowOutputFormat::write(const Columns & columns, size_t row_num) +{ + size_t num_columns = columns.size(); + for (size_t i = 0; i < num_columns; ++i) + { + serializeField(*columns[i], types[i], row_num); + } +} + +void registerOutputFormatProcessorMsgPack(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("MsgPack", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback callback, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, callback, settings); + }); +} + +} \ No newline at end of file diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h new file mode 100644 index 00000000000..20df018b60c --- /dev/null +++ 
b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +class MsgPackRowOutputFormat : public IRowOutputFormat +{ +public: + MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_); + + String getName() const override { return "MsgPackRowOutputFormat"; } + + void write(const Columns & columns, size_t row_num) override; + void writeField(const IColumn &, const IDataType &, size_t) override {} + void serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num); + +private: + FormatSettings settings; + msgpack::packer packer; +}; + +} From e6b10b81f018f68af28edc7366ed0d238cb36339 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 26 Mar 2020 17:10:47 +0300 Subject: [PATCH 003/752] Refactor MergingSortedTransform. Add IMergingTransform. --- dbms/CMakeLists.txt | 1 + .../Interpreters/InterpreterSelectQuery.cpp | 2 +- .../Processors/Merges/IMergingTransform.cpp | 211 +++++++++++++++++ .../IMergingTransform.h} | 91 +++----- .../MergingSortedTransform.cpp | 213 ++++-------------- .../Merges/MergingSortedTransform.h | 62 +++++ .../Transforms/MergeSortingTransform.cpp | 2 +- ...ocessors_test_merging_sorted_transform.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- 9 files changed, 352 insertions(+), 234 deletions(-) create mode 100644 dbms/src/Processors/Merges/IMergingTransform.cpp rename dbms/src/Processors/{Transforms/MergingSortedTransform.h => Merges/IMergingTransform.h} (53%) rename dbms/src/Processors/{Transforms => Merges}/MergingSortedTransform.cpp (50%) create mode 100644 dbms/src/Processors/Merges/MergingSortedTransform.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index b254c986511..c121ed00dd1 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -299,6 +299,7 @@ add_object_library(clickhouse_processors_formats src/Processors/Formats) add_object_library(clickhouse_processors_formats_impl src/Processors/Formats/Impl) add_object_library(clickhouse_processors_transforms src/Processors/Transforms) add_object_library(clickhouse_processors_sources src/Processors/Sources) +add_object_library(clickhouse_processors_sources src/Processors/Merges) if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 4fe83afa48d..f3100dce19c 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -84,7 +84,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp new file mode 100644 index 00000000000..d22acb71521 --- /dev/null +++ b/dbms/src/Processors/Merges/IMergingTransform.cpp @@ -0,0 +1,211 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + +IMergingTransform::IMergingTransform( + size_t num_inputs, const Block & input_header, const Block & output_header, bool have_all_inputs_) + : IProcessor(InputPorts(num_inputs, input_header), {output_header}) + , merged_data(output_header), have_all_inputs(have_all_inputs_) +{ +} + +void IMergingTransform::onNewInput() +{ + throw Exception("onNewInput is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + 
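+/// The default onNewInput() throws: only transforms that keep per-input state need
+/// to override it, as MergingSortedTransform does by growing its source_chunks and
+/// cursors vectors for every added input.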
+void IMergingTransform::addInput() +{ + if (have_all_inputs) + throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + + inputs.emplace_back(outputs.front().getHeader(), this); + onNewInput(); +} + +void IMergingTransform::setHaveAllInputs() +{ + if (have_all_inputs) + throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + + have_all_inputs = true; +} + +void IMergingTransform::requestDataForInput(size_t input_number) +{ + if (!need_data) + throw Exception("Data was requested for several inputs in IMergingTransform:" + " " + std::to_string(next_input_to_read) + " and " + std::to_string(input_number), + ErrorCodes::LOGICAL_ERROR); + + need_data = true; + next_input_to_read = input_number; +} + +IProcessor::Status IMergingTransform::prepareSingleInput() +{ + auto & input = inputs.front(); + auto & output = outputs.front(); + + if (input.isFinished()) + { + output.finish(); + return Status::Finished; + } + + input.setNeeded(); + + if (input.hasData()) + { + if (output.canPush()) + output.push(input.pull()); + + return Status::PortFull; + } + + return Status::NeedData; +} + +IProcessor::Status IMergingTransform::prepareInitializeInputs() +{ + /// Add information about inputs. + if (input_states.empty()) + { + input_states.reserve(inputs.size()); + for (auto & input : inputs) + input_states.emplace_back(input); + } + + /// Check for inputs we need. + bool all_inputs_has_data = true; + auto it = inputs.begin(); + for (size_t i = 0; it != inputs.end(); ++i, ++it) + { + auto & input = *it; + if (input.isFinished()) + continue; + + if (!input_states[i].is_initialized) + { + // input.setNotNeeded(); + continue; + } + + input.setNeeded(); + + if (!input.hasData()) + { + all_inputs_has_data = false; + continue; + } + + auto chunk = input.pull(); + if (!chunk.hasRows()) + { + + if (!input.isFinished()) + all_inputs_has_data = false; + + continue; + } + + consume(std::move(chunk), i); + input_states[i].is_initialized = true; + } + + if (!all_inputs_has_data) + return Status::NeedData; + + initializeInputs(); + + is_initialized = true; + return Status::Ready; +} + + +IProcessor::Status IMergingTransform::prepare() +{ + if (!have_all_inputs) + return Status::NeedData; + + auto & output = outputs.front(); + + /// Special case for no inputs. + if (inputs.empty()) + { + output.finish(); + return Status::Finished; + } + + /// Check can output. + + if (output.isFinished()) + { + for (auto & in : inputs) + in.close(); + + return Status::Finished; + } + + /// Special case for single input. + if (inputs.size() == 1) + return prepareSingleInput(); + + /// Do not disable inputs, so it will work in the same way as with AsynchronousBlockInputStream, like before. + bool is_port_full = !output.canPush(); + + /// Push if has data. 
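+    /// (Rows accumulated in merged_data are pushed as soon as the output port can
+    /// accept them, so downstream processors start working before the merge finishes.)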
+ if (merged_data.mergedRows() && !is_port_full) + output.push(merged_data.pull()); + + if (!is_initialized) + return prepareInitializeInputs(); + + if (is_finished) + { + + if (is_port_full) + return Status::PortFull; + + for (auto & input : inputs) + input.close(); + + outputs.front().finish(); + + return Status::Finished; + } + + if (need_data) + { + auto & input = input_states[next_input_to_read].port; + if (!input.isFinished()) + { + input.setNeeded(); + + if (!input.hasData()) + return Status::NeedData; + + auto chunk = input.pull(); + if (!chunk.hasRows() && !input.isFinished()) + return Status::NeedData; + + consume(std::move(chunk), next_input_to_read); + } + + need_data = false; + } + + if (is_port_full) + return Status::PortFull; + + return Status::Ready; +} + +} diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.h b/dbms/src/Processors/Merges/IMergingTransform.h similarity index 53% rename from dbms/src/Processors/Transforms/MergingSortedTransform.h rename to dbms/src/Processors/Merges/IMergingTransform.h index 914ad543c75..5c5a7bad1f7 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.h +++ b/dbms/src/Processors/Merges/IMergingTransform.h @@ -1,38 +1,34 @@ #pragma once #include -#include -#include - namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} -class MergingSortedTransform : public IProcessor +/// Base class for merging transforms. +class IMergingTransform : public IProcessor { public: - MergingSortedTransform( - const Block & header, - size_t num_inputs, - const SortDescription & description_, - size_t max_block_size, - UInt64 limit = 0, - bool quiet = false, - bool have_all_inputs = true); - - String getName() const override { return "MergingSortedTransform"; } - Status prepare() override; - void work() override; + IMergingTransform(size_t num_inputs, const Block & input_header, const Block & output_header, bool have_all_inputs); + /// Methods to add additional input port. It is possible to do only before the first call of `prepare`. void addInput(); + /// Need to be called after all inputs are added. (only if have_all_inputs was not specified). void setHaveAllInputs(); + Status prepare() override; + protected: + virtual void onNewInput(); /// Is called when new input is added. To initialize input's data. + virtual void initializeInputs() = 0; /// Is called after first chunk was read for every input. + virtual void consume(Chunk chunk, size_t input_number) = 0; /// Is called after chunk was consumed from input. + + void requestDataForInput(size_t input_number); /// Call it to say that next chunk of data is required for input. + void finish() { is_finished = true; } /// Call it when all data was inserted to merged_data. + + /// Struct which represents current merging chunk of data. + /// Also it calculates the number of merged rows. class MergedData { public: @@ -96,65 +92,30 @@ protected: MutableColumns columns; }; - /// Settings - SortDescription description; - const size_t max_block_size; - UInt64 limit; - bool has_collation = false; - bool quiet = false; - - std::atomic have_all_inputs; - MergedData merged_data; - /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) - /// If it is not nullptr then it should be populated during execution - WriteBuffer * out_row_sources_buf = nullptr; - - /// Chunks currently being merged. 
- std::vector source_chunks; - - SortCursorImpls cursors; - - SortingHeap queue_without_collation; - SortingHeap queue_with_collation; - private: - /// Processor state. bool is_initialized = false; bool is_finished = false; + bool need_data = false; size_t next_input_to_read = 0; - template - void merge(TSortingHeap & queue); + std::atomic have_all_inputs; - void insertFromChunk(size_t source_num); - - void updateCursor(Chunk chunk, size_t source_num) + struct InputState { - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); + explicit InputState(InputPort & port_) : port(port_) {} - chunk.setColumns(std::move(columns), num_rows); + InputPort & port; + bool is_initialized = false; + }; - auto & source_chunk = source_chunks[source_num]; + std::vector input_states; - if (source_chunk.empty()) - { - source_chunk = std::move(chunk); - cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); - has_collation |= cursors[source_num].has_collation; - } - else - { - source_chunk = std::move(chunk); - cursors[source_num].reset(source_chunk.getColumns(), {}); - } - } + Status prepareSingleInput(); + Status prepareInitializeInputs(); }; } diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp similarity index 50% rename from dbms/src/Processors/Transforms/MergingSortedTransform.cpp rename to dbms/src/Processors/Merges/MergingSortedTransform.cpp index b9e74277023..c259ce05d76 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -1,10 +1,11 @@ -#include +#include #include #include #include namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -13,15 +14,14 @@ namespace ErrorCodes MergingSortedTransform::MergingSortedTransform( const Block & header, size_t num_inputs, - const SortDescription & description_, + SortDescription description_, size_t max_block_size_, UInt64 limit_, bool quiet_, bool have_all_inputs_) - : IProcessor(InputPorts(num_inputs, header), {header}) - , description(description_), max_block_size(max_block_size_), limit(limit_), quiet(quiet_) - , have_all_inputs(have_all_inputs_) - , merged_data(header), source_chunks(num_inputs), cursors(num_inputs) + : IMergingTransform(num_inputs, header, header, have_all_inputs_) + , description(std::move(description_)), max_block_size(max_block_size_), limit(limit_), quiet(quiet_) + , source_chunks(num_inputs), cursors(num_inputs) { auto & sample = outputs.front().getHeader(); /// Replace column names in description to positions. 
@@ -36,172 +36,56 @@ MergingSortedTransform::MergingSortedTransform( } } -void MergingSortedTransform::addInput() +void MergingSortedTransform::onNewInput() { - if (have_all_inputs) - throw Exception("MergingSortedTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); - - inputs.emplace_back(outputs.front().getHeader(), this); source_chunks.emplace_back(); cursors.emplace_back(); } -void MergingSortedTransform::setHaveAllInputs() +void MergingSortedTransform::initializeInputs() { - if (have_all_inputs) - throw Exception("MergingSortedTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + if (has_collation) + queue_with_collation = SortingHeap(cursors); + else + queue_without_collation = SortingHeap(cursors); - have_all_inputs = true; + is_queue_initialized = true; } -IProcessor::Status MergingSortedTransform::prepare() +void MergingSortedTransform::consume(Chunk chunk, size_t input_number) { - if (!have_all_inputs) - return Status::NeedData; + updateCursor(std::move(chunk), input_number); - auto & output = outputs.front(); - - /// Special case for no inputs. - if (inputs.empty()) + if (is_queue_initialized) { - output.finish(); - return Status::Finished; - } - - /// Check can output. - - if (output.isFinished()) - { - for (auto & in : inputs) - in.close(); - - return Status::Finished; - } - - /// Do not disable inputs, so it will work in the same way as with AsynchronousBlockInputStream, like before. - bool is_port_full = !output.canPush(); - - /// Special case for single input. - if (inputs.size() == 1) - { - auto & input = inputs.front(); - if (input.isFinished()) - { - output.finish(); - return Status::Finished; - } - - input.setNeeded(); - - if (input.hasData()) - { - if (!is_port_full) - output.push(input.pull()); - - return Status::PortFull; - } - - return Status::NeedData; - } - - /// Push if has data. - if (merged_data.mergedRows() && !is_port_full) - output.push(merged_data.pull()); - - if (!is_initialized) - { - /// Check for inputs we need. 
- bool all_inputs_has_data = true; - auto it = inputs.begin(); - for (size_t i = 0; it != inputs.end(); ++i, ++it) - { - auto & input = *it; - if (input.isFinished()) - continue; - - if (!cursors[i].empty()) - { - // input.setNotNeeded(); - continue; - } - - input.setNeeded(); - - if (!input.hasData()) - { - all_inputs_has_data = false; - continue; - } - - auto chunk = input.pull(); - if (!chunk.hasRows()) - { - - if (!input.isFinished()) - all_inputs_has_data = false; - - continue; - } - - updateCursor(std::move(chunk), i); - } - - if (!all_inputs_has_data) - return Status::NeedData; - if (has_collation) - queue_with_collation = SortingHeap(cursors); + queue_with_collation.push(cursors[input_number]); else - queue_without_collation = SortingHeap(cursors); + queue_without_collation.push(cursors[input_number]); + } +} - is_initialized = true; - return Status::Ready; +void MergingSortedTransform::updateCursor(Chunk chunk, size_t source_num) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); + + auto & source_chunk = source_chunks[source_num]; + + if (source_chunk.empty()) + { + source_chunk = std::move(chunk); + cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); + has_collation |= cursors[source_num].has_collation; } else { - if (is_finished) - { - - if (is_port_full) - return Status::PortFull; - - for (auto & input : inputs) - input.close(); - - outputs.front().finish(); - - return Status::Finished; - } - - if (need_data) - { - auto & input = *std::next(inputs.begin(), next_input_to_read); - if (!input.isFinished()) - { - input.setNeeded(); - - if (!input.hasData()) - return Status::NeedData; - - auto chunk = input.pull(); - if (!chunk.hasRows() && !input.isFinished()) - return Status::NeedData; - - updateCursor(std::move(chunk), next_input_to_read); - - if (has_collation) - queue_with_collation.push(cursors[next_input_to_read]); - else - queue_without_collation.push(cursors[next_input_to_read]); - } - - need_data = false; - } - - if (is_port_full) - return Status::PortFull; - - return Status::Ready; + source_chunk = std::move(chunk); + cursors[source_num].reset(source_chunk.getColumns(), {}); } } @@ -222,7 +106,7 @@ void MergingSortedTransform::merge(TSortingHeap & queue) if (limit && merged_data.totalMergedRows() >= limit) { //std::cerr << "Limit reached\n"; - is_finished = true; + finish(); return false; } @@ -274,7 +158,7 @@ void MergingSortedTransform::merge(TSortingHeap & queue) if (!current->isLast()) { -// std::cerr << "moving to next row\n"; + //std::cerr << "moving to next row\n"; queue.next(); } else @@ -282,17 +166,17 @@ void MergingSortedTransform::merge(TSortingHeap & queue) /// We will get the next block from the corresponding source, if there is one. 
queue.removeTop(); -// std::cerr << "It was last row, fetching next block\n"; - need_data = true; - next_input_to_read = current.impl->order; + //std::cerr << "It was last row, fetching next block\n"; + requestDataForInput(current.impl->order); if (limit && merged_data.totalMergedRows() >= limit) - is_finished = true; + finish(); return; } } - is_finished = true; + + finish(); } void MergingSortedTransform::insertFromChunk(size_t source_num) @@ -309,14 +193,14 @@ void MergingSortedTransform::insertFromChunk(size_t source_num) { num_rows = total_merged_rows_after_insertion - limit; merged_data.insertFromChunk(std::move(source_chunks[source_num]), num_rows); - is_finished = true; + finish(); } else { merged_data.insertFromChunk(std::move(source_chunks[source_num]), 0); - need_data = true; - next_input_to_read = source_num; + requestDataForInput(source_num); } + source_chunks[source_num] = Chunk(); if (out_row_sources_buf) @@ -327,5 +211,4 @@ void MergingSortedTransform::insertFromChunk(size_t source_num) } } - } diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.h b/dbms/src/Processors/Merges/MergingSortedTransform.h new file mode 100644 index 00000000000..1fac794b7d0 --- /dev/null +++ b/dbms/src/Processors/Merges/MergingSortedTransform.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +/// Merges several sorted inputs into one sorted output. +class MergingSortedTransform : public IMergingTransform +{ +public: + MergingSortedTransform( + const Block & header, + size_t num_inputs, + SortDescription description, + size_t max_block_size, + UInt64 limit = 0, + bool quiet = false, + bool have_all_inputs = true); + + String getName() const override { return "MergingSortedTransform"; } + void work() override; + +protected: + + void onNewInput() override; + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + + /// Settings + SortDescription description; + const size_t max_block_size; + UInt64 limit; + bool has_collation = false; + bool quiet = false; + + /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) + /// If it is not nullptr then it should be populated during execution + WriteBuffer * out_row_sources_buf = nullptr; + + /// Chunks currently being merged. 
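One detail worth calling out in the new header: the have_all_inputs flag lets a caller construct the transform before the number of sorted inputs is known and grow it afterwards. A hypothetical wiring sequence could look like this (a sketch only; upstream_processors, header, description and max_block_size are assumed from surrounding code and are not taken from the patch):

auto transform = std::make_shared<DB::MergingSortedTransform>(
    header, /*num_inputs=*/ 0, description, max_block_size,
    /*limit=*/ 0, /*quiet=*/ false, /*have_all_inputs=*/ false);

for (auto & upstream : upstream_processors)
{
    transform->addInput();                          /// append one more input port
    DB::connect(upstream->getOutputs().front(),     /// and attach the producer to it
                transform->getInputs().back());
}

transform->setHaveAllInputs();   /// seal the port list; until then prepare() just reports NeedData

This is the shape MergeSortingTransform relies on when it creates its external_merging_sorted with have_all_inputs = false, as a later hunk in this series shows.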
+ std::vector source_chunks; + + SortCursorImpls cursors; + + SortingHeap queue_without_collation; + SortingHeap queue_with_collation; + bool is_queue_initialized = false; + +private: + + template + void merge(TSortingHeap & queue); + + void insertFromChunk(size_t source_num); + void updateCursor(Chunk chunk, size_t source_num); +}; + +} diff --git a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp index 060d860b0b5..7c447a24b8e 100644 --- a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp b/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp index 71d282585fd..477626d165d 100644 --- a/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp +++ b/dbms/src/Processors/tests/processors_test_merging_sorted_transform.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 629a2b2cc18..10db375a6cf 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -58,7 +58,7 @@ namespace std #include #include #include -#include +#include #include #include From b02636f9163f163a15e99d12084636d8e5cea9ee Mon Sep 17 00:00:00 2001 From: Avogar Date: Thu, 26 Mar 2020 19:33:00 +0300 Subject: [PATCH 004/752] Add MsgPackRowInputFormat, msgpack-c contrib and tests. --- .gitmodules | 3 + CMakeLists.txt | 1 + cmake/find/msgpack.cmake | 2 + contrib/msgpack-c | 1 + dbms/CMakeLists.txt | 2 + dbms/src/Formats/FormatFactory.cpp | 1 + dbms/src/Formats/FormatFactory.h | 3 +- .../Formats/Impl/MsgPackRowInputFormat.cpp | 178 ++++++++++++++++++ .../Formats/Impl/MsgPackRowInputFormat.h | 28 +++ .../Formats/Impl/MsgPackRowOutputFormat.cpp | 31 +-- .../Formats/Impl/MsgPackRowOutputFormat.h | 3 +- .../01098_msgpack_format.reference | 8 + .../0_stateless/01098_msgpack_format.sh | 31 +++ .../0_stateless/data_msgpack/all_types.msgpk | Bin 0 -> 200 bytes .../data_msgpack/nested_arrays.msgpk | 1 + 15 files changed, 269 insertions(+), 24 deletions(-) create mode 100644 cmake/find/msgpack.cmake create mode 160000 contrib/msgpack-c create mode 100644 dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp create mode 100644 dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h create mode 100644 dbms/tests/queries/0_stateless/01098_msgpack_format.reference create mode 100755 dbms/tests/queries/0_stateless/01098_msgpack_format.sh create mode 100644 dbms/tests/queries/0_stateless/data_msgpack/all_types.msgpk create mode 100644 dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk diff --git a/.gitmodules b/.gitmodules index 29b2ada63ea..c6afed3a5e2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -148,3 +148,6 @@ path = contrib/avro url = https://github.com/ClickHouse-Extras/avro.git ignore = untracked +[submodule "contrib/msgpack-c"] + path = contrib/msgpack-c + url = https://github.com/msgpack/msgpack-c diff --git a/CMakeLists.txt b/CMakeLists.txt index 9513caa8eee..d79cf152e15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -345,6 +345,7 @@ include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) include (cmake/find/orc.cmake) 
include (cmake/find/avro.cmake) +include (cmake/find/msgpack.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) diff --git a/cmake/find/msgpack.cmake b/cmake/find/msgpack.cmake new file mode 100644 index 00000000000..a1f18bb1eb0 --- /dev/null +++ b/cmake/find/msgpack.cmake @@ -0,0 +1,2 @@ +set(MSGPACK_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include) +message(STATUS "Using msgpack: ${MSGPACK_INCLUDE_DIR}") diff --git a/contrib/msgpack-c b/contrib/msgpack-c new file mode 160000 index 00000000000..46684265d50 --- /dev/null +++ b/contrib/msgpack-c @@ -0,0 +1 @@ +Subproject commit 46684265d50b5d1b062d4c5c428ba08462844b1d diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index aa10b0ed2ca..b13958a7b4d 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -574,6 +574,8 @@ target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR}) +target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) + add_subdirectory (programs) add_subdirectory (tests) diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index 8b6034dad9d..55cd4b1f368 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -352,6 +352,7 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorAvro(*this); registerInputFormatProcessorTemplate(*this); registerOutputFormatProcessorTemplate(*this); + registerInputFormatProcessorMsgPack(*this); registerOutputFormatProcessorMsgPack(*this); registerFileSegmentationEngineTabSeparated(*this); diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index 68ba2155642..705bd2039fc 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -172,7 +172,8 @@ void registerInputFormatProcessorAvro(FormatFactory & factory); void registerOutputFormatProcessorAvro(FormatFactory & factory); void registerInputFormatProcessorTemplate(FormatFactory & factory); void registerOutputFormatProcessorTemplate(FormatFactory & factory); -void registerOutputFormatProcessorMsgPack(FormatFactory &factory); +void registerInputFormatProcessorMsgPack(FormatFactory & factory); +void registerOutputFormatProcessorMsgPack(FormatFactory & factory); /// File Segmentation Engines for parallel reading diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp new file mode 100644 index 00000000000..59a8d176f32 --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -0,0 +1,178 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int INCORRECT_DATA; +} + +MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) + : IRowInputFormat(header_, in_, std::move(params_)), data_types(header_.getDataTypes()) {} + +bool MsgPackRowInputFormat::readObject() +{ + if (in.eof() && unpacker.nonparsed_size() == 0) + return false; + while (!unpacker.next(object_handle)) + { + if (in.eof()) + throw Exception("Unexpected end of file while parsing MsgPack object.", ErrorCodes::INCORRECT_DATA); + unpacker.reserve_buffer(in.available()); + memcpy(unpacker.buffer(), in.position(), in.available()); + 
unpacker.buffer_consumed(in.available()); + in.position() += in.available(); + } + return true; +} + +void MsgPackRowInputFormat::insertObject(IColumn & column, DataTypePtr data_type, const msgpack::object & object) +{ + switch (data_type->getTypeId()) + { + case TypeIndex::UInt8: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::UInt64: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::Int8: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::Int16: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::Int32: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::Int64: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::Float32: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::Float64: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::DateTime64: + { + assert_cast(column).insertValue(object.as()); + return; + } + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + { + String str = object.as(); + column.insertData(str.data(), str.size()); + return; + } + case TypeIndex::Array: + { + msgpack::object_array object_array = object.via.array; + auto nested_type = assert_cast(*data_type).getNestedType(); + ColumnArray & column_array = assert_cast(column); + ColumnArray::Offsets & offsets = column_array.getOffsets(); + IColumn & nested_column = column_array.getData(); + for (size_t i = 0; i != object_array.size; ++i) + { + insertObject(nested_column, nested_type, object_array.ptr[i]); + } + offsets.push_back(offsets.back() + object_array.size); + return; + } + case TypeIndex::Nullable: + { + auto nested_type = removeNullable(data_type); + ColumnNullable & column_nullable = assert_cast(column); + if (object.type == msgpack::type::NIL) + column_nullable.insertDefault(); + else + insertObject(column_nullable.getNestedColumn(), nested_type, object); + return; + } + case TypeIndex::Nothing: + { + // Nothing to insert, MsgPack object is nil. 
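The feeding loop in readObject() above is the standard msgpack-c streaming idiom: reserve space in the unpacker, memcpy the raw bytes in, commit them with buffer_consumed(), and call next() until a complete object can be decoded. Detached from the ReadBuffer plumbing, the same idiom over an in-memory string looks roughly like this (an illustrative sketch, not code from the patch):

#include <msgpack.hpp>
#include <algorithm>
#include <cstring>
#include <string>

/// Decode every object in `data`, pretending the bytes arrive in 64-byte slices.
void unpackAll(const std::string & data)
{
    msgpack::unpacker unpacker;
    msgpack::object_handle handle;
    size_t offset = 0;

    while (true)
    {
        if (unpacker.next(handle))
        {
            [[maybe_unused]] msgpack::object obj = handle.get();
            /// ... dispatch on obj.type here, as insertObject() does on the column type ...
            continue;
        }

        if (offset == data.size())
            break;   /// no bytes left; leftover partial input would be an error
                     /// (readObject() throws INCORRECT_DATA in that case)

        size_t n = std::min<size_t>(64, data.size() - offset);
        unpacker.reserve_buffer(n);                            /// grow the internal buffer
        memcpy(unpacker.buffer(), data.data() + offset, n);    /// copy the raw bytes in
        unpacker.buffer_consumed(n);                           /// hand them to the parser
        offset += n;
    }
}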
+ return; + } + default: + break; + } + throw Exception("Type " + data_type->getName() + " is not supported for MsgPack input format", ErrorCodes::ILLEGAL_COLUMN); +} + +bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) +{ + size_t column_index = 0; + bool has_more_data = true; + for (; column_index != columns.size(); ++column_index) + { + has_more_data = readObject(); + if (!has_more_data) + break; + insertObject(*columns[column_index], data_types[column_index], object_handle.get()); + } + if (!has_more_data) + { + if (column_index != 0) + throw Exception("Not enough values to complete the row.", ErrorCodes::INCORRECT_DATA); + return false; + } + return true; +} + +void registerInputFormatProcessorMsgPack(FormatFactory & factory) { + factory.registerInputFormatProcessor("MsgPack", []( + ReadBuffer &buf, + const Block &sample, + const RowInputFormatParams ¶ms, + const FormatSettings &) { + return std::make_shared(sample, buf, params); + }); +} + +} \ No newline at end of file diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h new file mode 100644 index 00000000000..b2f14fca6f6 --- /dev/null +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class ReadBuffer; + +class MsgPackRowInputFormat : public IRowInputFormat +{ +public: + MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); + + bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + String getName() const override { return "MagPackRowInputFormat"; } +private: + bool readObject(); + void insertObject(IColumn & column, DataTypePtr type, const msgpack::object & object); + + DataTypes data_types; + msgpack::unpacker unpacker; + msgpack::object_handle object_handle; +}; + +} \ No newline at end of file diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index 061f4228158..b4cb7185406 100644 --- a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -20,8 +21,8 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_) - : IRowOutputFormat(header_, out_, callback), settings(settings_), packer(out_) {} +MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback) + : IRowOutputFormat(header_, out_, callback), packer(out_) {} void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num) { @@ -32,11 +33,13 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_uint8(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::Date: [[fallthrough]]; case TypeIndex::UInt16: { packer.pack_uint16(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::DateTime: [[fallthrough]]; case TypeIndex::UInt32: { packer.pack_uint32(assert_cast(column).getElement(row_num)); @@ -77,19 +80,12 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_double(assert_cast(column).getElement(row_num)); return; } - case 
TypeIndex::Date: + case TypeIndex::DateTime64: { - packer.pack_uint16(assert_cast(column).getElement(row_num)); - return; - } - case TypeIndex::DateTime: - { - UInt32 datetime = assert_cast(column).getElement(row_num); - // Timestamp extension type in MsgPack is -1. - packer.pack_ext(sizeof(datetime), -1); - packer.pack_ext_body(reinterpret_cast(&datetime), sizeof(datetime)); + packer.pack_uint64(assert_cast(column).getElement(row_num)); return; } + case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: { const StringRef & string = assert_cast(column).getDataAt(row_num); @@ -97,13 +93,6 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_str_body(string.data, string.size); return; } - case TypeIndex::FixedString: - { - const StringRef & string = assert_cast(column).getDataAt(row_num); - packer.pack_str(string.size); - packer.pack_str_body(string.data, string.size); - return; - } case TypeIndex::Array: { auto nested_type = assert_cast(*data_type).getNestedType(); @@ -155,9 +144,9 @@ void registerOutputFormatProcessorMsgPack(FormatFactory & factory) WriteBuffer & buf, const Block & sample, FormatFactory::WriteCallback callback, - const FormatSettings & settings) + const FormatSettings &) { - return std::make_shared(buf, sample, callback, settings); + return std::make_shared(buf, sample, callback); }); } diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h index 20df018b60c..351920eb7c8 100644 --- a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -13,7 +13,7 @@ namespace DB class MsgPackRowOutputFormat : public IRowOutputFormat { public: - MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_); + MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback); String getName() const override { return "MsgPackRowOutputFormat"; } @@ -22,7 +22,6 @@ public: void serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num); private: - FormatSettings settings; msgpack::packer packer; }; diff --git a/dbms/tests/queries/0_stateless/01098_msgpack_format.reference b/dbms/tests/queries/0_stateless/01098_msgpack_format.reference new file mode 100644 index 00000000000..aab048208bc --- /dev/null +++ b/dbms/tests/queries/0_stateless/01098_msgpack_format.reference @@ -0,0 +1,8 @@ +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 [1,2,3,4,5] +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 [5,4,3,2,1] +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 [42] +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 [1,2,3,4,5] +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 [5,4,3,2,1] +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 [42] +[[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] +[[1,2,3],[1001,2002],[3167]] 
[[['one'],['two']],[['three']],[['four'],['five']]] diff --git a/dbms/tests/queries/0_stateless/01098_msgpack_format.sh b/dbms/tests/queries/0_stateless/01098_msgpack_format.sh new file mode 100755 index 00000000000..2aaf2dfd527 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01098_msgpack_format.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS msgpack"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime, datetime64 DateTime64, array Array(UInt32)) ENGINE = Memory"; + + +$CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', 18980, 1639872000, 1639872000000, [1,2,3,4,5]), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', 20000, 1839882000, 1639872891123, [5,4,3,2,1]),(42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', 42, 42, 42, [42])"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack FORMAT MsgPack" > $CURDIR/data_msgpack/all_types.msgpk; + +cat $CURDIR/data_msgpack/all_types.msgpk | $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack FORMAT MsgPack"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE msgpack"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (array1 Array(Array(UInt32)), array2 Array(Array(Array(String)))) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES ([[1,2,3], [1001, 2002], [3167]], [[['one'], ['two']], [['three']],[['four'], ['five']]])"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack FORMAT MsgPack" > $CURDIR/data_msgpack/nested_arrays.msgpk; + +cat $CURDIR/data_msgpack/nested_arrays.msgpk | $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack FORMAT MsgPack"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE msgpack"; + diff --git a/dbms/tests/queries/0_stateless/data_msgpack/all_types.msgpk b/dbms/tests/queries/0_stateless/data_msgpack/all_types.msgpk new file mode 100644 index 0000000000000000000000000000000000000000..efefdf32a55f96112d8952e725c2023f9687cde3 GIT binary patch literal 200 zcmX@}|Lp(&=l%o1c?JdsagVYW3>O+MHZWXj0CFz_g$*9;swCwUf}BZ jJe8G&nTb&g3Qjp`t`j-!;A(oPKStx*A`>H^_(UxL8(vuZ literal 0 HcmV?d00001 diff --git a/dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk b/dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk new file mode 100644 index 00000000000..761ef1d5b6c --- /dev/null +++ b/dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk @@ -0,0 +1 @@ +ґ _onetwothreefourfive \ No newline at end of file From d2d6d637acd77153b82c0831b31410220cc48872 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 26 Mar 2020 20:14:52 +0300 Subject: [PATCH 005/752] Delete excess file --- dbms/programs/server/metadata/test_n2zcw0/t.sql | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 dbms/programs/server/metadata/test_n2zcw0/t.sql diff --git a/dbms/programs/server/metadata/test_n2zcw0/t.sql b/dbms/programs/server/metadata/test_n2zcw0/t.sql deleted file mode 100644 index 64e3abcfe34..00000000000 --- a/dbms/programs/server/metadata/test_n2zcw0/t.sql +++ /dev/null @@ -1,8 
+0,0 @@ -ATTACH TABLE t -( - `a` Int, - `b` Int -) -ENGINE = MergeTree -ORDER BY (a, b) -SETTINGS index_granularity = 400 From 2cdb1989a0c29f9c69ad425c0aa9a29a759be39d Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 26 Mar 2020 20:15:35 +0300 Subject: [PATCH 006/752] Delete test_n2zcw0.sql --- dbms/programs/server/metadata/test_n2zcw0.sql | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 dbms/programs/server/metadata/test_n2zcw0.sql diff --git a/dbms/programs/server/metadata/test_n2zcw0.sql b/dbms/programs/server/metadata/test_n2zcw0.sql deleted file mode 100644 index 80046cd585c..00000000000 --- a/dbms/programs/server/metadata/test_n2zcw0.sql +++ /dev/null @@ -1,2 +0,0 @@ -ATTACH DATABASE test_n2zcw0 -ENGINE = Ordinary From 0c12117f189bda531e7d5946c58b6c8f54c5ff7a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 26 Mar 2020 20:27:48 +0300 Subject: [PATCH 007/752] Refactor MergingSortedTransform. Add IMergingTransform. --- .../Processors/Merges/IMergingTransform.cpp | 27 +++++++++-- .../src/Processors/Merges/IMergingTransform.h | 48 +++++++++++++++++-- .../Merges/MergingSortedTransform.cpp | 48 ++++++++++++++----- .../Merges/MergingSortedTransform.h | 11 ++--- .../Transforms/MergeSortingTransform.cpp | 2 + 5 files changed, 111 insertions(+), 25 deletions(-) diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp index d22acb71521..92ad592e11f 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.cpp +++ b/dbms/src/Processors/Merges/IMergingTransform.cpp @@ -10,9 +10,15 @@ namespace ErrorCodes } IMergingTransform::IMergingTransform( - size_t num_inputs, const Block & input_header, const Block & output_header, bool have_all_inputs_) + size_t num_inputs, + const Block & input_header, + const Block & output_header, + size_t max_block_size, + bool use_average_block_size, + bool have_all_inputs_) : IProcessor(InputPorts(num_inputs, input_header), {output_header}) - , merged_data(output_header), have_all_inputs(have_all_inputs_) + , merged_data(output_header, use_average_block_size, max_block_size) + , have_all_inputs(have_all_inputs_) { } @@ -57,6 +63,7 @@ IProcessor::Status IMergingTransform::prepareSingleInput() if (input.isFinished()) { output.finish(); + onFinish(); return Status::Finished; } @@ -141,6 +148,7 @@ IProcessor::Status IMergingTransform::prepare() if (inputs.empty()) { output.finish(); + onFinish(); return Status::Finished; } @@ -151,6 +159,7 @@ IProcessor::Status IMergingTransform::prepare() for (auto & in : inputs) in.close(); + onFinish(); return Status::Finished; } @@ -162,8 +171,17 @@ IProcessor::Status IMergingTransform::prepare() bool is_port_full = !output.canPush(); /// Push if has data. 
- if (merged_data.mergedRows() && !is_port_full) - output.push(merged_data.pull()); + bool has_data_to_push = (is_finished && merged_data.mergedRows()) || merged_data.hasEnoughRows(); + if (has_data_to_push && !is_port_full) + { + auto chunk = merged_data.pull(); + + ++total_chunks; + total_rows += chunk.getNumRows(); + total_bytes += chunk.allocatedBytes(); + + output.push(std::move(chunk)); + } if (!is_initialized) return prepareInitializeInputs(); @@ -179,6 +197,7 @@ IProcessor::Status IMergingTransform::prepare() outputs.front().finish(); + onFinish(); return Status::Finished; } diff --git a/dbms/src/Processors/Merges/IMergingTransform.h b/dbms/src/Processors/Merges/IMergingTransform.h index 5c5a7bad1f7..5680b92b6d3 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.h +++ b/dbms/src/Processors/Merges/IMergingTransform.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -9,7 +10,13 @@ namespace DB class IMergingTransform : public IProcessor { public: - IMergingTransform(size_t num_inputs, const Block & input_header, const Block & output_header, bool have_all_inputs); + IMergingTransform( + size_t num_inputs, + const Block & input_header, + const Block & output_header, + size_t max_block_size, + bool use_average_block_size, /// For adaptive granularity. Return chunks with the same avg size as inputs. + bool have_all_inputs_); /// Methods to add additional input port. It is possible to do only before the first call of `prepare`. void addInput(); @@ -23,6 +30,7 @@ protected: virtual void onNewInput(); /// Is called when new input is added. To initialize input's data. virtual void initializeInputs() = 0; /// Is called after first chunk was read for every input. virtual void consume(Chunk chunk, size_t input_number) = 0; /// Is called after chunk was consumed from input. + virtual void onFinish() {} /// Is called when all data is processed. void requestDataForInput(size_t input_number); /// Call it to say that next chunk of data is required for input. void finish() { is_finished = true; } /// Call it when all data was inserted to merged_data. @@ -32,14 +40,15 @@ protected: class MergedData { public: - explicit MergedData(const Block & header) + explicit MergedData(const Block & header, bool use_average_block_size_, UInt64 max_block_size_) + : max_block_size(max_block_size_), use_average_block_size(use_average_block_size_) { columns.reserve(header.columns()); for (const auto & column : header) columns.emplace_back(column.type->createColumn()); } - void insertRow(const ColumnRawPtrs & raw_columns, size_t row) + void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size) { size_t num_columns = raw_columns.size(); for (size_t i = 0; i < num_columns; ++i) @@ -47,6 +56,7 @@ protected: ++total_merged_rows; ++merged_rows; + sum_blocks_granularity += block_size; } void insertFromChunk(Chunk && chunk, size_t limit_rows) @@ -56,6 +66,7 @@ protected: ErrorCodes::LOGICAL_ERROR); auto num_rows = chunk.getNumRows(); + auto block_size = num_rows; columns = chunk.mutateColumns(); if (limit_rows && num_rows > limit_rows) { @@ -66,6 +77,7 @@ protected: total_merged_rows += num_rows; merged_rows = num_rows; + sum_blocks_granularity += block_size * num_rows; } Chunk pull() @@ -78,22 +90,52 @@ protected: empty_columns.swap(columns); Chunk chunk(std::move(empty_columns), merged_rows); + merged_rows = 0; + sum_blocks_granularity = 0; return chunk; } + bool hasEnoughRows() const + { + /// Never return more then max_block_size. 
+ if (merged_rows >= max_block_size) + return true; + + if (!use_average_block_size) + return false; + + /// Zero rows always not enough. + if (merged_rows == 0) + return false; + + return merged_rows * merged_rows >= sum_blocks_granularity; + } + UInt64 totalMergedRows() const { return total_merged_rows; } UInt64 mergedRows() const { return merged_rows; } private: + UInt64 sum_blocks_granularity = 0; UInt64 total_merged_rows = 0; UInt64 merged_rows = 0; + + const UInt64 max_block_size; + const bool use_average_block_size; + MutableColumns columns; }; MergedData merged_data; +protected: + /// Profile info. + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + UInt64 total_rows = 0; + UInt64 total_bytes = 0; + UInt64 total_chunks = 0; + private: /// Processor state. bool is_initialized = false; diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index c259ce05d76..2dc48e4265b 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -3,6 +3,8 @@ #include #include +#include + namespace DB { @@ -15,13 +17,17 @@ MergingSortedTransform::MergingSortedTransform( const Block & header, size_t num_inputs, SortDescription description_, - size_t max_block_size_, + size_t max_block_size, UInt64 limit_, bool quiet_, - bool have_all_inputs_) - : IMergingTransform(num_inputs, header, header, have_all_inputs_) - , description(std::move(description_)), max_block_size(max_block_size_), limit(limit_), quiet(quiet_) - , source_chunks(num_inputs), cursors(num_inputs) + bool use_average_block_sizes, + bool have_all_inputs) + : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, have_all_inputs) + , description(std::move(description_)) + , limit(limit_) + , quiet(quiet_) + , source_chunks(num_inputs) + , cursors(num_inputs) { auto & sample = outputs.front().getHeader(); /// Replace column names in description to positions. @@ -110,16 +116,12 @@ void MergingSortedTransform::merge(TSortingHeap & queue) return false; } - return merged_data.mergedRows() < max_block_size; + return merged_data.hasEnoughRows(); }; /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` - while (queue.isValid()) + while (queue.isValid() && can_read_another_row()) { - /// Shouldn't happen at first iteration, but check just in case. - if (!can_read_another_row()) - return; - auto current = queue.current(); /** And what if the block is totally less or equal than the rest for the current cursor? 
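A gloss on the arithmetic in hasEnoughRows() above, since it is easy to misread: insertRow() adds the size of the source block to sum_blocks_granularity once per merged row, so sum_blocks_granularity / merged_rows is the average size of the blocks the merged rows were drawn from. The check merged_rows * merged_rows >= sum_blocks_granularity is the same comparison with the division cleared: the chunk counts as full once merged_rows reaches that average. For example, if every source block carries 8192 rows, then after 8192 merged rows sum_blocks_granularity equals 8192 * 8192 and the chunk is flushed; with adaptive-granularity parts whose blocks average 1024 rows, output chunks flush near 1024 rows instead of growing all the way to max_block_size.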
@@ -147,7 +149,7 @@ void MergingSortedTransform::merge(TSortingHeap & queue) //std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n"; //std::cerr << "Inserting row\n"; - merged_data.insertRow(current->all_columns, current->pos); + merged_data.insertRow(current->all_columns, current->pos, current->rows); if (out_row_sources_buf) { @@ -211,4 +213,26 @@ void MergingSortedTransform::insertFromChunk(size_t source_num) } } +void MergingSortedTransform::onFinish() +{ + if (quiet) + return; + + auto * log = &Logger::get("MergingSortedBlockInputStream"); + + double seconds = total_stopwatch.elapsedSeconds(); + + std::stringstream message; + message << std::fixed << std::setprecision(2) + << "Merge sorted " << total_chunks << " blocks, " << total_rows << " rows" + << " in " << seconds << " sec."; + + if (seconds != 0) + message << ", " + << total_rows / seconds << " rows/sec., " + << total_bytes / 1000000.0 / seconds << " MB/sec."; + + LOG_DEBUG(log, message.str()); +} + } diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.h b/dbms/src/Processors/Merges/MergingSortedTransform.h index 1fac794b7d0..c176c43d0c5 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.h +++ b/dbms/src/Processors/Merges/MergingSortedTransform.h @@ -17,22 +17,23 @@ public: size_t num_inputs, SortDescription description, size_t max_block_size, - UInt64 limit = 0, - bool quiet = false, + UInt64 limit_ = 0, + bool quiet_ = false, + bool use_average_block_sizes = false, bool have_all_inputs = true); String getName() const override { return "MergingSortedTransform"; } void work() override; -protected: +private: void onNewInput() override; void initializeInputs() override; void consume(Chunk chunk, size_t input_number) override; + void onFinish() override; /// Settings SortDescription description; - const size_t max_block_size; UInt64 limit; bool has_collation = false; bool quiet = false; @@ -50,8 +51,6 @@ protected: SortingHeap queue_with_collation; bool is_queue_initialized = false; -private: - template void merge(TSortingHeap & queue); diff --git a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp index 7c447a24b8e..de5b7092024 100644 --- a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp @@ -191,6 +191,7 @@ void MergeSortingTransform::consume(Chunk chunk) { bool quiet = false; bool have_all_inputs = false; + bool use_average_block_sizes = false; external_merging_sorted = std::make_shared( header_without_constants, @@ -199,6 +200,7 @@ void MergeSortingTransform::consume(Chunk chunk) max_merged_block_size, limit, quiet, + use_average_block_sizes, have_all_inputs); processors.emplace_back(external_merging_sorted); From af7f3fc25184c30194786580edcb40c43b72806d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 26 Mar 2020 20:31:03 +0300 Subject: [PATCH 008/752] Fix build. 
--- dbms/src/Processors/Merges/MergingSortedTransform.cpp | 4 ++-- dbms/src/Processors/Merges/MergingSortedTransform.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index 2dc48e4265b..d371d623746 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -21,8 +21,8 @@ MergingSortedTransform::MergingSortedTransform( UInt64 limit_, bool quiet_, bool use_average_block_sizes, - bool have_all_inputs) - : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, have_all_inputs) + bool have_all_inputs_) + : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, have_all_inputs_) , description(std::move(description_)) , limit(limit_) , quiet(quiet_) diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.h b/dbms/src/Processors/Merges/MergingSortedTransform.h index c176c43d0c5..9d6fdda723c 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.h +++ b/dbms/src/Processors/Merges/MergingSortedTransform.h @@ -20,7 +20,7 @@ public: UInt64 limit_ = 0, bool quiet_ = false, bool use_average_block_sizes = false, - bool have_all_inputs = true); + bool have_all_inputs_ = true); String getName() const override { return "MergingSortedTransform"; } void work() override; From d1d54ab1df24de222fd74c9a44ba35277a6607f8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 26 Mar 2020 22:15:14 +0300 Subject: [PATCH 009/752] Try fix tests. --- dbms/src/Processors/Merges/IMergingTransform.cpp | 2 +- dbms/src/Processors/Merges/MergingSortedTransform.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp index 92ad592e11f..ac8f6b947a6 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.cpp +++ b/dbms/src/Processors/Merges/IMergingTransform.cpp @@ -99,7 +99,7 @@ IProcessor::Status IMergingTransform::prepareInitializeInputs() if (input.isFinished()) continue; - if (!input_states[i].is_initialized) + if (input_states[i].is_initialized) { // input.setNotNeeded(); continue; diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index d371d623746..22b8673c4f5 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -116,7 +116,7 @@ void MergingSortedTransform::merge(TSortingHeap & queue) return false; } - return merged_data.hasEnoughRows(); + return !merged_data.hasEnoughRows(); }; /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` @@ -218,7 +218,7 @@ void MergingSortedTransform::onFinish() if (quiet) return; - auto * log = &Logger::get("MergingSortedBlockInputStream"); + auto * log = &Logger::get("MergingSortedTransform"); double seconds = total_stopwatch.elapsedSeconds(); From bc9d18a9c4abe7b6ec9548b5bc2944351dac7622 Mon Sep 17 00:00:00 2001 From: Avogar Date: Fri, 27 Mar 2020 00:11:33 +0300 Subject: [PATCH 010/752] Fix style and build errors. 
--- .../src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp | 8 +++++--- dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h | 2 +- .../Processors/Formats/Impl/MsgPackRowOutputFormat.cpp | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 59a8d176f32..0b3fb3d58ed 100644 --- a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -165,14 +165,16 @@ bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & return true; } -void registerInputFormatProcessorMsgPack(FormatFactory & factory) { +void registerInputFormatProcessorMsgPack(FormatFactory & factory) +{ factory.registerInputFormatProcessor("MsgPack", []( ReadBuffer &buf, const Block &sample, const RowInputFormatParams ¶ms, - const FormatSettings &) { + const FormatSettings &) + { return std::make_shared(sample, buf, params); }); } -} \ No newline at end of file +} diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index b2f14fca6f6..7daac811374 100644 --- a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -25,4 +25,4 @@ private: msgpack::object_handle object_handle; }; -} \ No newline at end of file +} diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index b4cb7185406..7c5e2c5b522 100644 --- a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -150,4 +150,4 @@ void registerOutputFormatProcessorMsgPack(FormatFactory & factory) }); } -} \ No newline at end of file +} From 96895e063fc2ff2600bd23bffe118f7dfbc01365 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 27 Mar 2020 13:09:42 +0300 Subject: [PATCH 011/752] Try fix tests. 
--- dbms/src/Processors/Merges/IMergingTransform.cpp | 2 +- dbms/src/Processors/Merges/MergingSortedTransform.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp index ac8f6b947a6..2dd6c27fd61 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.cpp +++ b/dbms/src/Processors/Merges/IMergingTransform.cpp @@ -46,7 +46,7 @@ void IMergingTransform::setHaveAllInputs() void IMergingTransform::requestDataForInput(size_t input_number) { - if (!need_data) + if (need_data) throw Exception("Data was requested for several inputs in IMergingTransform:" " " + std::to_string(next_input_to_read) + " and " + std::to_string(input_number), ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index 22b8673c4f5..2971c55a18a 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -184,7 +184,7 @@ void MergingSortedTransform::merge(TSortingHeap & queue) void MergingSortedTransform::insertFromChunk(size_t source_num) { if (source_num >= cursors.size()) - throw Exception("Logical error in MergingSortedTrandform", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error in MergingSortedTransform", ErrorCodes::LOGICAL_ERROR); //std::cerr << "copied columns\n"; From 9577ef38d0fb9b297a8df0752f1734cef9d92e93 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 27 Mar 2020 15:04:56 +0300 Subject: [PATCH 012/752] Fix style. --- dbms/src/Processors/Merges/IMergingTransform.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/src/Processors/Merges/IMergingTransform.h b/dbms/src/Processors/Merges/IMergingTransform.h index 5680b92b6d3..58ad3a9b969 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.h +++ b/dbms/src/Processors/Merges/IMergingTransform.h @@ -6,6 +6,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + /// Base class for merging transforms. class IMergingTransform : public IProcessor { From d5ab2e53319331a22acbb147081793f6fabb007b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 27 Mar 2020 20:37:35 +0300 Subject: [PATCH 013/752] Added CollapsingSortedTransform. --- .../Merges/CollapsingSortedTransform.cpp | 231 ++++++++++++++++++ .../Merges/CollapsingSortedTransform.h | 93 +++++++ .../Processors/Merges/IMergingTransform.cpp | 10 +- .../src/Processors/Merges/IMergingTransform.h | 20 +- .../Merges/MergingSortedTransform.cpp | 10 +- .../Merges/MergingSortedTransform.h | 3 +- dbms/src/Processors/Merges/SharedChunk.h | 83 +++++++ .../Transforms/MergeSortingTransform.cpp | 1 + 8 files changed, 429 insertions(+), 22 deletions(-) create mode 100644 dbms/src/Processors/Merges/CollapsingSortedTransform.cpp create mode 100644 dbms/src/Processors/Merges/CollapsingSortedTransform.h create mode 100644 dbms/src/Processors/Merges/SharedChunk.h diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp new file mode 100644 index 00000000000..e95b574ffe7 --- /dev/null +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -0,0 +1,231 @@ +#include +#include +#include +#include +#include + +/// Maximum number of messages about incorrect data in the log. 
+#define MAX_ERROR_MESSAGES 10 + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_DATA; +} + +CollapsingSortedTransform::CollapsingSortedTransform( + const Block & header, + size_t num_inputs, + SortDescription description_, + const String & sign_column, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_, + bool use_average_block_sizes) + : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, true) + , description(std::move(description_)) + , sign_column_number(header.getPositionByName(sign_column)) + , out_row_sources_buf(out_row_sources_buf_) +{ +} + +void CollapsingSortedTransform::initializeInputs() +{ + queue = SortingHeap(cursors); + is_queue_initialized = true; +} + +void CollapsingSortedTransform::consume(Chunk chunk, size_t input_number) +{ + updateCursor(std::move(chunk), input_number); + + if (is_queue_initialized) + queue.push(cursors[input_number]); +} + +void CollapsingSortedTransform::updateCursor(Chunk chunk, size_t source_num) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); + + auto & source_chunk = source_chunks[source_num]; + + if (source_chunk) + { + source_chunk = new detail::SharedChunk(std::move(chunk)); + cursors[source_num].reset(source_chunk->getColumns(), {}); + } + else + { + if (cursors[source_num].has_collation) + throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); + + source_chunk = new detail::SharedChunk(std::move(chunk)); + cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); + } + + source_chunk->all_columns = cursors[source_num].all_columns; + source_chunk->sort_columns = cursors[source_num].sort_columns; +} + +void CollapsingSortedTransform::reportIncorrectData() +{ + std::stringstream s; + s << "Incorrect data: number of rows with sign = 1 (" << count_positive + << ") differs with number of rows with sign = -1 (" << count_negative + << ") by more than one (for key: "; + + auto & sort_columns = *last_row.sort_columns; + for (size_t i = 0, size = sort_columns.size(); i < size; ++i) + { + if (i != 0) + s << ", "; + s << applyVisitor(FieldVisitorToString(), (*sort_columns[i])[last_row.row_num]); + } + + s << ")."; + + /** Fow now we limit ourselves to just logging such situations, + * since the data is generated by external programs. + * With inconsistent data, this is an unavoidable error that can not be easily corrected by admins. Therefore Warning. + */ + LOG_WARNING(log, s.rdbuf()); +} + +void CollapsingSortedTransform::insertRow(RowRef & row) +{ + merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); +} + +void CollapsingSortedTransform::insertRows() +{ + if (count_positive == 0 && count_negative == 0) + { + /// No input rows have been read. 
+ return; + } + + if (last_is_positive || count_positive != count_negative) + { + if (count_positive <= count_negative) + { + insertRow(first_negative_row); + + if (out_row_sources_buf) + current_row_sources[first_negative_pos].setSkipFlag(false); + } + + if (count_positive >= count_negative) + { + insertRow(last_positive_row); + + if (out_row_sources_buf) + current_row_sources[last_positive_pos].setSkipFlag(false); + } + + if (!(count_positive == count_negative || count_positive + 1 == count_negative || count_positive == count_negative + 1)) + { + if (count_incorrect_data < MAX_ERROR_MESSAGES) + reportIncorrectData(); + ++count_incorrect_data; + } + } + + if (out_row_sources_buf) + out_row_sources_buf->write( + reinterpret_cast(current_row_sources.data()), + current_row_sources.size() * sizeof(RowSourcePart)); +} + +void CollapsingSortedTransform::work() +{ + merge(); +} + +void CollapsingSortedTransform::merge() +{ + /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` + while (queue.isValid()) + { + auto current = queue.current(); + Int8 sign = assert_cast(*current->all_columns[sign_column_number]).getData()[current->pos]; + + RowRef current_row; + setRowRef(current_row, current); + + if (last_row.empty()) + setRowRef(last_row, current); + + bool key_differs = !last_row.hasEqualSortColumnsWith(current_row); + + /// if there are enough rows and the last one is calculated completely + if (key_differs && merged_data.hasEnoughRows()) + return; + + if (key_differs) + { + /// We write data for the previous primary key. + insertRows(); + + current_row.swap(last_row); + + count_negative = 0; + count_positive = 0; + + current_pos = 0; + first_negative_pos = 0; + last_positive_pos = 0; + current_row_sources.resize(0); + } + + /// Initially, skip all rows. On insert, unskip "corner" rows. + if (out_row_sources_buf) + current_row_sources.emplace_back(current.impl->order, true); + + if (sign == 1) + { + ++count_positive; + last_is_positive = true; + + setRowRef(last_positive_row, current); + last_positive_pos = current_pos; + } + else if (sign == -1) + { + if (!count_negative) + { + setRowRef(first_negative_row, current); + first_negative_pos = current_pos; + } + + ++count_negative; + last_is_positive = false; + } + else + throw Exception("Incorrect data: Sign = " + toString(sign) + " (must be 1 or -1).", + ErrorCodes::INCORRECT_DATA); + + if (!current->isLast()) + { + queue.next(); + } + else + { + /// We take next block from the corresponding source, if there is one. + queue.removeTop(); + requestDataForInput(current.impl->order); + return; + } + } + + insertRows(); + finish(); +} + +} diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.h b/dbms/src/Processors/Merges/CollapsingSortedTransform.h new file mode 100644 index 00000000000..a2240d20e77 --- /dev/null +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.h @@ -0,0 +1,93 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +/** Merges several sorted inputs to one. + * For each group of consecutive identical values of the primary key (the columns by which the data is sorted), + * keeps no more than one row with the value of the column `sign_column = -1` ("negative row") + * and no more than a row with the value of the column `sign_column = 1` ("positive row"). + * That is, it collapses the records from the change log. 
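Before the formal rules, a concrete illustration may help (hypothetical data; each line below is the sequence of sign values seen for one primary key, and the outcomes follow from the rules spelled out in the rest of this comment):

    signs for one key    rows the merge keeps
    +1                   the single positive row
    +1 -1                nothing: the -1 cancels the +1 (last row is negative)
    -1 +1                both the first -1 and the last +1 (equal counts, last is positive)
    +1 -1 +1             only the last +1 (one more positive than negative)
    -1 +1 -1             only the first -1 (one more negative than positive)
    +1 +1 -1 -1 -1 -1    the first -1, plus a logged warning: counts differ by more than one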
+ *
+ * If the number of positive and negative rows is the same, and the last row is positive, then the first negative and last positive rows are written.
+ * If the number of positive and negative rows is the same, and the last row is negative, it writes nothing.
+ * If there is one more positive row than negative rows, only the last positive row is written.
+ * If there is one more negative row than positive rows, only the first negative row is written.
+ * Otherwise, a logical error.
+ */
+class CollapsingSortedTransform final : public IMergingTransform
+{
+public:
+    CollapsingSortedTransform(
+        const Block & header,
+        size_t num_inputs,
+        SortDescription description_,
+        const String & sign_column,
+        size_t max_block_size,
+        WriteBuffer * out_row_sources_buf_ = nullptr,
+        bool use_average_block_sizes = false);
+
+    String getName() const override { return "CollapsingSortedTransform"; }
+    void work() override;
+
+protected:
+    void initializeInputs() override;
+    void consume(Chunk chunk, size_t input_number) override;
+
+private:
+    Logger * log = &Logger::get("CollapsingSortedTransform");
+
+    /// Settings
+    SortDescription description;
+    bool has_collation = false;
+
+    const size_t sign_column_number;
+
+    /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step)
+    /// If it is not nullptr then it should be populated during execution
+    WriteBuffer * out_row_sources_buf = nullptr;
+
+    /// Chunks currently being merged.
+    using SourceChunks = std::vector;
+    SourceChunks source_chunks;
+
+    SortCursorImpls cursors;
+
+    SortingHeap queue;
+    bool is_queue_initialized = false;
+
+    using RowRef = detail::RowRef;
+    RowRef first_negative_row;
+    RowRef last_positive_row;
+    RowRef last_row;
+
+    size_t count_positive = 0;    /// The number of positive rows for the current primary key.
+    size_t count_negative = 0;    /// The number of negative rows for the current primary key.
+    bool last_is_positive = false;    /// true if the last row for the current primary key is positive.
+
+    /// Fields specific to the VERTICAL merge algorithm.
+    /// Row numbers are relative to the start of the current primary key.
+    size_t current_pos = 0;    /// Current row number
+    size_t first_negative_pos = 0;    /// Row number of first_negative
+    size_t last_positive_pos = 0;    /// Row number of last_positive
+    PODArray current_row_sources;    /// Sources of rows with the current primary key
+
+    size_t count_incorrect_data = 0;    /// To prevent too many error messages from being written to the log.
+
+    void reportIncorrectData();
+    void insertRow(RowRef & row);
+    void insertRows();
+    void merge();
+    void updateCursor(Chunk chunk, size_t source_num);
+    void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, source_chunks[cursor.impl->order]); }
+};
+
+}
diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp
index 2dd6c27fd61..b0226ce0f3d 100644
--- a/dbms/src/Processors/Merges/IMergingTransform.cpp
+++ b/dbms/src/Processors/Merges/IMergingTransform.cpp
@@ -173,15 +173,7 @@ IProcessor::Status IMergingTransform::prepare()
     /// Push if has data.
bool has_data_to_push = (is_finished && merged_data.mergedRows()) || merged_data.hasEnoughRows(); if (has_data_to_push && !is_port_full) - { - auto chunk = merged_data.pull(); - - ++total_chunks; - total_rows += chunk.getNumRows(); - total_bytes += chunk.allocatedBytes(); - - output.push(std::move(chunk)); - } + output.push(merged_data.pull()); if (!is_initialized) return prepareInitializeInputs(); diff --git a/dbms/src/Processors/Merges/IMergingTransform.h b/dbms/src/Processors/Merges/IMergingTransform.h index 58ad3a9b969..933b300bb96 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.h +++ b/dbms/src/Processors/Merges/IMergingTransform.h @@ -41,7 +41,7 @@ protected: void finish() { is_finished = true; } /// Call it when all data was inserted to merged_data. /// Struct which represents current merging chunk of data. - /// Also it calculates the number of merged rows. + /// Also it calculates the number of merged rows and other profile info. class MergedData { public: @@ -98,6 +98,8 @@ protected: merged_rows = 0; sum_blocks_granularity = 0; + ++total_chunks; + total_allocated_bytes += chunk.allocatedBytes(); return chunk; } @@ -118,28 +120,28 @@ protected: return merged_rows * merged_rows >= sum_blocks_granularity; } - UInt64 totalMergedRows() const { return total_merged_rows; } UInt64 mergedRows() const { return merged_rows; } + UInt64 totalMergedRows() const { return total_merged_rows; } + UInt64 totalChunks() const { return total_chunks; } + UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } private: + MutableColumns columns; + UInt64 sum_blocks_granularity = 0; - UInt64 total_merged_rows = 0; UInt64 merged_rows = 0; + UInt64 total_merged_rows = 0; + UInt64 total_chunks = 0; + UInt64 total_allocated_bytes = 0; const UInt64 max_block_size; const bool use_average_block_size; - - MutableColumns columns; }; MergedData merged_data; -protected: /// Profile info. Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; - UInt64 total_rows = 0; - UInt64 total_bytes = 0; - UInt64 total_chunks = 0; private: /// Processor state. 
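The hunk above moves the merge profile counters (total_chunks, total_allocated_bytes) out of the transform and into MergedData, so they are updated exactly once per pulled chunk instead of at the output port. A toy model of that accounting, with hypothetical numbers and simplified types, to show where each counter changes:

#include <cassert>
#include <cstdint>

/// Toy model of the per-pull accounting MergedData takes over in the hunk above.
struct Counters
{
    std::uint64_t merged_rows = 0;          /// rows in the chunk being built
    std::uint64_t total_merged_rows = 0;    /// rows over the whole merge
    std::uint64_t total_chunks = 0;
    std::uint64_t total_allocated_bytes = 0;

    void insertRows(std::uint64_t rows) { merged_rows += rows; total_merged_rows += rows; }

    void pull(std::uint64_t chunk_bytes)    /// called once per emitted chunk
    {
        ++total_chunks;
        total_allocated_bytes += chunk_bytes;
        merged_rows = 0;                    /// only the per-chunk counter resets
    }
};

int main()
{
    Counters c;
    c.insertRows(100); c.pull(4096);
    c.insertRows(50);  c.pull(2048);
    assert(c.total_merged_rows == 150 && c.total_chunks == 2);
    assert(c.total_allocated_bytes == 6144);
}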
diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index 2971c55a18a..fc07a7c63ea 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -19,6 +19,7 @@ MergingSortedTransform::MergingSortedTransform( SortDescription description_, size_t max_block_size, UInt64 limit_, + WriteBuffer * out_row_sources_buf_, bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) @@ -26,6 +27,7 @@ MergingSortedTransform::MergingSortedTransform( , description(std::move(description_)) , limit(limit_) , quiet(quiet_) + , out_row_sources_buf(out_row_sources_buf_) , source_chunks(num_inputs) , cursors(num_inputs) { @@ -205,6 +207,8 @@ void MergingSortedTransform::insertFromChunk(size_t source_num) source_chunks[source_num] = Chunk(); + /// Write order of rows for other columns + /// this data will be used in gather stream if (out_row_sources_buf) { RowSourcePart row_source(source_num); @@ -224,13 +228,13 @@ void MergingSortedTransform::onFinish() std::stringstream message; message << std::fixed << std::setprecision(2) - << "Merge sorted " << total_chunks << " blocks, " << total_rows << " rows" + << "Merge sorted " << merged_data.totalChunks() << " blocks, " << merged_data.totalMergedRows() << " rows" << " in " << seconds << " sec."; if (seconds != 0) message << ", " - << total_rows / seconds << " rows/sec., " - << total_bytes / 1000000.0 / seconds << " MB/sec."; + << merged_data.totalMergedRows() / seconds << " rows/sec., " + << merged_data.totalAllocatedBytes() / 1000000.0 / seconds << " MB/sec."; LOG_DEBUG(log, message.str()); } diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.h b/dbms/src/Processors/Merges/MergingSortedTransform.h index 9d6fdda723c..f8e3e65951f 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.h +++ b/dbms/src/Processors/Merges/MergingSortedTransform.h @@ -9,7 +9,7 @@ namespace DB { /// Merges several sorted inputs into one sorted output. -class MergingSortedTransform : public IMergingTransform +class MergingSortedTransform final : public IMergingTransform { public: MergingSortedTransform( @@ -18,6 +18,7 @@ public: SortDescription description, size_t max_block_size, UInt64 limit_ = 0, + WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false, bool use_average_block_sizes = false, bool have_all_inputs_ = true); diff --git a/dbms/src/Processors/Merges/SharedChunk.h b/dbms/src/Processors/Merges/SharedChunk.h new file mode 100644 index 00000000000..5846c9d0336 --- /dev/null +++ b/dbms/src/Processors/Merges/SharedChunk.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB::detail +{ + +/// Allows you refer to the row in the block and hold the block ownership, +/// and thus avoid creating a temporary row object. +/// Do not use std::shared_ptr, since there is no need for a place for `weak_count` and `deleter`; +/// does not use Poco::SharedPtr, since you need to allocate a block and `refcount` in one piece; +/// does not use Poco::AutoPtr, since it does not have a `move` constructor and there are extra checks for nullptr; +/// The reference counter is not atomic, since it is used from one thread. 
+struct SharedChunk : Chunk +{ + int refcount = 0; + + ColumnRawPtrs all_columns; + ColumnRawPtrs sort_columns; + + explicit SharedChunk(Chunk && chunk) : Chunk(std::move(chunk)) {} +}; + +inline void intrusive_ptr_add_ref(detail::SharedChunk * ptr) +{ + ++ptr->refcount; +} + +inline void intrusive_ptr_release(detail::SharedChunk * ptr) +{ + if (0 == --ptr->refcount) + delete ptr; +} + +using SharedChunkPtr = boost::intrusive_ptr; + +struct RowRef +{ + detail::SharedChunkPtr owned_chunk; + + ColumnRawPtrs * all_columns = nullptr; + ColumnRawPtrs * sort_columns = nullptr; + UInt64 row_num = 0; + + void swap(RowRef & other) + { + owned_chunk.swap(other.owned_chunk); + std::swap(all_columns, other.all_columns); + std::swap(sort_columns, other.sort_columns); + std::swap(row_num, other.row_num); + } + + bool empty() const { return all_columns == nullptr; } + + void set(SortCursor & cursor, SharedChunkPtr chunk) + { + owned_chunk = std::move(chunk); + row_num = cursor.impl->pos; + all_columns = &owned_chunk->all_columns; + sort_columns = &owned_chunk->sort_columns; + } + + bool hasEqualSortColumnsWith(const RowRef & other) + { + auto size = sort_columns->size(); + for (size_t col_number = 0; col_number < size; ++col_number) + { + auto & cur_column = (*sort_columns)[col_number]; + auto & other_column = (*other.sort_columns)[col_number]; + + if (0 != cur_column->compareAt(row_num, other.row_num, *other_column, 1)) + return false; + } + + return true; + } +}; + +} diff --git a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp index de5b7092024..ee1836a4a14 100644 --- a/dbms/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/dbms/src/Processors/Transforms/MergeSortingTransform.cpp @@ -199,6 +199,7 @@ void MergeSortingTransform::consume(Chunk chunk) description, max_merged_block_size, limit, + nullptr, quiet, use_average_block_sizes, have_all_inputs); From bbecbff5461ae1e50833559f7cd47185279af720 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 27 Mar 2020 20:58:35 +0300 Subject: [PATCH 014/752] Try fix tests. --- dbms/src/Processors/Merges/MergingSortedTransform.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index fc07a7c63ea..52e8a7f5a57 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -122,8 +122,11 @@ void MergingSortedTransform::merge(TSortingHeap & queue) }; /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` - while (queue.isValid() && can_read_another_row()) + while (queue.isValid()) { + if (!can_read_another_row()) + return; + auto current = queue.current(); /** And what if the block is totally less or equal than the rest for the current cursor? From d591cea77ccd19a5ccc14a0f075109a2518ccebc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 27 Mar 2020 21:42:36 +0300 Subject: [PATCH 015/752] Try fix tests. 
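What this patch is addressing: on the fast-forward path a whole input chunk is moved into MergedData as-is, and nothing must be appended to it before it is pulled. A minimal self-contained model of the flag introduced below (the member names mirror the patch, but the class itself is a simplification, not the real MergedData):

#include <cassert>
#include <cstdint>

struct MergedDataModel
{
    std::uint64_t merged_rows = 0;
    const std::uint64_t max_block_size = 8192;
    bool was_chunk_inserted = false;

    void insertRow() { ++merged_rows; }

    void insertChunk(std::uint64_t rows)  /// fast-forward: chunk moved wholesale
    {
        merged_rows = rows;
        was_chunk_inserted = true;
    }

    bool hasEnoughRows() const
    {
        if (was_chunk_inserted)           /// the moved chunk must go out untouched
            return true;
        return merged_rows >= max_block_size;
    }

    void pull() { merged_rows = 0; was_chunk_inserted = false; }
};

int main()
{
    MergedDataModel data;
    data.insertChunk(100);          /// far below max_block_size...
    assert(data.hasEnoughRows());   /// ...but must still be flushed as-is
    data.pull();
    data.insertRow();
    assert(!data.hasEnoughRows());  /// normal row-by-row merging resumes
}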
---
 dbms/src/Processors/Merges/IMergingTransform.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Processors/Merges/IMergingTransform.h b/dbms/src/Processors/Merges/IMergingTransform.h
index 933b300bb96..d0b18c77480 100644
--- a/dbms/src/Processors/Merges/IMergingTransform.h
+++ b/dbms/src/Processors/Merges/IMergingTransform.h
@@ -71,7 +71,6 @@ protected:
                 ErrorCodes::LOGICAL_ERROR);

             auto num_rows = chunk.getNumRows();
-            auto block_size = num_rows;
             columns = chunk.mutateColumns();
             if (limit_rows && num_rows > limit_rows)
             {
@@ -80,9 +79,12 @@ protected:
                     column = (*column->cut(0, num_rows)).mutate();
             }

+            was_chunk_inserted = true;
             total_merged_rows += num_rows;
             merged_rows = num_rows;
-            sum_blocks_granularity += block_size * num_rows;
+
+            /// We don't care about granularity here, because for the fast-forward optimization the chunk is moved as-is.
+            /// sum_blocks_granularity += block_size * num_rows;
         }

         Chunk pull()
@@ -100,12 +102,18 @@ protected:
             sum_blocks_granularity = 0;
             ++total_chunks;
             total_allocated_bytes += chunk.allocatedBytes();
+            was_chunk_inserted = false;

             return chunk;
         }

         bool hasEnoughRows() const
         {
+            /// If a full chunk was inserted, then we must pull it.
+            /// It is needed for the fast-forward optimization.
+            if (was_chunk_inserted)
+                return true;
+
             /// Never return more than max_block_size.
             if (merged_rows >= max_block_size)
                 return true;
@@ -136,6 +144,8 @@ protected:

         const UInt64 max_block_size;
         const bool use_average_block_size;
+
+        bool was_chunk_inserted = false;
     };

     MergedData merged_data;

From a02eb99f7b1a41f5f9c69ee103f3440707c9516f Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 30 Mar 2020 12:23:42 +0300
Subject: [PATCH 016/752] Fix build.

---
 dbms/src/Processors/Merges/CollapsingSortedTransform.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.h b/dbms/src/Processors/Merges/CollapsingSortedTransform.h
index a2240d20e77..985fd8018fe 100644
--- a/dbms/src/Processors/Merges/CollapsingSortedTransform.h
+++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.h
@@ -47,8 +47,6 @@ private:

     /// Settings
     SortDescription description;
-    bool has_collation = false;
-
     const size_t sign_column_number;

     /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step)

From 968917e99b9364860122e585f0503edc9123ee72 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 30 Mar 2020 12:32:55 +0300
Subject: [PATCH 017/752] Fix tests.

---
 dbms/src/Processors/Merges/IMergingTransform.h   | 13 ++++++++-----
 .../Processors/Merges/MergingSortedTransform.cpp |  4 +++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Processors/Merges/IMergingTransform.h b/dbms/src/Processors/Merges/IMergingTransform.h
index d0b18c77480..040263aca36 100644
--- a/dbms/src/Processors/Merges/IMergingTransform.h
+++ b/dbms/src/Processors/Merges/IMergingTransform.h
@@ -53,6 +53,9 @@ protected:
                 columns.emplace_back(column.type->createColumn());
         }

+        /// Pull will be called at next prepare call.
+ void flush() { need_flush = true; } + void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size) { size_t num_columns = raw_columns.size(); @@ -79,7 +82,7 @@ protected: column = (*column->cut(0, num_rows)).mutate(); } - was_chunk_inserted = true; + need_flush = true; total_merged_rows += num_rows; merged_rows = num_rows; @@ -102,16 +105,16 @@ protected: sum_blocks_granularity = 0; ++total_chunks; total_allocated_bytes += chunk.allocatedBytes(); - was_chunk_inserted = false; + need_flush = false; return chunk; } bool hasEnoughRows() const { - /// If full chunk was inserted, then we must pull it. + /// If full chunk was or is going to be inserted, then we must pull it. /// It is needed for fast-forward optimization. - if (was_chunk_inserted) + if (need_flush) return true; /// Never return more then max_block_size. @@ -145,7 +148,7 @@ protected: const UInt64 max_block_size; const bool use_average_block_size; - bool was_chunk_inserted = false; + bool need_flush = false; }; MergedData merged_data; diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index 52e8a7f5a57..734da2e4bb3 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -138,10 +138,12 @@ void MergingSortedTransform::merge(TSortingHeap & queue) { //std::cerr << "current block is totally less or equals\n"; - /// If there are already data in the current block, we first return it. We'll get here again the next time we call the merge function. + /// If there are already data in the current block, we first return it. + /// We'll get here again the next time we call the merge function. if (merged_data.mergedRows() != 0) { //std::cerr << "merged rows is non-zero\n"; + merged_data.flush(); return; } From d40225330e71f9a88153c0a40d3aab75b11b028d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 30 Mar 2020 13:51:40 +0300 Subject: [PATCH 018/752] Add SharedChunkAllocator. 
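The idea behind the allocator added below: preallocate a fixed number of chunk slots and recycle them via refcounts, so no per-chunk new/delete happens during the merge. A toy model of the slot reuse (SlotPool and its members are illustrative names, not the real API):

#include <cassert>
#include <cstddef>
#include <vector>

struct SlotPool
{
    std::vector<int> refcounts;
    explicit SlotPool(size_t n) : refcounts(n, 0) {}

    int acquire()   /// stands in for SharedChunkAllocator::alloc
    {
        for (size_t i = 0; i < refcounts.size(); ++i)
            if (refcounts[i] == 0) { refcounts[i] = 1; return static_cast<int>(i); }
        return -1;  /// pool exhausted - the real allocator throws LOGICAL_ERROR
    }
    void release(int slot) { --refcounts[slot]; }
};

int main()
{
    SlotPool pool(1);
    int held = pool.acquire();     /// e.g. a RowRef still owning its chunk
    assert(pool.acquire() == -1);  /// the slot stays pinned while referenced
    pool.release(held);            /// dropping the last reference...
    assert(pool.acquire() == 0);   /// ...makes the slot reusable
}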
--- .../Merges/CollapsingSortedTransform.cpp | 5 +- .../Merges/CollapsingSortedTransform.h | 5 +- dbms/src/Processors/Merges/RowRef.h | 147 ++++++++++++++++++ dbms/src/Processors/Merges/SharedChunk.h | 83 ---------- 4 files changed, 154 insertions(+), 86 deletions(-) create mode 100644 dbms/src/Processors/Merges/RowRef.h delete mode 100644 dbms/src/Processors/Merges/SharedChunk.h diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp index e95b574ffe7..ee5f79013a9 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -28,6 +28,7 @@ CollapsingSortedTransform::CollapsingSortedTransform( , description(std::move(description_)) , sign_column_number(header.getPositionByName(sign_column)) , out_row_sources_buf(out_row_sources_buf_) + , chunk_allocator(num_inputs + max_row_refs) { } @@ -58,7 +59,7 @@ void CollapsingSortedTransform::updateCursor(Chunk chunk, size_t source_num) if (source_chunk) { - source_chunk = new detail::SharedChunk(std::move(chunk)); + source_chunk = chunk_allocator.alloc(std::move(chunk)); cursors[source_num].reset(source_chunk->getColumns(), {}); } else @@ -66,7 +67,7 @@ void CollapsingSortedTransform::updateCursor(Chunk chunk, size_t source_num) if (cursors[source_num].has_collation) throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); - source_chunk = new detail::SharedChunk(std::move(chunk)); + source_chunk = chunk_allocator.alloc(std::move(chunk)); cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); } diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.h b/dbms/src/Processors/Merges/CollapsingSortedTransform.h index 985fd8018fe..87596d11a29 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.h +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -63,10 +63,13 @@ private: bool is_queue_initialized = false; using RowRef = detail::RowRef; + static constexpr size_t max_row_refs = 4; /// first_negative, last_positive, last, current. RowRef first_negative_row; RowRef last_positive_row; RowRef last_row; + detail::SharedChunkAllocator chunk_allocator; + size_t count_positive = 0; /// The number of positive rows for the current primary key. size_t count_negative = 0; /// The number of negative rows for the current primary key. bool last_is_positive = false; /// true if the last row for the current primary key is positive. diff --git a/dbms/src/Processors/Merges/RowRef.h b/dbms/src/Processors/Merges/RowRef.h new file mode 100644 index 00000000000..ea89b24fbdb --- /dev/null +++ b/dbms/src/Processors/Merges/RowRef.h @@ -0,0 +1,147 @@ +#pragma once + +#include +#include +#include + +#include + + +namespace DB::detail +{ + +class SharedChunkAllocator; + +/// Allows you refer to the row in the block and hold the block ownership, +/// and thus avoid creating a temporary row object. +/// Do not use std::shared_ptr, since there is no need for a place for `weak_count` and `deleter`; +/// does not use Poco::SharedPtr, since you need to allocate a block and `refcount` in one piece; +/// does not use Poco::AutoPtr, since it does not have a `move` constructor and there are extra checks for nullptr; +/// The reference counter is not atomic, since it is used from one thread. 
+struct SharedChunk : Chunk +{ + ColumnRawPtrs all_columns; + ColumnRawPtrs sort_columns; + + using Chunk::Chunk; + using Chunk::operator=; + +private: + int refcount = 0; + size_t position = 0; + SharedChunkAllocator * allocator = nullptr; + + friend class SharedChunkAllocator; + friend void intrusive_ptr_add_ref(SharedChunk * ptr); + friend void intrusive_ptr_release(SharedChunk * ptr); +}; + +using SharedChunkPtr = boost::intrusive_ptr; + +/// Custom allocator for shared chunk. +/// It helps to avoid explicit new/delete calls if we know maximum required capacity. +/// Thanks to that, SharedChunk does not own any memory. +/// It improves leaks detection, because memory is allocated only once in constructor. +class SharedChunkAllocator +{ +public: + explicit SharedChunkAllocator(size_t max_chunks) + { + chunks.resize(max_chunks); + free_chunks.reserve(max_chunks); + + for (size_t i = 0; i < max_chunks; ++i) + free_chunks.push_back(i); + } + + SharedChunkPtr alloc(Chunk && chunk) + { + if (free_chunks.empty()) + throw Exception("Not enough space in SharedChunkAllocator. " + "Chunks allocated: " + std::to_string(chunks.size()), ErrorCodes::LOGICAL_ERROR); + + auto pos = free_chunks.back(); + free_chunks.pop_back(); + + chunks[pos] = std::move(chunk); + chunks[pos].position = pos; + chunks[pos].allocator = this; + + return SharedChunkPtr(&chunks[pos]); + } + +private: + std::vector chunks; + std::vector free_chunks; + + void release(SharedChunk * ptr) + { + /// Release memory. It is not obligatory. + ptr->clear(); + ptr->all_columns.clear(); + ptr->sort_columns.clear(); + + free_chunks.push_back(ptr->position); + } + + friend void intrusive_ptr_release(SharedChunk * ptr); +}; + +inline void intrusive_ptr_add_ref(SharedChunk * ptr) +{ + ++ptr->refcount; +} + +inline void intrusive_ptr_release(SharedChunk * ptr) +{ + if (0 == --ptr->refcount) + ptr->allocator->release(ptr); +} + +/// This class represents a row in a chunk. +/// RowRef hold shared pointer to this chunk, possibly extending its life time. +/// It is needed, for example, in CollapsingTransform, where we need to store first negative row for current sort key. +/// We do not copy data itself, because it may be potentially changed for each row. Performance for `set` is important. 
+struct RowRef +{ + detail::SharedChunkPtr owned_chunk; + + ColumnRawPtrs * all_columns = nullptr; + ColumnRawPtrs * sort_columns = nullptr; + UInt64 row_num = 0; + + void swap(RowRef & other) + { + owned_chunk.swap(other.owned_chunk); + std::swap(all_columns, other.all_columns); + std::swap(sort_columns, other.sort_columns); + std::swap(row_num, other.row_num); + } + + bool empty() const { return all_columns == nullptr; } + + void set(SortCursor & cursor, SharedChunkPtr chunk) + { + owned_chunk = std::move(chunk); + row_num = cursor.impl->pos; + all_columns = &owned_chunk->all_columns; + sort_columns = &owned_chunk->sort_columns; + } + + bool hasEqualSortColumnsWith(const RowRef & other) + { + auto size = sort_columns->size(); + for (size_t col_number = 0; col_number < size; ++col_number) + { + auto & cur_column = (*sort_columns)[col_number]; + auto & other_column = (*other.sort_columns)[col_number]; + + if (0 != cur_column->compareAt(row_num, other.row_num, *other_column, 1)) + return false; + } + + return true; + } +}; + +} diff --git a/dbms/src/Processors/Merges/SharedChunk.h b/dbms/src/Processors/Merges/SharedChunk.h deleted file mode 100644 index 5846c9d0336..00000000000 --- a/dbms/src/Processors/Merges/SharedChunk.h +++ /dev/null @@ -1,83 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB::detail -{ - -/// Allows you refer to the row in the block and hold the block ownership, -/// and thus avoid creating a temporary row object. -/// Do not use std::shared_ptr, since there is no need for a place for `weak_count` and `deleter`; -/// does not use Poco::SharedPtr, since you need to allocate a block and `refcount` in one piece; -/// does not use Poco::AutoPtr, since it does not have a `move` constructor and there are extra checks for nullptr; -/// The reference counter is not atomic, since it is used from one thread. -struct SharedChunk : Chunk -{ - int refcount = 0; - - ColumnRawPtrs all_columns; - ColumnRawPtrs sort_columns; - - explicit SharedChunk(Chunk && chunk) : Chunk(std::move(chunk)) {} -}; - -inline void intrusive_ptr_add_ref(detail::SharedChunk * ptr) -{ - ++ptr->refcount; -} - -inline void intrusive_ptr_release(detail::SharedChunk * ptr) -{ - if (0 == --ptr->refcount) - delete ptr; -} - -using SharedChunkPtr = boost::intrusive_ptr; - -struct RowRef -{ - detail::SharedChunkPtr owned_chunk; - - ColumnRawPtrs * all_columns = nullptr; - ColumnRawPtrs * sort_columns = nullptr; - UInt64 row_num = 0; - - void swap(RowRef & other) - { - owned_chunk.swap(other.owned_chunk); - std::swap(all_columns, other.all_columns); - std::swap(sort_columns, other.sort_columns); - std::swap(row_num, other.row_num); - } - - bool empty() const { return all_columns == nullptr; } - - void set(SortCursor & cursor, SharedChunkPtr chunk) - { - owned_chunk = std::move(chunk); - row_num = cursor.impl->pos; - all_columns = &owned_chunk->all_columns; - sort_columns = &owned_chunk->sort_columns; - } - - bool hasEqualSortColumnsWith(const RowRef & other) - { - auto size = sort_columns->size(); - for (size_t col_number = 0; col_number < size; ++col_number) - { - auto & cur_column = (*sort_columns)[col_number]; - auto & other_column = (*other.sort_columns)[col_number]; - - if (0 != cur_column->compareAt(row_num, other.row_num, *other_column, 1)) - return false; - } - - return true; - } -}; - -} From 9dfe4a4c614392ebeb18d7723d5c8145266f328d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 30 Mar 2020 15:06:16 +0300 Subject: [PATCH 019/752] Added ReplacingSortedTransform. 
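The transform added below keeps, for each primary key, the row with the maximum version, and the comparison is deliberately non-strict, so among equal versions the last row wins. A standalone sketch of that selection rule, with plain integers in place of IColumn::compareAt:

#include <cassert>
#include <cstddef>
#include <vector>

/// For the rows of one primary key, return the index of the row that survives:
/// the row with the maximum version; ties go to the later row (non-strict '>=').
size_t selectRowToKeep(const std::vector<int> & versions)
{
    size_t selected = 0;
    for (size_t i = 1; i < versions.size(); ++i)
        if (versions[i] >= versions[selected]) /// a strict '>' would keep the first duplicate instead
            selected = i;
    return selected;
}

int main()
{
    assert(selectRowToKeep({1, 3, 2}) == 1); /// the maximum version wins
    assert(selectRowToKeep({2, 2, 2}) == 2); /// equal versions: the last row wins
}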
--- .../Merges/CollapsingSortedTransform.cpp | 3 + .../Merges/CollapsingSortedTransform.h | 1 - .../Merges/ReplacingSortedTransform.cpp | 144 ++++++++++++++++++ .../Merges/ReplacingSortedTransform.h | 67 ++++++++ dbms/src/Processors/Merges/RowRef.h | 12 +- 5 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 dbms/src/Processors/Merges/ReplacingSortedTransform.cpp create mode 100644 dbms/src/Processors/Merges/ReplacingSortedTransform.h diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp index ee5f79013a9..4bb4eb0af43 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -138,6 +138,9 @@ void CollapsingSortedTransform::insertRows() } } + first_negative_row.clear(); + last_positive_row.clear(); + if (out_row_sources_buf) out_row_sources_buf->write( reinterpret_cast(current_row_sources.data()), diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.h b/dbms/src/Processors/Merges/CollapsingSortedTransform.h index 87596d11a29..64f9016b90d 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.h +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.h @@ -56,7 +56,6 @@ private: /// Chunks currently being merged. using SourceChunks = std::vector; SourceChunks source_chunks; - SortCursorImpls cursors; SortingHeap queue; diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp new file mode 100644 index 00000000000..9783d23dfc6 --- /dev/null +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp @@ -0,0 +1,144 @@ +#include +#include + +namespace DB +{ + +ReplacingSortedTransform::ReplacingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, const String & version_column, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_, + bool use_average_block_sizes) + : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, true) + , description(std::move(description_)) + , out_row_sources_buf(out_row_sources_buf_) + , chunk_allocator(num_inputs + max_row_refs) +{ + if (!version_column.empty()) + version_column_number = header.getPositionByName(version_column); +} + +void ReplacingSortedTransform::initializeInputs() +{ + queue = SortingHeap(cursors); + is_queue_initialized = true; +} + +void ReplacingSortedTransform::consume(Chunk chunk, size_t input_number) +{ + updateCursor(std::move(chunk), input_number); + + if (is_queue_initialized) + queue.push(cursors[input_number]); +} + +void ReplacingSortedTransform::updateCursor(Chunk chunk, size_t source_num) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); + + auto & source_chunk = source_chunks[source_num]; + + if (source_chunk) + { + source_chunk = chunk_allocator.alloc(std::move(chunk)); + cursors[source_num].reset(source_chunk->getColumns(), {}); + } + else + { + if (cursors[source_num].has_collation) + throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); + + source_chunk = chunk_allocator.alloc(std::move(chunk)); + cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); + } + + source_chunk->all_columns = cursors[source_num].all_columns; + 
source_chunk->sort_columns = cursors[source_num].sort_columns; +} + +void ReplacingSortedTransform::insertRow() +{ + if (out_row_sources_buf) + { + /// true flag value means "skip row" + current_row_sources[max_pos].setSkipFlag(false); + + out_row_sources_buf->write(reinterpret_cast(current_row_sources.data()), + current_row_sources.size() * sizeof(RowSourcePart)); + current_row_sources.resize(0); + } + + merged_data.insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows()); + selected_row.clear(); +} + +void ReplacingSortedTransform::merge() +{ + /// Take the rows in needed order and put them into `merged_columns` until rows no more than `max_block_size` + while (queue.isValid()) + { + SortCursor current = queue.current(); + + if (last_row.empty()) + setRowRef(last_row, current); + + RowRef current_row; + setRowRef(current_row, current); + + bool key_differs = !current_row.hasEqualSortColumnsWith(last_row); + + /// if there are enough rows and the last one is calculated completely + if (key_differs && merged_data.hasEnoughRows()) + return; + + if (key_differs) + { + /// Write the data for the previous primary key. + insertRow(); + last_row.swap(current_row); + } + + /// Initially, skip all rows. Unskip last on insert. + size_t current_pos = current_row_sources.size(); + if (out_row_sources_buf) + current_row_sources.emplace_back(current.impl->order, true); + + /// A non-strict comparison, since we select the last row for the same version values. + if (version_column_number == -1 + || selected_row.empty() + || current->all_columns[version_column_number]->compareAt( + current->pos, selected_row.row_num, + *(*selected_row.all_columns)[version_column_number], + /* nan_direction_hint = */ 1) >= 0) + { + max_pos = current_pos; + setRowRef(selected_row, current); + } + + if (!current->isLast()) + { + queue.next(); + } + else + { + /// We get the next block from the corresponding source, if there is one. + queue.removeTop(); + requestDataForInput(current.impl->order); + return; + } + } + + /// We will write the data for the last primary key. + if (!selected_row.empty()) + insertRow(); + + finish(); +} + +} diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.h b/dbms/src/Processors/Merges/ReplacingSortedTransform.h new file mode 100644 index 00000000000..19b7a999e9b --- /dev/null +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +class ReplacingSortedTransform : public IMergingTransform +{ +public: + ReplacingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, const String & version_column, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_ = nullptr, + bool use_average_block_sizes = false); + + String getName() const override { return "ReplacingSorted"; } + +protected: + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + +private: + Logger * log = &Logger::get("ReplacingSortedBlockInputStream"); + + SortDescription description; + ssize_t version_column_number = -1; + + /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) + /// If it is not nullptr then it should be populated during execution + WriteBuffer * out_row_sources_buf = nullptr; + + /// Chunks currently being merged. 
+ using SourceChunks = std::vector; + SourceChunks source_chunks; + SortCursorImpls cursors; + + SortingHeap queue; + bool is_queue_initialized = false; + + using RowRef = detail::RowRef; + static constexpr size_t max_row_refs = 3; /// last, current, selected. + RowRef last_row; + /// RowRef next_key; /// Primary key of next row. + RowRef selected_row; /// Last row with maximum version for current primary key. + size_t max_pos = 0; /// The position (into current_row_sources) of the row with the highest version. + + detail::SharedChunkAllocator chunk_allocator; + + /// Sources of rows with the current primary key. + PODArray current_row_sources; + + void insertRow(); + void merge(); + void updateCursor(Chunk chunk, size_t source_num); + void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, source_chunks[cursor.impl->order]); } +}; + +} diff --git a/dbms/src/Processors/Merges/RowRef.h b/dbms/src/Processors/Merges/RowRef.h index ea89b24fbdb..55d5577107f 100644 --- a/dbms/src/Processors/Merges/RowRef.h +++ b/dbms/src/Processors/Merges/RowRef.h @@ -104,7 +104,7 @@ inline void intrusive_ptr_release(SharedChunk * ptr) /// We do not copy data itself, because it may be potentially changed for each row. Performance for `set` is important. struct RowRef { - detail::SharedChunkPtr owned_chunk; + detail::SharedChunkPtr owned_chunk = nullptr; ColumnRawPtrs * all_columns = nullptr; ColumnRawPtrs * sort_columns = nullptr; @@ -118,7 +118,15 @@ struct RowRef std::swap(row_num, other.row_num); } - bool empty() const { return all_columns == nullptr; } + bool empty() const { return owned_chunk == nullptr; } + + void clear() + { + owned_chunk.reset(); + all_columns = nullptr; + sort_columns = nullptr; + row_num = 0; + } void set(SortCursor & cursor, SharedChunkPtr chunk) { From d8a303dda8088d1c7676e66d200c92e45f66b063 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 30 Mar 2020 15:12:16 +0300 Subject: [PATCH 020/752] Fix style. --- dbms/src/Processors/Merges/RowRef.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Processors/Merges/RowRef.h b/dbms/src/Processors/Merges/RowRef.h index 55d5577107f..67d32d11277 100644 --- a/dbms/src/Processors/Merges/RowRef.h +++ b/dbms/src/Processors/Merges/RowRef.h @@ -6,6 +6,10 @@ #include +namespace DB::ErrorCodes +{ + extern const int LOGICAL_ERROR; +} namespace DB::detail { From c05331656fbbb977787be527a3ec29ef5914ebb1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 30 Mar 2020 19:16:00 +0300 Subject: [PATCH 021/752] Added VersionedCollapsingTransform. 
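The core invariant of the transform added below, stripped of chunks and cursors: within one primary key, rows of the same sign queue up, and a row of the opposite sign cancels the most recently queued row (both then count only as gaps, i.e. skipped rows). A toy model of that pairwise cancellation:

#include <cassert>
#include <vector>

/// Returns the signs that survive collapsing for one primary key.
std::vector<int> collapse(const std::vector<int> & signs)
{
    std::vector<int> queue; /// queued, not-yet-cancelled signs (all equal)
    for (int sign : signs)
    {
        if (!queue.empty() && sign != queue.back())
            queue.pop_back();   /// a +1/-1 pair annihilates
        else
            queue.push_back(sign);
    }
    return queue;
}

int main()
{
    assert(collapse({1, -1}).empty());        /// insert then delete: nothing survives
    assert(collapse({1, 1, -1}).size() == 1); /// one insert is left uncancelled
    assert(collapse({1}).size() == 1);        /// an unmatched row is kept
}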
--- .../Merges/ReplacingSortedTransform.cpp | 10 + .../Merges/ReplacingSortedTransform.h | 3 +- .../Merges/VersionedCollapsingTransform.cpp | 200 ++++++++++++++++ .../Merges/VersionedCollapsingTransform.h | 225 ++++++++++++++++++ 4 files changed, 437 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp create mode 100644 dbms/src/Processors/Merges/VersionedCollapsingTransform.h diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp index 9783d23dfc6..18fb9f98221 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp @@ -4,6 +4,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + ReplacingSortedTransform::ReplacingSortedTransform( size_t num_inputs, const Block & header, SortDescription description_, const String & version_column, @@ -78,6 +83,11 @@ void ReplacingSortedTransform::insertRow() selected_row.clear(); } +void ReplacingSortedTransform::work() +{ + merge(); +} + void ReplacingSortedTransform::merge() { /// Take the rows in needed order and put them into `merged_columns` until rows no more than `max_block_size` diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.h b/dbms/src/Processors/Merges/ReplacingSortedTransform.h index 19b7a999e9b..85fd0d23fe4 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.h +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.h @@ -23,13 +23,14 @@ public: bool use_average_block_sizes = false); String getName() const override { return "ReplacingSorted"; } + void work() override; protected: void initializeInputs() override; void consume(Chunk chunk, size_t input_number) override; private: - Logger * log = &Logger::get("ReplacingSortedBlockInputStream"); + Logger * log = &Logger::get("ReplacingSortedTransform"); SortDescription description; ssize_t version_column_number = -1; diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp new file mode 100644 index 00000000000..b4caeb22bf8 --- /dev/null +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -0,0 +1,200 @@ +#include +#include +#include + +namespace DB +{ + +static const size_t MAX_ROWS_IN_MULTIVERSION_QUEUE = 8192; + +VersionedCollapsingTransform::VersionedCollapsingTransform( + size_t num_inputs, const Block & header, + SortDescription description_, const String & sign_column_, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_, + bool use_average_block_sizes) + : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, true) + , description(std::move(description_)) + , out_row_sources_buf(out_row_sources_buf_) + , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 2) + , current_keys(max_rows_in_queue + 1) + , chunk_allocator(num_inputs + max_rows_in_queue + 1) +{ + sign_column_number = header.getPositionByName(sign_column_); +} + +void VersionedCollapsingTransform::initializeInputs() +{ + queue = SortingHeap(cursors); + is_queue_initialized = true; +} + +void VersionedCollapsingTransform::consume(Chunk chunk, size_t input_number) +{ + updateCursor(std::move(chunk), input_number); + + if (is_queue_initialized) + queue.push(cursors[input_number]); +} + +void VersionedCollapsingTransform::updateCursor(Chunk chunk, size_t source_num) +{ + auto num_rows = chunk.getNumRows(); + auto columns = 
chunk.detachColumns();
+    for (auto & column : columns)
+        column = column->convertToFullColumnIfConst();
+
+    chunk.setColumns(std::move(columns), num_rows);
+
+    auto & source_chunk = source_chunks[source_num];
+
+    if (source_chunk)
+    {
+        source_chunk = chunk_allocator.alloc(std::move(chunk));
+        cursors[source_num].reset(source_chunk->getColumns(), {});
+    }
+    else
+    {
+        if (cursors[source_num].has_collation)
+            throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR);
+
+        source_chunk = chunk_allocator.alloc(std::move(chunk));
+        cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num);
+    }
+
+    source_chunk->all_columns = cursors[source_num].all_columns;
+    source_chunk->sort_columns = cursors[source_num].sort_columns;
+}
+
+void VersionedCollapsingTransform::work()
+{
+    merge();
+}
+
+inline ALWAYS_INLINE static void writeRowSourcePart(WriteBuffer & buffer, RowSourcePart row_source)
+{
+    if constexpr (sizeof(RowSourcePart) == 1)
+        buffer.write(*reinterpret_cast(&row_source));
+    else
+        buffer.write(reinterpret_cast(&row_source), sizeof(RowSourcePart));
+}
+
+void VersionedCollapsingTransform::insertGap(size_t gap_size)
+{
+    if (out_row_sources_buf)
+    {
+        for (size_t i = 0; i < gap_size; ++i)
+        {
+            writeRowSourcePart(*out_row_sources_buf, current_row_sources.front());
+            current_row_sources.pop();
+        }
+    }
+}
+
+void VersionedCollapsingTransform::insertRow(size_t skip_rows, const RowRef & row)
+{
+    merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows());
+
+    insertGap(skip_rows);
+
+    if (out_row_sources_buf)
+    {
+        current_row_sources.front().setSkipFlag(false);
+        writeRowSourcePart(*out_row_sources_buf, current_row_sources.front());
+        current_row_sources.pop();
+    }
+}
+
+void VersionedCollapsingTransform::merge()
+{
+    /// Take rows in the correct order and put them into `merged_columns` while there are no more than `max_block_size` rows.
+    while (queue.isValid())
+    {
+        SortCursor current = queue.current();
+
+        RowRef current_row;
+
+        Int8 sign = assert_cast(*current->all_columns[sign_column_number]).getData()[current->pos];
+
+        setRowRef(current_row, current);
+
+        /// At first, let's decide the number of rows needed to insert right now.
+        size_t num_rows_to_insert = 0;
+        if (!current_keys.empty())
+        {
+            auto key_differs = !current_row.hasEqualSortColumnsWith(current_keys.back());
+
+            if (key_differs) /// Flush whole queue
+                num_rows_to_insert = current_keys.size();
+            else if (current_keys.size() >= max_rows_in_queue) /// Flush single row if queue is big
+                num_rows_to_insert = 1;
+        }
+
+        /// Insert ready rows, if any.
+        while (num_rows_to_insert)
+        {
+            const auto & row = current_keys.front();
+            auto gap = current_keys.frontGap();
+
+            insertRow(gap, row);
+
+            current_keys.popFront();
+
+            --num_rows_to_insert;
+
+            /// It's ok to return here, because we didn't affect the queue.
+            if (merged_data.hasEnoughRows())
+                return;
+        }
+
+        if (current_keys.empty())
+        {
+            sign_in_queue = sign;
+            current_keys.pushBack(current_row);
+        }
+        else /// If the queue is not empty, then current_row has the same key as in the current_keys queue
+        {
+            if (sign == sign_in_queue)
+                current_keys.pushBack(current_row);
+            else
+            {
+                current_keys.popBack();
+                current_keys.pushGap(2);
+            }
+        }
+
+        if (out_row_sources_buf)
+            current_row_sources.emplace(current->order, true);
+
+        if (!current->isLast())
+        {
+            queue.next();
+        }
+        else
+        {
+            /// We take next block from the corresponding source, if there is one.
+ queue.removeTop(); + requestDataForInput(current.impl->order); + return; + } + } + + while (!current_keys.empty()) + { + const auto & row = current_keys.front(); + auto gap = current_keys.frontGap(); + + insertRow(gap, row); + current_keys.popFront(); + + if (merged_data.hasEnoughRows()) + return; + } + + /// Write information about last collapsed rows. + insertGap(current_keys.frontGap()); + finish(); +} + + +} diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h new file mode 100644 index 00000000000..03b5be9edc6 --- /dev/null +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h @@ -0,0 +1,225 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/* Deque with fixed memory size. Allows pushing gaps. + * frontGap() returns the number of gaps were inserted before front. + * + * This structure may be implemented via std::deque, but + * - Deque uses fixed amount of memory which is allocated in constructor. No more allocations are performed. + * - Gaps are not stored as separate values in queue, which is more memory efficient. + * - Deque is responsible for gaps invariant: after removing element, moves gaps into neighbor cell. + * + * Note: empty deque may have non-zero front gap. + */ +template +class FixedSizeDequeWithGaps +{ +public: + + struct ValueWithGap + { + /// The number of gaps before current element. The number of gaps after last element stores into end cell. + size_t gap; + /// Store char[] instead of T in order to make ValueWithGap POD. + /// Call placement constructors after push and and destructors after pop. + char value[sizeof(T)]; + }; + + explicit FixedSizeDequeWithGaps(size_t size) + { + container.resize_fill(size + 1); + } + + ~FixedSizeDequeWithGaps() + { + auto destruct_range = [this](size_t from, size_t to) + { + for (size_t i = from; i < to; ++i) + destructValue(i); + }; + + if (begin <= end) + destruct_range(begin, end); + else + { + destruct_range(0, end); + destruct_range(begin, container.size()); + } + } + + void pushBack(const T & value) + { + checkEnoughSpaceToInsert(); + constructValue(end, value); + moveRight(end); + container[end].gap = 0; + } + + void pushGap(size_t count) { container[end].gap += count; } + + void popBack() + { + checkHasValuesToRemove(); + size_t curr_gap = container[end].gap; + moveLeft(end); + destructValue(end); + container[end].gap += curr_gap; + } + + void popFront() + { + checkHasValuesToRemove(); + destructValue(begin); + moveRight(begin); + } + + T & front() + { + checkHasValuesToGet(); + return getValue(begin); + } + const T & front() const + { + checkHasValuesToGet(); + return getValue(begin); + } + + const T & back() const + { + size_t ps = end; + moveLeft(ps); + return getValue(ps); + } + + size_t & frontGap() { return container[begin].gap; } + const size_t & frontGap() const { return container[begin].gap; } + + size_t size() const + { + if (begin <= end) + return end - begin; + return end + (container.size() - begin); + } + + bool empty() const { return begin == end; } + +private: + PODArray container; + + size_t gap_before_first = 0; + size_t begin = 0; + size_t end = 0; + + void constructValue(size_t index, const T & value) { new (container[index].value) T(value); } + void destructValue(size_t index) { reinterpret_cast(container[index].value)->~T(); } + + T & getValue(size_t index) { return 
*reinterpret_cast(container[index].value); } + const T & getValue(size_t index) const { return *reinterpret_cast(container[index].value); } + + void moveRight(size_t & index) const + { + ++index; + + if (index == container.size()) + index = 0; + } + + void moveLeft(size_t & index) const + { + if (index == 0) + index = container.size(); + + --index; + } + + void checkEnoughSpaceToInsert() const + { + if (size() + 1 == container.size()) + throw Exception("Not enough space to insert into FixedSizeDequeWithGaps with capacity " + + toString(container.size() - 1), ErrorCodes::LOGICAL_ERROR); + } + + void checkHasValuesToRemove() const + { + if (empty()) + throw Exception("Cannot remove from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + } + + void checkHasValuesToGet() const + { + if (empty()) + throw Exception("Cannot get value from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + } +}; + +class VersionedCollapsingTransform : public IMergingTransform +{ +public: + /// Don't need version column. It's in primary key. + VersionedCollapsingTransform( + size_t num_inputs, const Block & header, + SortDescription description_, const String & sign_column_, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_ = nullptr, + bool use_average_block_sizes = false); + + String getName() const override { return "VersionedCollapsingTransform"; } + void work() override; + +protected: + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + +private: + Logger * log = &Logger::get("VersionedCollapsingTransform"); + + SortDescription description; + size_t sign_column_number = 0; + + /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) + /// If it is not nullptr then it should be populated during execution + WriteBuffer * out_row_sources_buf = nullptr; + + /// Chunks currently being merged. + using SourceChunks = std::vector; + SourceChunks source_chunks; + SortCursorImpls cursors; + + SortingHeap queue; + bool is_queue_initialized = false; + + using RowRef = detail::RowRef; + const size_t max_rows_in_queue; + /// Rows with the same primary key and sign. + FixedSizeDequeWithGaps current_keys; + Int8 sign_in_queue = 0; + + detail::SharedChunkAllocator chunk_allocator; + + std::queue current_row_sources; /// Sources of rows with the current primary key + + void insertGap(size_t gap_size); + void insertRow(size_t skip_rows, const RowRef & row); + void merge(); + void updateCursor(Chunk chunk, size_t source_num); + void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, source_chunks[cursor.impl->order]); } +}; + +} From d77a907df4f6cefb9c25cc8a1d159f8568c9f696 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 30 Mar 2020 19:42:57 +0300 Subject: [PATCH 022/752] Fix build. 
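For reference while reading the capacity fix below: FixedSizeDequeWithGaps keeps one extra internal cell (hence the +1/-1 arithmetic), and every element carries the number of gaps inserted before it. A behavioral model on top of std::deque - semantics only; the real class avoids allocations with a preallocated PODArray and placement new:

#include <cassert>
#include <cstddef>
#include <deque>
#include <utility>

struct DequeWithGapsModel
{
    std::deque<std::pair<size_t, int>> items; /// (gaps before element, value)
    size_t trailing_gap = 0;                  /// gaps after the last element

    void pushBack(int v) { items.emplace_back(trailing_gap, v); trailing_gap = 0; }
    void pushGap(size_t n) { trailing_gap += n; }

    void popBack()
    {
        trailing_gap += items.back().first;   /// gaps collapse into the neighbor cell
        items.pop_back();
    }

    size_t frontGap() const { return items.empty() ? trailing_gap : items.front().first; }
};

int main()
{
    DequeWithGapsModel q;
    q.pushBack(10);
    q.pushBack(20);
    q.popBack();    /// a cancelled +1/-1 pair...
    q.pushGap(2);   /// ...is remembered as two skipped rows
    q.pushBack(30);
    assert(q.items.back().first == 2); /// value 30 now sits behind the two-row gap
    assert(q.frontGap() == 0);
}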
--- dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp | 6 +++--- dbms/src/Processors/Merges/VersionedCollapsingTransform.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp index b4caeb22bf8..bc19931e9cc 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -16,9 +16,9 @@ VersionedCollapsingTransform::VersionedCollapsingTransform( : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, true) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) - , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 2) - , current_keys(max_rows_in_queue + 1) - , chunk_allocator(num_inputs + max_rows_in_queue + 1) + , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 1) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer + , current_keys(max_rows_in_queue) + , chunk_allocator(num_inputs + max_rows_in_queue + 1) /// +1 just in case (for current_row) { sign_column_number = header.getPositionByName(sign_column_); } diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h index 03b5be9edc6..4be0eb41438 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h @@ -153,7 +153,7 @@ private: { if (size() + 1 == container.size()) throw Exception("Not enough space to insert into FixedSizeDequeWithGaps with capacity " - + toString(container.size() - 1), ErrorCodes::LOGICAL_ERROR); + + std::to_string(container.size() - 1), ErrorCodes::LOGICAL_ERROR); } void checkHasValuesToRemove() const From fa9399d82607e3f799bae53a5770a70b7dad22cb Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 30 Mar 2020 21:00:38 +0300 Subject: [PATCH 023/752] [demo] Create unique columns for literals --- dbms/src/Interpreters/ActionsVisitor.cpp | 43 +++++++++++++------ dbms/src/Parsers/ASTLiteral.h | 2 + .../01101_literal_column_clash.reference | 3 ++ .../01101_literal_column_clash.sql | 10 +++++ 4 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01101_literal_column_clash.reference create mode 100644 dbms/tests/queries/0_stateless/01101_literal_column_clash.sql diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index 4e008a81973..b2091465be9 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -197,10 +197,17 @@ SetPtr makeExplicitSet( static String getUniqueName(const Block & block, const String & prefix) { - int i = 1; - while (block.has(prefix + toString(i))) - ++i; - return prefix + toString(i); + auto result = prefix; + + if (block.has(prefix)) + { + int i = 1; + while (block.has(prefix + toString(i))) + ++i; + result = prefix + "_" + toString(i); + } + + return result; } ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Context & context_) @@ -431,7 +438,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & for (size_t arg = 0; arg < node.arguments->children.size(); ++arg) { auto & child = node.arguments->children[arg]; - auto child_column_name = child->getColumnName(); const auto * lambda = child->as(); const auto * identifier = child->as(); @@ -461,7 +467,7 @@ void 
ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (!prepared_set->empty()) column.name = getUniqueName(data.getSampleBlock(), "__set"); else - column.name = child_column_name; + column.name = child->getColumnName(); if (!data.hasColumn(column.name)) { @@ -496,6 +502,18 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { /// If the argument is not a lambda expression, call it recursively and find out its type. visit(child, data); + + // In the above visit() call, if the argument is a literal, we + // generated a unique column name for it. Use it instead of a generic + // display name. + auto child_column_name = child->getColumnName(); + auto asLiteral = dynamic_cast(child.get()); + if (asLiteral) + { + assert(!asLiteral->unique_column_name.empty()); + child_column_name = asLiteral->unique_column_name; + } + if (data.hasColumn(child_column_name)) { argument_types.push_back(data.getSampleBlock().getByName(child_column_name).type); @@ -587,18 +605,19 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } -void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data) +void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, + Data & data) { - CachedColumnName column_name; - if (data.hasColumn(column_name.get(ast))) - return; - DataTypePtr type = applyVisitor(FieldToDataType(), literal.value); ColumnWithTypeAndName column; column.column = type->createColumnConst(1, convertFieldToType(literal.value, *type)); column.type = type; - column.name = column_name.get(ast); + + // Always create columns for literals with a unique name. Otherwise, there + // may be some weird clashes, see 01101_literal_column_clash. + column.name = getUniqueName(data.getSampleBlock(), literal.getColumnName()); + const_cast(literal).unique_column_name = column.name; data.addAction(ExpressionAction::addColumn(column)); } diff --git a/dbms/src/Parsers/ASTLiteral.h b/dbms/src/Parsers/ASTLiteral.h index 552f5da04a2..e9fb3d3b8ec 100644 --- a/dbms/src/Parsers/ASTLiteral.h +++ b/dbms/src/Parsers/ASTLiteral.h @@ -17,6 +17,8 @@ class ASTLiteral : public ASTWithAlias public: Field value; + String unique_column_name; + /// For ConstantExpressionTemplate std::optional begin; std::optional end; diff --git a/dbms/tests/queries/0_stateless/01101_literal_column_clash.reference b/dbms/tests/queries/0_stateless/01101_literal_column_clash.reference new file mode 100644 index 00000000000..0dc94464bfc --- /dev/null +++ b/dbms/tests/queries/0_stateless/01101_literal_column_clash.reference @@ -0,0 +1,3 @@ +1 +7 3 +xyzabc 2 diff --git a/dbms/tests/queries/0_stateless/01101_literal_column_clash.sql b/dbms/tests/queries/0_stateless/01101_literal_column_clash.sql new file mode 100644 index 00000000000..11e3b622277 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01101_literal_column_clash.sql @@ -0,0 +1,10 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/9810 +select cast(1 as String) +from (select 1 as iid) as t1 +join (select '1' as sid) as t2 on t2.sid = cast(t1.iid as String); + +-- even simpler cases +select cast(7 as String), * from (select 3 "'String'"); +SELECT concat('xyz', 'abc'), * FROM (SELECT 2 AS "'xyz'"); + + From 4e668a5b06974afe45e7c6cbdb74e5c98297fae1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 30 Mar 2020 23:41:29 +0300 Subject: [PATCH 024/752] fixup --- dbms/src/Interpreters/ActionsVisitor.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index b2091465be9..fd36cd27a0e 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -199,12 +199,15 @@ static String getUniqueName(const Block & block, const String & prefix) { auto result = prefix; - if (block.has(prefix)) + if (block.has(result)) { int i = 1; - while (block.has(prefix + toString(i))) + do + { + result = prefix + "_" + toString(i); ++i; - result = prefix + "_" + toString(i); + } + while (block.has(result)); } return result; From ebb5cc05422d2397c992945b741270ba68729786 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 30 Mar 2020 23:47:28 +0300 Subject: [PATCH 025/752] fixup --- dbms/src/Interpreters/ActionsVisitor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index fd36cd27a0e..eb92ee76973 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -510,11 +510,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & // generated a unique column name for it. Use it instead of a generic // display name. auto child_column_name = child->getColumnName(); - auto asLiteral = dynamic_cast(child.get()); - if (asLiteral) + auto as_literal = dynamic_cast(child.get()); + if (as_literal) { - assert(!asLiteral->unique_column_name.empty()); - child_column_name = asLiteral->unique_column_name; + assert(!as_literal->unique_column_name.empty()); + child_column_name = as_literal->unique_column_name; } if (data.hasColumn(child_column_name)) From de36104ab27c6a22a0c55210e08c3505100ddfa4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 31 Mar 2020 13:42:42 +0300 Subject: [PATCH 026/752] Refactor MergingTransforms. 
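The two fixups above settle getUniqueName into the shape below; a standalone rendition with the clash case from 01101_literal_column_clash (a std::set stands in for Block::has):

#include <cassert>
#include <set>
#include <string>

/// Return the prefix itself when it is free, otherwise prefix_1, prefix_2, ...
std::string getUniqueName(const std::set<std::string> & taken, const std::string & prefix)
{
    std::string result = prefix;
    int i = 1;
    while (taken.count(result))
        result = prefix + "_" + std::to_string(i++);
    return result;
}

int main()
{
    /// The clash from the test: a source column literally named 'xyz' must not
    /// be shadowed by the column created for the string literal 'xyz'.
    std::set<std::string> block = {"'xyz'"};
    assert(getUniqueName(block, "'xyz'") == "'xyz'_1");
    assert(getUniqueName(block, "'abc'") == "'abc'");
}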
--- .../Merges/CollapsingSortedTransform.cpp | 6 +- .../Merges/CollapsingSortedTransform.h | 3 + .../Merges/FixedSizeDequeWithGaps.h | 162 ++++++++++++++++++ .../Processors/Merges/IMergingTransform.cpp | 15 +- .../src/Processors/Merges/IMergingTransform.h | 130 +------------- dbms/src/Processors/Merges/MergedData.h | 122 +++++++++++++ .../Merges/MergingSortedTransform.cpp | 13 +- .../Merges/MergingSortedTransform.h | 7 +- .../Merges/ReplacingSortedTransform.cpp | 6 +- .../Merges/ReplacingSortedTransform.h | 3 + .../Merges/VersionedCollapsingTransform.cpp | 6 +- .../Merges/VersionedCollapsingTransform.h | 160 +---------------- 12 files changed, 336 insertions(+), 297 deletions(-) create mode 100644 dbms/src/Processors/Merges/FixedSizeDequeWithGaps.h create mode 100644 dbms/src/Processors/Merges/MergedData.h diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp index 4bb4eb0af43..13a61d26caa 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -24,7 +24,8 @@ CollapsingSortedTransform::CollapsingSortedTransform( size_t max_block_size, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, true) + : IMergingTransform(num_inputs, header, header, true) + , merged_data(header, use_average_block_sizes, max_block_size) , description(std::move(description_)) , sign_column_number(header.getPositionByName(sign_column)) , out_row_sources_buf(out_row_sources_buf_) @@ -150,6 +151,7 @@ void CollapsingSortedTransform::insertRows() void CollapsingSortedTransform::work() { merge(); + prepareOutputChunk(merged_data); } void CollapsingSortedTransform::merge() @@ -229,7 +231,7 @@ void CollapsingSortedTransform::merge() } insertRows(); - finish(); + is_finished = true; } } diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.h b/dbms/src/Processors/Merges/CollapsingSortedTransform.h index 64f9016b90d..58c97f964bc 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.h +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -45,6 +46,8 @@ protected: private: Logger * log = &Logger::get("CollapsingSortedTransform"); + MergedData merged_data; + /// Settings SortDescription description; const size_t sign_column_number; diff --git a/dbms/src/Processors/Merges/FixedSizeDequeWithGaps.h b/dbms/src/Processors/Merges/FixedSizeDequeWithGaps.h new file mode 100644 index 00000000000..35cfded4214 --- /dev/null +++ b/dbms/src/Processors/Merges/FixedSizeDequeWithGaps.h @@ -0,0 +1,162 @@ +#pragma once + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +/* Deque with fixed memory size. Allows pushing gaps. + * frontGap() returns the number of gaps were inserted before front. + * + * This structure may be implemented via std::deque, but + * - Deque uses fixed amount of memory which is allocated in constructor. No more allocations are performed. + * - Gaps are not stored as separate values in queue, which is more memory efficient. + * - Deque is responsible for gaps invariant: after removing element, moves gaps into neighbor cell. + * + * Note: empty deque may have non-zero front gap. + */ +template +class FixedSizeDequeWithGaps +{ +public: + + struct ValueWithGap + { + /// The number of gaps before current element. 
The number of gaps after last element stores into end cell. + size_t gap; + /// Store char[] instead of T in order to make ValueWithGap POD. + /// Call placement constructors after push and and destructors after pop. + char value[sizeof(T)]; + }; + + explicit FixedSizeDequeWithGaps(size_t size) + { + container.resize_fill(size + 1); + } + + ~FixedSizeDequeWithGaps() + { + auto destruct_range = [this](size_t from, size_t to) + { + for (size_t i = from; i < to; ++i) + destructValue(i); + }; + + if (begin <= end) + destruct_range(begin, end); + else + { + destruct_range(0, end); + destruct_range(begin, container.size()); + } + } + + void pushBack(const T & value) + { + checkEnoughSpaceToInsert(); + constructValue(end, value); + moveRight(end); + container[end].gap = 0; + } + + void pushGap(size_t count) { container[end].gap += count; } + + void popBack() + { + checkHasValuesToRemove(); + size_t curr_gap = container[end].gap; + moveLeft(end); + destructValue(end); + container[end].gap += curr_gap; + } + + void popFront() + { + checkHasValuesToRemove(); + destructValue(begin); + moveRight(begin); + } + + T & front() + { + checkHasValuesToGet(); + return getValue(begin); + } + const T & front() const + { + checkHasValuesToGet(); + return getValue(begin); + } + + const T & back() const + { + size_t ps = end; + moveLeft(ps); + return getValue(ps); + } + + size_t & frontGap() { return container[begin].gap; } + const size_t & frontGap() const { return container[begin].gap; } + + size_t size() const + { + if (begin <= end) + return end - begin; + return end + (container.size() - begin); + } + + bool empty() const { return begin == end; } + +private: + PODArray container; + + size_t gap_before_first = 0; + size_t begin = 0; + size_t end = 0; + + void constructValue(size_t index, const T & value) { new (container[index].value) T(value); } + void destructValue(size_t index) { reinterpret_cast(container[index].value)->~T(); } + + T & getValue(size_t index) { return *reinterpret_cast(container[index].value); } + const T & getValue(size_t index) const { return *reinterpret_cast(container[index].value); } + + void moveRight(size_t & index) const + { + ++index; + + if (index == container.size()) + index = 0; + } + + void moveLeft(size_t & index) const + { + if (index == 0) + index = container.size(); + + --index; + } + + void checkEnoughSpaceToInsert() const + { + if (size() + 1 == container.size()) + throw Exception("Not enough space to insert into FixedSizeDequeWithGaps with capacity " + + std::to_string(container.size() - 1), ErrorCodes::LOGICAL_ERROR); + } + + void checkHasValuesToRemove() const + { + if (empty()) + throw Exception("Cannot remove from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + } + + void checkHasValuesToGet() const + { + if (empty()) + throw Exception("Cannot get value from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + } +}; + +} diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp index b0226ce0f3d..0838dda6323 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.cpp +++ b/dbms/src/Processors/Merges/IMergingTransform.cpp @@ -13,11 +13,8 @@ IMergingTransform::IMergingTransform( size_t num_inputs, const Block & input_header, const Block & output_header, - size_t max_block_size, - bool use_average_block_size, bool have_all_inputs_) : IProcessor(InputPorts(num_inputs, input_header), {output_header}) - , merged_data(output_header, use_average_block_size, max_block_size) , 
have_all_inputs(have_all_inputs_) { } @@ -55,6 +52,13 @@ void IMergingTransform::requestDataForInput(size_t input_number) next_input_to_read = input_number; } +void IMergingTransform::prepareOutputChunk(MergedData & merged_data) +{ + has_output_chunk = (is_finished && merged_data.mergedRows()) || merged_data.hasEnoughRows(); + if (has_output_chunk) + output_chunk = merged_data.pull(); +} + IProcessor::Status IMergingTransform::prepareSingleInput() { auto & input = inputs.front(); @@ -171,9 +175,8 @@ IProcessor::Status IMergingTransform::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - bool has_data_to_push = (is_finished && merged_data.mergedRows()) || merged_data.hasEnoughRows(); - if (has_data_to_push && !is_port_full) - output.push(merged_data.pull()); + if (has_output_chunk && !is_port_full) + output.push(std::move(output_chunk)); if (!is_initialized) return prepareInitializeInputs(); diff --git a/dbms/src/Processors/Merges/IMergingTransform.h b/dbms/src/Processors/Merges/IMergingTransform.h index 040263aca36..a1046d207ad 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.h +++ b/dbms/src/Processors/Merges/IMergingTransform.h @@ -6,10 +6,7 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} +class MergedData; /// Base class for merging transforms. class IMergingTransform : public IProcessor @@ -19,8 +16,8 @@ public: size_t num_inputs, const Block & input_header, const Block & output_header, - size_t max_block_size, - bool use_average_block_size, /// For adaptive granularity. Return chunks with the same avg size as inputs. + //size_t max_block_size, + //bool use_average_block_size, /// For adaptive granularity. Return chunks with the same avg size as inputs. bool have_all_inputs_); /// Methods to add additional input port. It is possible to do only before the first call of `prepare`. @@ -38,129 +35,18 @@ protected: virtual void onFinish() {} /// Is called when all data is processed. void requestDataForInput(size_t input_number); /// Call it to say that next chunk of data is required for input. - void finish() { is_finished = true; } /// Call it when all data was inserted to merged_data. - - /// Struct which represents current merging chunk of data. - /// Also it calculates the number of merged rows and other profile info. - class MergedData - { - public: - explicit MergedData(const Block & header, bool use_average_block_size_, UInt64 max_block_size_) - : max_block_size(max_block_size_), use_average_block_size(use_average_block_size_) - { - columns.reserve(header.columns()); - for (const auto & column : header) - columns.emplace_back(column.type->createColumn()); - } - - /// Pull will be called at next prepare call. 
- void flush() { need_flush = true; } - - void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size) - { - size_t num_columns = raw_columns.size(); - for (size_t i = 0; i < num_columns; ++i) - columns[i]->insertFrom(*raw_columns[i], row); - - ++total_merged_rows; - ++merged_rows; - sum_blocks_granularity += block_size; - } - - void insertFromChunk(Chunk && chunk, size_t limit_rows) - { - if (merged_rows) - throw Exception("Cannot insert to MergedData from Chunk because MergedData is not empty.", - ErrorCodes::LOGICAL_ERROR); - - auto num_rows = chunk.getNumRows(); - columns = chunk.mutateColumns(); - if (limit_rows && num_rows > limit_rows) - { - num_rows = limit_rows; - for (auto & column : columns) - column = (*column->cut(0, num_rows)).mutate(); - } - - need_flush = true; - total_merged_rows += num_rows; - merged_rows = num_rows; - - /// We don't cate about granularity here. Because, for fast-forward optimization, chunk will be moved as-is. - /// sum_blocks_granularity += block_size * num_rows; - } - - Chunk pull() - { - MutableColumns empty_columns; - empty_columns.reserve(columns.size()); - - for (const auto & column : columns) - empty_columns.emplace_back(column->cloneEmpty()); - - empty_columns.swap(columns); - Chunk chunk(std::move(empty_columns), merged_rows); - - merged_rows = 0; - sum_blocks_granularity = 0; - ++total_chunks; - total_allocated_bytes += chunk.allocatedBytes(); - need_flush = false; - - return chunk; - } - - bool hasEnoughRows() const - { - /// If full chunk was or is going to be inserted, then we must pull it. - /// It is needed for fast-forward optimization. - if (need_flush) - return true; - - /// Never return more then max_block_size. - if (merged_rows >= max_block_size) - return true; - - if (!use_average_block_size) - return false; - - /// Zero rows always not enough. - if (merged_rows == 0) - return false; - - return merged_rows * merged_rows >= sum_blocks_granularity; - } - - UInt64 mergedRows() const { return merged_rows; } - UInt64 totalMergedRows() const { return total_merged_rows; } - UInt64 totalChunks() const { return total_chunks; } - UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } - - private: - MutableColumns columns; - - UInt64 sum_blocks_granularity = 0; - UInt64 merged_rows = 0; - UInt64 total_merged_rows = 0; - UInt64 total_chunks = 0; - UInt64 total_allocated_bytes = 0; - - const UInt64 max_block_size; - const bool use_average_block_size; - - bool need_flush = false; - }; - - MergedData merged_data; + void prepareOutputChunk(MergedData & merged_data); /// Moves chunk from merged_data to output_chunk if needed. /// Profile info. Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + Chunk output_chunk; + bool has_output_chunk = false; + bool is_finished = false; + private: /// Processor state. bool is_initialized = false; - bool is_finished = false; - bool need_data = false; size_t next_input_to_read = 0; diff --git a/dbms/src/Processors/Merges/MergedData.h b/dbms/src/Processors/Merges/MergedData.h new file mode 100644 index 00000000000..6f66d584d91 --- /dev/null +++ b/dbms/src/Processors/Merges/MergedData.h @@ -0,0 +1,122 @@ +#pragma once + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/// Class which represents current merging chunk of data. +/// Also it calculates the number of merged rows and other profile info. 
+class MergedData +{ +public: + explicit MergedData(const Block & header, bool use_average_block_size_, UInt64 max_block_size_) + : max_block_size(max_block_size_), use_average_block_size(use_average_block_size_) + { + columns.reserve(header.columns()); + for (const auto & column : header) + columns.emplace_back(column.type->createColumn()); + } + + /// Pull will be called at next prepare call. + void flush() { need_flush = true; } + + void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size) + { + size_t num_columns = raw_columns.size(); + for (size_t i = 0; i < num_columns; ++i) + columns[i]->insertFrom(*raw_columns[i], row); + + ++total_merged_rows; + ++merged_rows; + sum_blocks_granularity += block_size; + } + + void insertFromChunk(Chunk && chunk, size_t limit_rows) + { + if (merged_rows) + throw Exception("Cannot insert to MergedData from Chunk because MergedData is not empty.", + ErrorCodes::LOGICAL_ERROR); + + auto num_rows = chunk.getNumRows(); + columns = chunk.mutateColumns(); + if (limit_rows && num_rows > limit_rows) + { + num_rows = limit_rows; + for (auto & column : columns) + column = (*column->cut(0, num_rows)).mutate(); + } + + need_flush = true; + total_merged_rows += num_rows; + merged_rows = num_rows; + + /// We don't cate about granularity here. Because, for fast-forward optimization, chunk will be moved as-is. + /// sum_blocks_granularity += block_size * num_rows; + } + + Chunk pull() + { + MutableColumns empty_columns; + empty_columns.reserve(columns.size()); + + for (const auto & column : columns) + empty_columns.emplace_back(column->cloneEmpty()); + + empty_columns.swap(columns); + Chunk chunk(std::move(empty_columns), merged_rows); + + merged_rows = 0; + sum_blocks_granularity = 0; + ++total_chunks; + total_allocated_bytes += chunk.allocatedBytes(); + need_flush = false; + + return chunk; + } + + bool hasEnoughRows() const + { + /// If full chunk was or is going to be inserted, then we must pull it. + /// It is needed for fast-forward optimization. + if (need_flush) + return true; + + /// Never return more then max_block_size. + if (merged_rows >= max_block_size) + return true; + + if (!use_average_block_size) + return false; + + /// Zero rows always not enough. 
+ if (merged_rows == 0) + return false; + + return merged_rows * merged_rows >= sum_blocks_granularity; + } + + UInt64 mergedRows() const { return merged_rows; } + UInt64 totalMergedRows() const { return total_merged_rows; } + UInt64 totalChunks() const { return total_chunks; } + UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } + +private: + MutableColumns columns; + + UInt64 sum_blocks_granularity = 0; + UInt64 merged_rows = 0; + UInt64 total_merged_rows = 0; + UInt64 total_chunks = 0; + UInt64 total_allocated_bytes = 0; + + const UInt64 max_block_size; + const bool use_average_block_size; + + bool need_flush = false; +}; + +} diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index 734da2e4bb3..226b29c36dc 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -23,7 +23,8 @@ MergingSortedTransform::MergingSortedTransform( bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) - : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, have_all_inputs_) + : IMergingTransform(num_inputs, header, header, have_all_inputs_) + , merged_data(header, use_average_block_sizes, max_block_size) , description(std::move(description_)) , limit(limit_) , quiet(quiet_) @@ -103,6 +104,8 @@ void MergingSortedTransform::work() merge(queue_with_collation); else merge(queue_without_collation); + + prepareOutputChunk(merged_data); } template @@ -114,7 +117,7 @@ void MergingSortedTransform::merge(TSortingHeap & queue) if (limit && merged_data.totalMergedRows() >= limit) { //std::cerr << "Limit reached\n"; - finish(); + is_finished = true; return false; } @@ -179,13 +182,13 @@ void MergingSortedTransform::merge(TSortingHeap & queue) requestDataForInput(current.impl->order); if (limit && merged_data.totalMergedRows() >= limit) - finish(); + is_finished = true; return; } } - finish(); + is_finished = true; } void MergingSortedTransform::insertFromChunk(size_t source_num) @@ -202,7 +205,7 @@ void MergingSortedTransform::insertFromChunk(size_t source_num) { num_rows = total_merged_rows_after_insertion - limit; merged_data.insertFromChunk(std::move(source_chunks[source_num]), num_rows); - finish(); + is_finished = true; } else { diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.h b/dbms/src/Processors/Merges/MergingSortedTransform.h index f8e3e65951f..49bfe228106 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.h +++ b/dbms/src/Processors/Merges/MergingSortedTransform.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -26,13 +27,15 @@ public: String getName() const override { return "MergingSortedTransform"; } void work() override; -private: - +protected: void onNewInput() override; void initializeInputs() override; void consume(Chunk chunk, size_t input_number) override; void onFinish() override; +private: + MergedData merged_data; + /// Settings SortDescription description; UInt64 limit; diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp index 18fb9f98221..2de67707d6d 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp @@ -15,7 +15,8 @@ ReplacingSortedTransform::ReplacingSortedTransform( size_t max_block_size, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingTransform(num_inputs, 
header, header, max_block_size, use_average_block_sizes, true) + : IMergingTransform(num_inputs, header, header, true) + , merged_data(header, use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) , chunk_allocator(num_inputs + max_row_refs) @@ -86,6 +87,7 @@ void ReplacingSortedTransform::insertRow() void ReplacingSortedTransform::work() { merge(); + prepareOutputChunk(merged_data); } void ReplacingSortedTransform::merge() @@ -148,7 +150,7 @@ void ReplacingSortedTransform::merge() if (!selected_row.empty()) insertRow(); - finish(); + is_finished = true; } } diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.h b/dbms/src/Processors/Merges/ReplacingSortedTransform.h index 85fd0d23fe4..4f4b71c5b13 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.h +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,8 @@ protected: private: Logger * log = &Logger::get("ReplacingSortedTransform"); + MergedData merged_data; + SortDescription description; ssize_t version_column_number = -1; diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp index bc19931e9cc..9191123c878 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -13,7 +13,8 @@ VersionedCollapsingTransform::VersionedCollapsingTransform( size_t max_block_size, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingTransform(num_inputs, header, header, max_block_size, use_average_block_sizes, true) + : IMergingTransform(num_inputs, header, header, true) + , merged_data(header, use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 1) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer @@ -69,6 +70,7 @@ void VersionedCollapsingTransform::updateCursor(Chunk chunk, size_t source_num) void VersionedCollapsingTransform::work() { merge(); + prepareOutputChunk(merged_data); } inline ALWAYS_INLINE static void writeRowSourcePart(WriteBuffer & buffer, RowSourcePart row_source) @@ -193,7 +195,7 @@ void VersionedCollapsingTransform::merge() /// Write information about last collapsed rows. insertGap(current_keys.frontGap()); - finish(); + is_finished = true; } diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h index 4be0eb41438..722bd30feca 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -13,162 +15,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/* Deque with fixed memory size. Allows pushing gaps. - * frontGap() returns the number of gaps were inserted before front. - * - * This structure may be implemented via std::deque, but - * - Deque uses fixed amount of memory which is allocated in constructor. No more allocations are performed. - * - Gaps are not stored as separate values in queue, which is more memory efficient. - * - Deque is responsible for gaps invariant: after removing element, moves gaps into neighbor cell. - * - * Note: empty deque may have non-zero front gap. 
- */ -template -class FixedSizeDequeWithGaps -{ -public: - - struct ValueWithGap - { - /// The number of gaps before current element. The number of gaps after last element stores into end cell. - size_t gap; - /// Store char[] instead of T in order to make ValueWithGap POD. - /// Call placement constructors after push and and destructors after pop. - char value[sizeof(T)]; - }; - - explicit FixedSizeDequeWithGaps(size_t size) - { - container.resize_fill(size + 1); - } - - ~FixedSizeDequeWithGaps() - { - auto destruct_range = [this](size_t from, size_t to) - { - for (size_t i = from; i < to; ++i) - destructValue(i); - }; - - if (begin <= end) - destruct_range(begin, end); - else - { - destruct_range(0, end); - destruct_range(begin, container.size()); - } - } - - void pushBack(const T & value) - { - checkEnoughSpaceToInsert(); - constructValue(end, value); - moveRight(end); - container[end].gap = 0; - } - - void pushGap(size_t count) { container[end].gap += count; } - - void popBack() - { - checkHasValuesToRemove(); - size_t curr_gap = container[end].gap; - moveLeft(end); - destructValue(end); - container[end].gap += curr_gap; - } - - void popFront() - { - checkHasValuesToRemove(); - destructValue(begin); - moveRight(begin); - } - - T & front() - { - checkHasValuesToGet(); - return getValue(begin); - } - const T & front() const - { - checkHasValuesToGet(); - return getValue(begin); - } - - const T & back() const - { - size_t ps = end; - moveLeft(ps); - return getValue(ps); - } - - size_t & frontGap() { return container[begin].gap; } - const size_t & frontGap() const { return container[begin].gap; } - - size_t size() const - { - if (begin <= end) - return end - begin; - return end + (container.size() - begin); - } - - bool empty() const { return begin == end; } - -private: - PODArray container; - - size_t gap_before_first = 0; - size_t begin = 0; - size_t end = 0; - - void constructValue(size_t index, const T & value) { new (container[index].value) T(value); } - void destructValue(size_t index) { reinterpret_cast(container[index].value)->~T(); } - - T & getValue(size_t index) { return *reinterpret_cast(container[index].value); } - const T & getValue(size_t index) const { return *reinterpret_cast(container[index].value); } - - void moveRight(size_t & index) const - { - ++index; - - if (index == container.size()) - index = 0; - } - - void moveLeft(size_t & index) const - { - if (index == 0) - index = container.size(); - - --index; - } - - void checkEnoughSpaceToInsert() const - { - if (size() + 1 == container.size()) - throw Exception("Not enough space to insert into FixedSizeDequeWithGaps with capacity " - + std::to_string(container.size() - 1), ErrorCodes::LOGICAL_ERROR); - } - - void checkHasValuesToRemove() const - { - if (empty()) - throw Exception("Cannot remove from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); - } - - void checkHasValuesToGet() const - { - if (empty()) - throw Exception("Cannot get value from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); - } -}; - class VersionedCollapsingTransform : public IMergingTransform { public: @@ -190,6 +36,8 @@ protected: private: Logger * log = &Logger::get("VersionedCollapsingTransform"); + MergedData merged_data; + SortDescription description; size_t sign_column_number = 0; From 67073f8b100032969deaa335deb94db7ac2fd8ff Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 31 Mar 2020 13:44:06 +0300 Subject: [PATCH 027/752] Fix build. 
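A note on the hasEnoughRows() heuristic carried over in the refactor above: when use_average_block_size is set, each insertRow() adds the size of the row's source block to sum_blocks_granularity, so the division-free check merged_rows * merged_rows >= sum_blocks_granularity is equivalent to merged_rows >= sum_blocks_granularity / merged_rows, i.e. the merged chunk has reached the average size of the source blocks its rows came from. A minimal sketch with simplified counters only (not the real class):

#include <cstdint>
#include <iostream>

// Simplified model of the MergedData block-size heuristic.
// Assumption: per inserted row we accumulate the size of the block it came from.
struct GranularityTracker
{
    uint64_t merged_rows = 0;
    uint64_t sum_blocks_granularity = 0;

    void insertRow(uint64_t source_block_size)
    {
        ++merged_rows;
        sum_blocks_granularity += source_block_size;
    }

    // merged_rows^2 >= sum  <=>  merged_rows >= sum / merged_rows
    // (the average source block size over the rows merged so far).
    bool hasEnoughRows() const
    {
        return merged_rows != 0 && merged_rows * merged_rows >= sum_blocks_granularity;
    }
};

int main()
{
    GranularityTracker t;
    for (int i = 0; i < 8191; ++i)
        t.insertRow(8192);                      // rows coming from 8192-row source blocks
    std::cout << t.hasEnoughRows() << '\n';     // 0: 8191^2 < 8191 * 8192
    t.insertRow(8192);
    std::cout << t.hasEnoughRows() << '\n';     // 1: reached the average block size
}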
--- dbms/src/Processors/Merges/IMergingTransform.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp index 0838dda6323..7c1b58d30f5 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.cpp +++ b/dbms/src/Processors/Merges/IMergingTransform.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { From 5f5eb58abe767938c32eeda8a36c933d9a991ddf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 31 Mar 2020 17:11:58 +0300 Subject: [PATCH 028/752] Add SummingSortedTransform [part 1] --- dbms/src/Processors/Merges/MergedData.h | 2 +- .../Merges/SummingSortedTransform.cpp | 264 ++++++++++++++++++ .../Merges/SummingSortedTransform.h | 101 +++++++ 3 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Processors/Merges/SummingSortedTransform.cpp create mode 100644 dbms/src/Processors/Merges/SummingSortedTransform.h diff --git a/dbms/src/Processors/Merges/MergedData.h b/dbms/src/Processors/Merges/MergedData.h index 6f66d584d91..e5a8a541aa5 100644 --- a/dbms/src/Processors/Merges/MergedData.h +++ b/dbms/src/Processors/Merges/MergedData.h @@ -104,7 +104,7 @@ public: UInt64 totalChunks() const { return total_chunks; } UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } -private: +protected: MutableColumns columns; UInt64 sum_blocks_granularity = 0; diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp new file mode 100644 index 00000000000..0c4052e821d --- /dev/null +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -0,0 +1,264 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ +bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) +{ + for (auto & desc : description) + if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number)) + return true; + + return false; +} + +/// Returns true if merge result is not empty +bool mergeMap(const SummingSortedTransform::MapDescription & desc, Row & row, SortCursor & cursor) +{ + /// Strongly non-optimal. 
+ + Row & left = row; + Row right(left.size()); + + for (size_t col_num : desc.key_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + for (size_t col_num : desc.val_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & + { + return matrix[i].get()[j]; + }; + + auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array + { + size_t size = col_nums.size(); + Array res(size); + for (size_t col_num_index = 0; col_num_index < size; ++col_num_index) + res[col_num_index] = at_ith_column_jth_row(matrix, col_nums[col_num_index], j); + return res; + }; + + std::map merged; + + auto accumulate = [](Array & dst, const Array & src) + { + bool has_non_zero = false; + size_t size = dst.size(); + for (size_t i = 0; i < size; ++i) + if (applyVisitor(FieldVisitorSum(src[i]), dst[i])) + has_non_zero = true; + return has_non_zero; + }; + + auto merge = [&](const Row & matrix) + { + size_t rows = matrix[desc.key_col_nums[0]].get().size(); + + for (size_t j = 0; j < rows; ++j) + { + Array key = tuple_of_nth_columns_at_jth_row(matrix, desc.key_col_nums, j); + Array value = tuple_of_nth_columns_at_jth_row(matrix, desc.val_col_nums, j); + + auto it = merged.find(key); + if (merged.end() == it) + merged.emplace(std::move(key), std::move(value)); + else + { + if (!accumulate(it->second, value)) + merged.erase(it); + } + } + }; + + merge(left); + merge(right); + + for (size_t col_num : desc.key_col_nums) + row[col_num] = Array(merged.size()); + for (size_t col_num : desc.val_col_nums) + row[col_num] = Array(merged.size()); + + size_t row_num = 0; + for (const auto & key_value : merged) + { + for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) + row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + + for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) + row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + + ++row_num; + } + + return row_num != 0; +} +} + +SummingSortedTransform::SummingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description, + /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. + const Names & column_names_to_sum, + size_t max_block_size) + : IMergingTransform(num_inputs, header, header, true) +{ + size_t num_columns = header.columns(); + current_row.resize(num_columns); + + /// name of nested structure -> the column numbers that refer to it. + std::unordered_map> discovered_maps; + + /** Fill in the column numbers, which must be summed. + * This can only be numeric columns that are not part of the sort key. + * If a non-empty column_names_to_sum is specified, then we only take these columns. + * Some columns from column_names_to_sum may not be found. This is ignored. 
+ */ + for (size_t i = 0; i < num_columns; ++i) + { + const ColumnWithTypeAndName & column = header.safeGetByPosition(i); + + /// Discover nested Maps and find columns for summation + if (typeid_cast(column.type.get())) + { + const auto map_name = Nested::extractTableName(column.name); + /// if nested table name ends with `Map` it is a possible candidate for special handling + if (map_name == column.name || !endsWith(map_name, "Map")) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + discovered_maps[map_name].emplace_back(i); + } + else + { + bool is_agg_func = WhichDataType(column.type).isAggregateFunction(); + + /// There are special const columns for example after prewere sections. + if ((!column.type->isSummable() && !is_agg_func) || isColumnConst(*column.column)) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + /// Are they inside the PK? + if (isInPrimaryKey(description, column.name, i)) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + if (column_names_to_sum.empty() + || column_names_to_sum.end() != + std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) + { + // Create aggregator to sum this column + AggregateDescription desc; + desc.is_agg_func_type = is_agg_func; + desc.column_numbers = {i}; + + if (!is_agg_func) + { + desc.init("sumWithOverflow", {column.type}); + } + + columns_to_aggregate.emplace_back(std::move(desc)); + } + else + { + // Column is not going to be summed, use last value + column_numbers_not_to_aggregate.push_back(i); + } + } + } + + /// select actual nested Maps from list of candidates + for (const auto & map : discovered_maps) + { + /// map should contain at least two elements (key -> value) + if (map.second.size() < 2) + { + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + continue; + } + + /// no elements of map could be in primary key + auto column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it)) + break; + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + continue; + } + + DataTypes argument_types; + AggregateDescription desc; + MapDescription map_desc; + + column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + { + const ColumnWithTypeAndName & key_col = header.safeGetByPosition(*column_num_it); + const String & name = key_col.name; + const IDataType & nested_type = *static_cast(key_col.type.get())->getNestedType(); + + if (column_num_it == map.second.begin() + || endsWith(name, "ID") + || endsWith(name, "Key") + || endsWith(name, "Type")) + { + if (!nested_type.isValueRepresentedByInteger() && !isStringOrFixedString(nested_type)) + break; + + map_desc.key_col_nums.push_back(*column_num_it); + } + else + { + if (!nested_type.isSummable()) + break; + + map_desc.val_col_nums.push_back(*column_num_it); + } + + // Add column to function arguments + desc.column_numbers.push_back(*column_num_it); + argument_types.push_back(key_col.type); + } + + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + continue; + } + + if (map_desc.key_col_nums.size() == 1) + { + // Create summation for all value columns in the map + desc.init("sumMapWithOverflow", argument_types); + columns_to_aggregate.emplace_back(std::move(desc)); 
+ } + else + { + // Fall back to legacy mergeMaps for composite keys + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + maps_to_sum.emplace_back(std::move(map_desc)); + } + } +} + +} diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.h b/dbms/src/Processors/Merges/SummingSortedTransform.h new file mode 100644 index 00000000000..f82c1f9e6db --- /dev/null +++ b/dbms/src/Processors/Merges/SummingSortedTransform.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + + + +class SummingSortedTransform : public IMergingTransform +{ +public: + + SummingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description, + /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. + const Names & column_names_to_sum, + size_t max_block_size); + + /// Stores aggregation function, state, and columns to be used as function arguments + struct AggregateDescription + { + /// An aggregate function 'sumWithOverflow' or 'sumMapWithOverflow' for summing. + AggregateFunctionPtr function; + IAggregateFunction::AddFunc add_function = nullptr; + std::vector column_numbers; + IColumn * merged_column = nullptr; + AlignedBuffer state; + bool created = false; + + /// In case when column has type AggregateFunction: use the aggregate function from itself instead of 'function' above. + bool is_agg_func_type = false; + + void init(const char * function_name, const DataTypes & argument_types) + { + function = AggregateFunctionFactory::instance().get(function_name, argument_types); + add_function = function->getAddressOfAddFunction(); + state.reset(function->sizeOfData(), function->alignOfData()); + } + + void createState() + { + if (created) + return; + if (is_agg_func_type) + merged_column->insertDefault(); + else + function->create(state.data()); + created = true; + } + + void destroyState() + { + if (!created) + return; + if (!is_agg_func_type) + function->destroy(state.data()); + created = false; + } + + /// Explicitly destroy aggregation state if the stream is terminated + ~AggregateDescription() + { + destroyState(); + } + + AggregateDescription() = default; + AggregateDescription(AggregateDescription &&) = default; + AggregateDescription(const AggregateDescription &) = delete; + }; + + struct SummingMergedData : public MergedData + { + public: + + }; + + /// Stores numbers of key-columns and value-columns. + struct MapDescription + { + std::vector key_col_nums; + std::vector val_col_nums; + }; + +private: + /// Columns with which values should be summed. + ColumnNumbers column_numbers_not_to_aggregate; + + std::vector columns_to_aggregate; + std::vector maps_to_sum; +}; + +} From abae7dfffec2618cc5acb87371e13635c0eea8f1 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 31 Mar 2020 16:14:59 +0300 Subject: [PATCH 029/752] Initial implementation of splitting string into Alpha-Num tokens with SIMD intrinsics. 
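As a reading aid for the SSE tokenizer in the patch below: bit i of result_bitmask is set when byte i of the 16-byte haystack is alphanumeric, and each token piece is then decoded from the mask with two count-trailing-zeros steps. A portable model of just that decoding (firstRun is a hypothetical helper; __builtin_ctz plays the role of the patch's getTrailingZeroBits):

#include <cstdint>
#include <iostream>

// Portable model of the bitmask decoding used in the SSE path below.
static void firstRun(uint32_t mask, uint32_t & start, uint32_t & len)
{
    // __builtin_ctz is undefined for 0, so the caller must check mask != 0
    // first (the patch treats mask == 0 as "no token bytes in this haystack").
    start = __builtin_ctz(mask);
    // Shift the run down to bit 0, invert, and count how long it stays all-ones.
    len = __builtin_ctz(~(mask >> start));
}

int main()
{
    uint32_t start, len;

    firstRun(0b0000'0000'0111'1000, start, len);
    std::cout << start << ' ' << len << '\n'; // 3 4: token occupies bytes 3..6

    firstRun(0b1111'0000'0000'0001, start, len);
    std::cout << start << ' ' << len << '\n'; // 0 1: only byte 0; the high run
                                              // is decoded on the next call
}

If start + len lands exactly on byte 16, the patch continues into the next 16-byte haystack, since the token may span the boundary.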
--- .../MergeTree/MergeTreeIndexFullText.cpp | 53 +++++++++++++++++++ .../tests/gtest_SplitTokenExtractor.cpp | 0 2 files changed, 53 insertions(+) create mode 100644 dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 4b3bd954496..5e4bf15418c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -606,8 +606,60 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size { *token_start = *pos; *token_len = 0; + while (*pos < len) { +#if __SSE2__ + // NOTE: we assume that `data` string is padded from the right with 15 zero-bytes. + const __m128i haystack = _mm_loadu_si128(reinterpret_cast(data + *pos)); + const size_t haystack_length = 16; + +#if __SSE4_2__ + // With the help of https://www.strchr.com/strcmp_and_strlen_using_sse_4.2 + static const auto alnum_chars_ranges = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'Z', 'A', 'z', 'a', '9', '0'); + // Every bit represents if `haystack` character is in the ranges (1) or not(0) + const auto result_bitmask = _mm_cvtsi128_si32(_mm_cmpestrm(alnum_chars_ranges, 6, haystack, haystack_length, _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS)); +#else + // NOTE: -1 and +1 required since SSE2 has no `>=` and `<=` instructions on packed 8-bit integers (epi8). + static const auto number_begin = _mm_set1_epi8('0' - 1); + static const auto number_end = _mm_set1_epi8('9' + 1); + static const auto alpha_lower_begin = _mm_set1_epi8('a' - 1); + static const auto alpha_lower_end = _mm_set1_epi8('z' + 1); + static const auto alpha_upper_begin = _mm_set1_epi8('A' - 1); + static const auto alpha_upper_end = _mm_set1_epi8('Z' + 1); + + // every bit represents if `haystack` character `c` statisfies condition: + // (c > '0' - 1 && c < '9' + 1) || (c > 'a' - 1 && c < 'z' + 1) || (c > 'A' - 1 && c < 'Z' + 1) + const int result_bitmask = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128( + _mm_and_si128(_mm_cmpgt_epi8(haystack, number_begin), _mm_cmplt_epi8(haystack, number_end)), + _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_lower_begin), _mm_cmplt_epi8(haystack, alpha_lower_end))), + _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_upper_begin), _mm_cmplt_epi8(haystack, alpha_upper_end)))); +#endif + // NOTE: __builtin_ctz family explicitly state that result is UNDEFINED if argument is 0 + if (result_bitmask == 0) + { + // end of token started on previous haystack + if (*token_len != 0) + return true; + + *pos += haystack_length; + continue; + } + + const auto start = getTrailingZeroBits(result_bitmask); + if (*token_len == 0) + *token_start = *pos + start; + + const auto l = getTrailingZeroBits(~(result_bitmask >> start)); + *token_len += l; + + *pos += start + l; + if (start + l == 16) + // check if there are leftovers in next `haystack` + continue; + + return true; +#else if (isASCII(data[*pos]) && !isAlphaNumericASCII(data[*pos])) { /// Finish current token if any @@ -621,6 +673,7 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size ++*pos; ++*token_len; } +#endif } return *token_len > 0; } diff --git a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp new file mode 100644 index 00000000000..e69de29bb2d From f3743552cedad4cd75207ed8b10b3d508e68ae59 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 31 Mar 2020 22:58:27 +0300 Subject: [PATCH 030/752] Add 
SummingSortedTransform [part 2] --- .../Merges/CollapsingSortedTransform.cpp | 2 +- dbms/src/Processors/Merges/MergedData.h | 7 +- .../Merges/MergingSortedTransform.cpp | 2 +- .../Merges/ReplacingSortedTransform.cpp | 2 +- .../Merges/SummingSortedTransform.cpp | 509 ++++++++++-------- .../Merges/SummingSortedTransform.h | 54 +- .../Merges/VersionedCollapsingTransform.cpp | 2 +- 7 files changed, 328 insertions(+), 250 deletions(-) diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp index 13a61d26caa..9fcb7cea116 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -25,7 +25,7 @@ CollapsingSortedTransform::CollapsingSortedTransform( WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : IMergingTransform(num_inputs, header, header, true) - , merged_data(header, use_average_block_sizes, max_block_size) + , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , sign_column_number(header.getPositionByName(sign_column)) , out_row_sources_buf(out_row_sources_buf_) diff --git a/dbms/src/Processors/Merges/MergedData.h b/dbms/src/Processors/Merges/MergedData.h index e5a8a541aa5..37dd3c62587 100644 --- a/dbms/src/Processors/Merges/MergedData.h +++ b/dbms/src/Processors/Merges/MergedData.h @@ -13,12 +13,9 @@ namespace ErrorCodes class MergedData { public: - explicit MergedData(const Block & header, bool use_average_block_size_, UInt64 max_block_size_) - : max_block_size(max_block_size_), use_average_block_size(use_average_block_size_) + explicit MergedData(MutableColumns columns_, bool use_average_block_size_, UInt64 max_block_size_) + : columns(std::move(columns_)), max_block_size(max_block_size_), use_average_block_size(use_average_block_size_) { - columns.reserve(header.columns()); - for (const auto & column : header) - columns.emplace_back(column.type->createColumn()); } /// Pull will be called at next prepare call. 
diff --git a/dbms/src/Processors/Merges/MergingSortedTransform.cpp b/dbms/src/Processors/Merges/MergingSortedTransform.cpp index 226b29c36dc..7b7e4fcf62c 100644 --- a/dbms/src/Processors/Merges/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/MergingSortedTransform.cpp @@ -24,7 +24,7 @@ MergingSortedTransform::MergingSortedTransform( bool use_average_block_sizes, bool have_all_inputs_) : IMergingTransform(num_inputs, header, header, have_all_inputs_) - , merged_data(header, use_average_block_sizes, max_block_size) + , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , limit(limit_) , quiet(quiet_) diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp index 2de67707d6d..65654a98764 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp @@ -16,7 +16,7 @@ ReplacingSortedTransform::ReplacingSortedTransform( WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : IMergingTransform(num_inputs, header, header, true) - , merged_data(header, use_average_block_sizes, max_block_size) + , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) , chunk_allocator(num_inputs + max_row_refs) diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index 0c4052e821d..45f3f9b71c9 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -4,261 +4,302 @@ #include #include #include +#include namespace DB { namespace { -bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) -{ - for (auto & desc : description) - if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number)) - return true; - - return false; -} - -/// Returns true if merge result is not empty -bool mergeMap(const SummingSortedTransform::MapDescription & desc, Row & row, SortCursor & cursor) -{ - /// Strongly non-optimal. 
- - Row & left = row; - Row right(left.size()); - - for (size_t col_num : desc.key_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); - - for (size_t col_num : desc.val_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); - - auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & + bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) { - return matrix[i].get()[j]; - }; + for (auto & desc : description) + if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number)) + return true; - auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array - { - size_t size = col_nums.size(); - Array res(size); - for (size_t col_num_index = 0; col_num_index < size; ++col_num_index) - res[col_num_index] = at_ith_column_jth_row(matrix, col_nums[col_num_index], j); - return res; - }; - - std::map merged; - - auto accumulate = [](Array & dst, const Array & src) - { - bool has_non_zero = false; - size_t size = dst.size(); - for (size_t i = 0; i < size; ++i) - if (applyVisitor(FieldVisitorSum(src[i]), dst[i])) - has_non_zero = true; - return has_non_zero; - }; - - auto merge = [&](const Row & matrix) - { - size_t rows = matrix[desc.key_col_nums[0]].get().size(); - - for (size_t j = 0; j < rows; ++j) - { - Array key = tuple_of_nth_columns_at_jth_row(matrix, desc.key_col_nums, j); - Array value = tuple_of_nth_columns_at_jth_row(matrix, desc.val_col_nums, j); - - auto it = merged.find(key); - if (merged.end() == it) - merged.emplace(std::move(key), std::move(value)); - else - { - if (!accumulate(it->second, value)) - merged.erase(it); - } - } - }; - - merge(left); - merge(right); - - for (size_t col_num : desc.key_col_nums) - row[col_num] = Array(merged.size()); - for (size_t col_num : desc.val_col_nums) - row[col_num] = Array(merged.size()); - - size_t row_num = 0; - for (const auto & key_value : merged) - { - for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; - - for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; - - ++row_num; + return false; } - return row_num != 0; -} + /// Returns true if merge result is not empty + bool mergeMap(const SummingSortedTransform::MapDescription & desc, Row & row, SortCursor & cursor) + { + /// Strongly non-optimal. 
+ + Row & left = row; + Row right(left.size()); + + for (size_t col_num : desc.key_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + for (size_t col_num : desc.val_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & + { + return matrix[i].get()[j]; + }; + + auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array + { + size_t size = col_nums.size(); + Array res(size); + for (size_t col_num_index = 0; col_num_index < size; ++col_num_index) + res[col_num_index] = at_ith_column_jth_row(matrix, col_nums[col_num_index], j); + return res; + }; + + std::map merged; + + auto accumulate = [](Array & dst, const Array & src) + { + bool has_non_zero = false; + size_t size = dst.size(); + for (size_t i = 0; i < size; ++i) + if (applyVisitor(FieldVisitorSum(src[i]), dst[i])) + has_non_zero = true; + return has_non_zero; + }; + + auto merge = [&](const Row & matrix) + { + size_t rows = matrix[desc.key_col_nums[0]].get().size(); + + for (size_t j = 0; j < rows; ++j) + { + Array key = tuple_of_nth_columns_at_jth_row(matrix, desc.key_col_nums, j); + Array value = tuple_of_nth_columns_at_jth_row(matrix, desc.val_col_nums, j); + + auto it = merged.find(key); + if (merged.end() == it) + merged.emplace(std::move(key), std::move(value)); + else + { + if (!accumulate(it->second, value)) + merged.erase(it); + } + } + }; + + merge(left); + merge(right); + + for (size_t col_num : desc.key_col_nums) + row[col_num] = Array(merged.size()); + for (size_t col_num : desc.val_col_nums) + row[col_num] = Array(merged.size()); + + size_t row_num = 0; + for (const auto & key_value : merged) + { + for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) + row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + + for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) + row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + + ++row_num; + } + + return row_num != 0; + } + + SummingSortedTransform::ColumnsDefinition defineColumns( + const Block & header, + const SortDescription & description, + const Names & column_names_to_sum) + { + size_t num_columns = header.columns(); + SummingSortedTransform::ColumnsDefinition def; + + /// name of nested structure -> the column numbers that refer to it. + std::unordered_map> discovered_maps; + + /** Fill in the column numbers, which must be summed. + * This can only be numeric columns that are not part of the sort key. + * If a non-empty column_names_to_sum is specified, then we only take these columns. + * Some columns from column_names_to_sum may not be found. This is ignored. 
+ */ + for (size_t i = 0; i < num_columns; ++i) + { + const ColumnWithTypeAndName & column = header.safeGetByPosition(i); + + /// Discover nested Maps and find columns for summation + if (typeid_cast(column.type.get())) + { + const auto map_name = Nested::extractTableName(column.name); + /// if nested table name ends with `Map` it is a possible candidate for special handling + if (map_name == column.name || !endsWith(map_name, "Map")) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + discovered_maps[map_name].emplace_back(i); + } + else + { + bool is_agg_func = WhichDataType(column.type).isAggregateFunction(); + + /// There are special const columns for example after prewhere sections. + if ((!column.type->isSummable() && !is_agg_func) || isColumnConst(*column.column)) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + /// Are they inside the PK? + if (isInPrimaryKey(description, column.name, i)) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + if (column_names_to_sum.empty() + || column_names_to_sum.end() != + std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) + { + // Create aggregator to sum this column + SummingSortedTransform::AggregateDescription desc; + desc.is_agg_func_type = is_agg_func; + desc.column_numbers = {i}; + + if (!is_agg_func) + { + desc.init("sumWithOverflow", {column.type}); + } + + def.columns_to_aggregate.emplace_back(std::move(desc)); + } + else + { + // Column is not going to be summed, use last value + def.column_numbers_not_to_aggregate.push_back(i); + } + } + } + + /// select actual nested Maps from list of candidates + for (const auto & map : discovered_maps) + { + /// map should contain at least two elements (key -> value) + if (map.second.size() < 2) + { + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + continue; + } + + /// no elements of map could be in primary key + auto column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it)) + break; + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + continue; + } + + DataTypes argument_types; + SummingSortedTransform::AggregateDescription desc; + SummingSortedTransform::MapDescription map_desc; + + column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + { + const ColumnWithTypeAndName & key_col = header.safeGetByPosition(*column_num_it); + const String & name = key_col.name; + const IDataType & nested_type = *assert_cast(*key_col.type).getNestedType(); + + if (column_num_it == map.second.begin() + || endsWith(name, "ID") + || endsWith(name, "Key") + || endsWith(name, "Type")) + { + if (!nested_type.isValueRepresentedByInteger() && !isStringOrFixedString(nested_type)) + break; + + map_desc.key_col_nums.push_back(*column_num_it); + } + else + { + if (!nested_type.isSummable()) + break; + + map_desc.val_col_nums.push_back(*column_num_it); + } + + // Add column to function arguments + desc.column_numbers.push_back(*column_num_it); + argument_types.push_back(key_col.type); + } + + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + continue; + } + + if (map_desc.key_col_nums.size() == 1) + { + // Create summation for all value columns in the map + 
desc.init("sumMapWithOverflow", argument_types); + def.columns_to_aggregate.emplace_back(std::move(desc)); + } + else + { + // Fall back to legacy mergeMaps for composite keys + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + def.maps_to_sum.emplace_back(std::move(map_desc)); + } + } + } + + MutableColumns getMergedDataColumns( + const Block & header, + const SummingSortedTransform::ColumnsDefinition & columns_definition) + { + MutableColumns columns; + columns.reserve(columns_definition.getNumColumns()); + + for (auto & desc : columns_definition.columns_to_aggregate) + { + // Wrap aggregated columns in a tuple to match function signature + if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType())) + { + size_t tuple_size = desc.column_numbers.size(); + MutableColumns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = header.safeGetByPosition(desc.column_numbers[i]).column->cloneEmpty(); + + columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); + } + else + columns.emplace_back(header.safeGetByPosition(desc.column_numbers[0]).column->cloneEmpty()); + } + + for (auto & column_number : columns_definition.column_numbers_not_to_aggregate) + columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn()); + + return columns; + } } SummingSortedTransform::SummingSortedTransform( size_t num_inputs, const Block & header, - SortDescription description, + SortDescription description_, /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. const Names & column_names_to_sum, size_t max_block_size) : IMergingTransform(num_inputs, header, header, true) + , columns_definition(defineColumns(header, description_, column_names_to_sum)) + , merged_data(getMergedDataColumns(header, columns_definition), false, max_block_size) { size_t num_columns = header.columns(); current_row.resize(num_columns); - - /// name of nested structure -> the column numbers that refer to it. - std::unordered_map> discovered_maps; - - /** Fill in the column numbers, which must be summed. - * This can only be numeric columns that are not part of the sort key. - * If a non-empty column_names_to_sum is specified, then we only take these columns. - * Some columns from column_names_to_sum may not be found. This is ignored. - */ - for (size_t i = 0; i < num_columns; ++i) - { - const ColumnWithTypeAndName & column = header.safeGetByPosition(i); - - /// Discover nested Maps and find columns for summation - if (typeid_cast(column.type.get())) - { - const auto map_name = Nested::extractTableName(column.name); - /// if nested table name ends with `Map` it is a possible candidate for special handling - if (map_name == column.name || !endsWith(map_name, "Map")) - { - column_numbers_not_to_aggregate.push_back(i); - continue; - } - - discovered_maps[map_name].emplace_back(i); - } - else - { - bool is_agg_func = WhichDataType(column.type).isAggregateFunction(); - - /// There are special const columns for example after prewere sections. - if ((!column.type->isSummable() && !is_agg_func) || isColumnConst(*column.column)) - { - column_numbers_not_to_aggregate.push_back(i); - continue; - } - - /// Are they inside the PK? 
- if (isInPrimaryKey(description, column.name, i)) - { - column_numbers_not_to_aggregate.push_back(i); - continue; - } - - if (column_names_to_sum.empty() - || column_names_to_sum.end() != - std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) - { - // Create aggregator to sum this column - AggregateDescription desc; - desc.is_agg_func_type = is_agg_func; - desc.column_numbers = {i}; - - if (!is_agg_func) - { - desc.init("sumWithOverflow", {column.type}); - } - - columns_to_aggregate.emplace_back(std::move(desc)); - } - else - { - // Column is not going to be summed, use last value - column_numbers_not_to_aggregate.push_back(i); - } - } - } - - /// select actual nested Maps from list of candidates - for (const auto & map : discovered_maps) - { - /// map should contain at least two elements (key -> value) - if (map.second.size() < 2) - { - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - continue; - } - - /// no elements of map could be in primary key - auto column_num_it = map.second.begin(); - for (; column_num_it != map.second.end(); ++column_num_it) - if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it)) - break; - if (column_num_it != map.second.end()) - { - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - continue; - } - - DataTypes argument_types; - AggregateDescription desc; - MapDescription map_desc; - - column_num_it = map.second.begin(); - for (; column_num_it != map.second.end(); ++column_num_it) - { - const ColumnWithTypeAndName & key_col = header.safeGetByPosition(*column_num_it); - const String & name = key_col.name; - const IDataType & nested_type = *static_cast(key_col.type.get())->getNestedType(); - - if (column_num_it == map.second.begin() - || endsWith(name, "ID") - || endsWith(name, "Key") - || endsWith(name, "Type")) - { - if (!nested_type.isValueRepresentedByInteger() && !isStringOrFixedString(nested_type)) - break; - - map_desc.key_col_nums.push_back(*column_num_it); - } - else - { - if (!nested_type.isSummable()) - break; - - map_desc.val_col_nums.push_back(*column_num_it); - } - - // Add column to function arguments - desc.column_numbers.push_back(*column_num_it); - argument_types.push_back(key_col.type); - } - - if (column_num_it != map.second.end()) - { - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - continue; - } - - if (map_desc.key_col_nums.size() == 1) - { - // Create summation for all value columns in the map - desc.init("sumMapWithOverflow", argument_types); - columns_to_aggregate.emplace_back(std::move(desc)); - } - else - { - // Fall back to legacy mergeMaps for composite keys - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - maps_to_sum.emplace_back(std::move(map_desc)); - } - } } } diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.h b/dbms/src/Processors/Merges/SummingSortedTransform.h index f82c1f9e6db..20b49aa7ac8 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.h +++ b/dbms/src/Processors/Merges/SummingSortedTransform.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { @@ -20,7 +21,7 @@ public: SummingSortedTransform( size_t num_inputs, const Block & header, - SortDescription description, + SortDescription description_, /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. 
const Names & column_names_to_sum, size_t max_block_size); @@ -80,7 +81,7 @@ public: struct SummingMergedData : public MergedData { public: - + using MergedData::MergedData; }; /// Stores numbers of key-columns and value-columns. @@ -90,12 +91,51 @@ public: std::vector val_col_nums; }; -private: - /// Columns with which values should be summed. - ColumnNumbers column_numbers_not_to_aggregate; + struct ColumnsDefinition + { + /// Columns with which values should be summed. + ColumnNumbers column_numbers_not_to_aggregate; + /// Columns which should be aggregated. + std::vector columns_to_aggregate; + /// Mapping for nested columns. + std::vector maps_to_sum; - std::vector columns_to_aggregate; - std::vector maps_to_sum; + size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } + }; + +private: + Row current_row; + bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. + + ColumnsDefinition columns_definition; + SummingMergedData merged_data; + + SortDescription description; + + /// Chunks currently being merged. + std::vector source_chunks; + SortCursorImpls cursors; + + /// In merging algorithm, we need to compare current sort key with the last one. + /// So, sorting columns for last row needed to be stored. + /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). + Chunk last_chunk; + ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. + + struct RowRef + { + ColumnRawPtrs * sort_columns = nullptr; /// Point to sort_columns from SortCursor or last_chunk_sort_columns. + UInt64 row_number = 0; + }; + + RowRef last_row; + + SortingHeap queue; + bool is_queue_initialized = false; + + void insertRow(); + void merge(); + void updateCursor(Chunk chunk, size_t source_num); }; } diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp index 9191123c878..4042c146724 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -14,7 +14,7 @@ VersionedCollapsingTransform::VersionedCollapsingTransform( WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : IMergingTransform(num_inputs, header, header, true) - , merged_data(header, use_average_block_sizes, max_block_size) + , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 1) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer From 61d6c61757be99add871888341c3ee3cbc0a154a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 31 Mar 2020 23:12:04 +0300 Subject: [PATCH 031/752] Try fix tests. --- dbms/src/Processors/Merges/IMergingTransform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Processors/Merges/IMergingTransform.cpp b/dbms/src/Processors/Merges/IMergingTransform.cpp index 7c1b58d30f5..2c5a7affd40 100644 --- a/dbms/src/Processors/Merges/IMergingTransform.cpp +++ b/dbms/src/Processors/Merges/IMergingTransform.cpp @@ -177,7 +177,10 @@ IProcessor::Status IMergingTransform::prepare() /// Push if has data. 
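    /// (output_chunk is moved out by the push below, so the has_output_chunk flag
    /// must be cleared afterwards; otherwise a later prepare() call could push
    /// the same moved-from chunk again.)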
if (has_output_chunk && !is_port_full) + { output.push(std::move(output_chunk)); + has_output_chunk = false; + } if (!is_initialized) return prepareInitializeInputs(); From 2ecbf0b0bb2431593f05853ca6aa10ce74518f82 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 1 Apr 2020 14:45:02 +0300 Subject: [PATCH 032/752] Add SummingSortedTransform [part 3] --- .../Merges/SummingSortedTransform.cpp | 184 ++++++++++++++++++ .../Merges/SummingSortedTransform.h | 35 +++- 2 files changed, 217 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index 45f3f9b71c9..7ae127c2314 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -286,6 +286,43 @@ namespace return columns; } + + void finalizeChunk( + Chunk & chunk, size_t num_result_columns, + const SummingSortedTransform::ColumnsDefinition & columns_definition) + { + size_t num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + Columns res_columns(num_result_columns); + size_t next_column = 0; + + for (auto & desc : columns_definition.columns_to_aggregate) + { + auto column = std::move(columns[next_column]); + ++next_column; + + if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType())) + { + /// Unpack tuple into block. + size_t tuple_size = desc.column_numbers.size(); + for (size_t i = 0; i < tuple_size; ++i) + res_columns[desc.column_numbers[i]] = assert_cast(*column).getColumnPtr(i); + } + else + res_columns[desc.column_numbers[0]] = std::move(column); + } + + for (auto column_number : columns_definition.column_numbers_not_to_aggregate) + { + auto column = std::move(columns[next_column]); + ++next_column; + + res_columns[column_number] = std::move(column); + } + + chunk.setColumns(std::move(res_columns), num_rows); + } } SummingSortedTransform::SummingSortedTransform( @@ -302,4 +339,151 @@ SummingSortedTransform::SummingSortedTransform( current_row.resize(num_columns); } +void SummingSortedTransform::initializeInputs() +{ + queue = SortingHeap(cursors); + is_queue_initialized = true; +} + +void SummingSortedTransform::consume(Chunk chunk, size_t input_number) +{ + updateCursor(std::move(chunk), input_number); + + if (is_queue_initialized) + queue.push(cursors[input_number]); +} + +void SummingSortedTransform::updateCursor(Chunk chunk, size_t source_num) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); + + auto & source_chunk = source_chunks[source_num]; + + if (source_chunk) + { + /// Extend lifetime of last chunk. 
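+        /// (The cursor of this source still exposes raw pointers to the previous
+        /// chunk's columns through last_chunk_sort_columns, and merge() compares
+        /// the incoming first row against the previous sort key, so the old chunk
+        /// has to outlive the swap below.)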
+ last_chunk = std::move(source_chunk); + last_chunk_sort_columns = std::move(cursors[source_num].all_columns); + + source_chunk = std::move(chunk); + cursors[source_num].reset(source_chunk.getColumns(), {}); + } + else + { + if (cursors[source_num].has_collation) + throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); + + source_chunk = std::move(chunk); + cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); + } +} + +void SummingSortedTransform::work() +{ + merge(); + prepareOutputChunk(merged_data); + + if (has_output_chunk) + finalizeChunk(output_chunk, getOutputs().back().getHeader().columns(), columns_definition); +} + +void SummingSortedTransform::merge() +{ + /// Take the rows in needed order and put them in `merged_columns` until rows no more than `max_block_size` + while (queue.isValid()) + { + bool key_differs; + bool has_previous_group = !last_key.empty(); + + SortCursor current = queue.current(); + + { + RowRef current_key; + current_key.set(current); + + if (!has_previous_group) /// The first key encountered. + { + key_differs = true; + current_row_is_zero = true; + } + else + key_differs = !last_key.hasEqualSortColumnsWith(current_key); + + last_key = current_key; + last_chunk_sort_columns.clear(); + } + + if (key_differs) + { + if (has_previous_group) + /// Write the data for the previous group. + insertCurrentRowIfNeeded(); + + if (merged_data.hasEnoughRows()) + { + /// The block is now full and the last row is calculated completely. + last_key.reset(); + return; + } + + setRow(current_row, current); + + /// Reset aggregation states for next row + for (auto & desc : columns_definition.columns_to_aggregate) + desc.createState(); + + // Start aggregations with current row + addRow(current); + + if (columns_definition.maps_to_sum.empty()) + { + /// We have only columns_to_aggregate. The status of current row will be determined + /// in 'insertCurrentRowIfNeeded' method on the values of aggregate functions. + current_row_is_zero = true; // NOLINT + } + else + { + /// We have complex maps that will be summed with 'mergeMap' method. + /// The single row is considered non zero, and the status after merging with other rows + /// will be determined in the branch below (when key_differs == false). + current_row_is_zero = false; // NOLINT + } + } + else + { + addRow(current); + + // Merge maps only for same rows + for (const auto & desc : columns_definition.maps_to_sum) + if (mergeMap(desc, current_row, current)) + current_row_is_zero = false; + } + + if (!current->isLast()) + { + queue.next(); + } + else + { + /// We get the next block from the corresponding source, if there is one. + queue.removeTop(); + requestDataForInput(current.impl->order); + return; + } + } + + /// We will write the data for the last group, if it is non-zero. + /// If it is zero, and without it the output stream will be empty, we will write it anyway. 
+ insertCurrentRowIfNeeded(); + last_chunk_sort_columns.clear(); + is_finished = true; +} + + + } diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.h b/dbms/src/Processors/Merges/SummingSortedTransform.h index 20b49aa7ac8..662cc65d95d 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.h +++ b/dbms/src/Processors/Merges/SummingSortedTransform.h @@ -103,6 +103,13 @@ public: size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } }; + String getName() const override { return "SummingSortedTransform"; } + void work() override; + +protected: + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + private: Row current_row; bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. @@ -125,10 +132,34 @@ private: struct RowRef { ColumnRawPtrs * sort_columns = nullptr; /// Point to sort_columns from SortCursor or last_chunk_sort_columns. - UInt64 row_number = 0; + UInt64 row_num = 0; + + bool empty() const { return sort_columns == nullptr; } + void reset() { sort_columns = nullptr; } + + void set(SortCursor & cursor) + { + sort_columns = &cursor.impl->sort_columns; + row_num = cursor.impl->pos; + } + + bool hasEqualSortColumnsWith(const RowRef & other) + { + auto size = sort_columns->size(); + for (size_t col_number = 0; col_number < size; ++col_number) + { + auto & cur_column = (*sort_columns)[col_number]; + auto & other_column = (*other.sort_columns)[col_number]; + + if (0 != cur_column->compareAt(row_num, other.row_num, *other_column, 1)) + return false; + } + + return true; + } }; - RowRef last_row; + RowRef last_key; SortingHeap queue; bool is_queue_initialized = false; From 8e18f56d4f5eb233db7225623fd7d4b86d66797b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 1 Apr 2020 14:53:09 +0300 Subject: [PATCH 033/752] Try fix perftest distributed_aggregation.xml --- dbms/tests/performance/distributed_aggregation.xml | 12 ------------ .../config.d/perf-comparison-tweaks-config.xml | 2 ++ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/dbms/tests/performance/distributed_aggregation.xml b/dbms/tests/performance/distributed_aggregation.xml index 6fdd9fda7c5..25ba4169b9e 100644 --- a/dbms/tests/performance/distributed_aggregation.xml +++ b/dbms/tests/performance/distributed_aggregation.xml @@ -1,18 +1,6 @@ - - - 10 - 1000 - - - 50 - 60000 - - - select count() from (select sipHash64(zero) from zeros_mt(100000000) union all select sipHash64(zero) from zeros_mt(100000000)) select count(sipHash64(zero)) from remote('127.0.0.{{1,1}}', zeros_mt(100000000)) select count(sipHash64(zero)) from remote('127.0.0.{{1,2}}', zeros_mt(100000000)) select count(sipHash64(zero)) from remote('127.0.0.{{2,3}}', zeros_mt(100000000)) - diff --git a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml index 863a40718d9..a6d59fc3b4c 100644 --- a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml @@ -1,4 +1,6 @@ + :: + true From 5b22a9596d6698ed8f794f25fa6604c2c6a99558 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 1 Apr 2020 15:16:39 +0300 Subject: [PATCH 034/752] fixup --- dbms/src/Columns/IColumn.cpp | 5 ++ dbms/src/Columns/IColumn.h | 1 + dbms/src/Core/Block.cpp | 16 +++--- 
dbms/src/Core/Block.h | 17 +++++- dbms/src/Interpreters/ActionsVisitor.cpp | 60 +++++++++++++++----- dbms/src/Interpreters/ActionsVisitor.h | 11 +++- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 30 +++++++++- 7 files changed, 115 insertions(+), 25 deletions(-) diff --git a/dbms/src/Columns/IColumn.cpp b/dbms/src/Columns/IColumn.cpp index a3ed0885651..df56823b6aa 100644 --- a/dbms/src/Columns/IColumn.cpp +++ b/dbms/src/Columns/IColumn.cpp @@ -9,6 +9,11 @@ namespace DB { +Field IColumn::get(size_t n) const +{ + return (*this)[n]; +} + String IColumn::dumpStructure() const { WriteBufferFromOwnString res; diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 090537d6770..aa9455fc6cc 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -70,6 +70,7 @@ public: /// Returns value of n-th element in universal Field representation. /// Is used in rare cases, since creation of Field instance is expensive usually. virtual Field operator[](size_t n) const = 0; + Field get(size_t n) const; /// Like the previous one, but avoids extra copying if Field is in a container, for example. virtual void get(size_t n, Field & res) const = 0; diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index a18d34af994..bc29a74f4eb 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -181,25 +181,25 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const } -ColumnWithTypeAndName & Block::getByName(const std::string & name) +const ColumnWithTypeAndName * Block::findByName(const std::string & name) const { auto it = index_by_name.find(name); if (index_by_name.end() == it) - throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() - , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - - return data[it->second]; + { + return nullptr; + } + return &data[it->second]; } const ColumnWithTypeAndName & Block::getByName(const std::string & name) const { - auto it = index_by_name.find(name); - if (index_by_name.end() == it) + auto * result = findByName(name); + if (!result) throw Exception("Not found column " + name + " in block. 
There are only columns: " + dumpNames() , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - return data[it->second]; + return *result; } diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 82b60c83efb..ce804ddc0b5 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -28,7 +28,7 @@ class Block { private: using Container = ColumnsWithTypeAndName; - using IndexByName = std::map; + using IndexByName = std::unordered_map; Container data; IndexByName index_by_name; @@ -64,7 +64,20 @@ public: ColumnWithTypeAndName & safeGetByPosition(size_t position); const ColumnWithTypeAndName & safeGetByPosition(size_t position) const; - ColumnWithTypeAndName & getByName(const std::string & name); + ColumnWithTypeAndName* findByName(const std::string & name) + { + return const_cast( + const_cast(this)->findByName(name)); + } + + const ColumnWithTypeAndName* findByName(const std::string & name) const; + + ColumnWithTypeAndName & getByName(const std::string & name) + { + return const_cast( + const_cast(this)->getByName(name)); + } + const ColumnWithTypeAndName & getByName(const std::string & name) const; Container::iterator begin() { return data.begin(); } diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index eb92ee76973..94c6cc000f9 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -195,17 +195,17 @@ SetPtr makeExplicitSet( return set; } -static String getUniqueName(const Block & block, const String & prefix) +static String getUniqueName(ActionsVisitor::Data & data, const String & prefix) { + auto & block = data.getSampleBlock(); auto result = prefix; if (block.has(result)) { - int i = 1; do { - result = prefix + "_" + toString(i); - ++i; + result = prefix + "_" + toString(data.next_unique_suffix); + ++data.next_unique_suffix; } while (block.has(result)); } @@ -468,7 +468,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, /// so that sets with the same literal representation do not fuse together (they can have different types). if (!prepared_set->empty()) - column.name = getUniqueName(data.getSampleBlock(), "__set"); + column.name = getUniqueName(data, "__set"); else column.name = child->getColumnName(); @@ -496,7 +496,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & ColumnWithTypeAndName column( ColumnConst::create(std::move(column_string), 1), std::make_shared(), - getUniqueName(data.getSampleBlock(), "__joinGet")); + getUniqueName(data, "__joinGet")); data.addAction(ExpressionAction::addColumn(column)); argument_types.push_back(column.type); argument_names.push_back(column.name); @@ -577,7 +577,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & /// We can not name `getColumnName()`, /// because it does not uniquely define the expression (the types of arguments can be different). 
- String lambda_name = getUniqueName(data.getSampleBlock(), "__lambda"); + String lambda_name = getUniqueName(data, "__lambda"); auto function_capture = std::make_unique( lambda_actions, captured, lambda_arguments, result_type, result_name); @@ -612,16 +612,50 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, Data & data) { DataTypePtr type = applyVisitor(FieldToDataType(), literal.value); + const auto value = convertFieldToType(literal.value, *type); + + // FIXME why do we have a second pass with a clean sample block over the same + // AST here? Anyway, do not modify the column name if it is set already. + if (literal.unique_column_name.empty()) + { + const auto default_name = literal.getColumnName(); + auto & block = data.getSampleBlock(); + auto * existing_column = block.findByName(default_name); + + /* + * To approximate CSE, build all identical literals to a single temporary + * columns. We try to find the column by its default name, but after that + * we have to check that it contains the correct data. This might not be + * the case if it is a user-supplied column, or it is from under a join, + * etc. + * Overall, this is a hack around a generally poor name-based notion of + * column identity we currently use. + */ + if (existing_column + && existing_column->column + && isColumnConst(*existing_column->column) + && existing_column->column->size() == 1 + && existing_column->column->get(0) == value) + { + const_cast(literal).unique_column_name = default_name; + } + else + { + const_cast(literal).unique_column_name + = getUniqueName(data, default_name); + } + } + + if (data.hasColumn(literal.unique_column_name)) + { + return; + } ColumnWithTypeAndName column; - column.column = type->createColumnConst(1, convertFieldToType(literal.value, *type)); + column.name = literal.unique_column_name; + column.column = type->createColumnConst(1, value); column.type = type; - // Always create columns for literals with a unique name. Otherwise, there - // may be some weird clashes, see 01101_literal_column_clash. - column.name = getUniqueName(data.getSampleBlock(), literal.getColumnName()); - const_cast(literal).unique_column_name = column.name; - data.addAction(ExpressionAction::addColumn(column)); } diff --git a/dbms/src/Interpreters/ActionsVisitor.h b/dbms/src/Interpreters/ActionsVisitor.h index f6db551ff33..e67e181e009 100644 --- a/dbms/src/Interpreters/ActionsVisitor.h +++ b/dbms/src/Interpreters/ActionsVisitor.h @@ -42,6 +42,7 @@ struct ScopeStack const Context & context; +public: ScopeStack(const ExpressionActionsPtr & actions, const Context & context_); void pushLevel(const NamesAndTypesList & input_columns); @@ -80,6 +81,13 @@ public: size_t visit_depth; ScopeStack actions_stack; + /* + * Remember the last unique column suffix to avoid quadratic behavior + * when we add lots of column with same prefix. One counter for all + * prefixes is good enough. 
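+     * (Illustration: with the counter at 7, consecutive calls for the prefixes
+     * "__set" and "__lambda" produce "__set_7" and then "__lambda_8", instead of
+     * probing "__lambda_1", "__lambda_2", ... against the sample block each time.)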
+ */ + int next_unique_suffix; + Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, @@ -95,7 +103,8 @@ public: only_consts(only_consts_), no_storage_or_local(no_storage_or_local_), visit_depth(0), - actions_stack(actions, context) + actions_stack(actions, context), + next_unique_suffix(actions_stack.getSampleBlock().columns() + 1) {} void updateActions(ExpressionActionsPtr & actions) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index cad4b3bd188..dc362542df9 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -855,7 +855,35 @@ void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & String result_name = ast->getAliasOrColumnName(); if (required_result_columns.empty() || required_result_columns.count(result_name)) { - result_columns.emplace_back(ast->getColumnName(), result_name); + std::string source_name = ast->getColumnName(); + + /* + * For temporary columns created by ExpressionAnalyzer for literals, + * use the correct source column. Using the default display name + * returned by getColumnName is not enough, and we have to use the + * column id set by EA. In principle, this logic applies to all kinds + * of columns, not only literals. Literals are especially problematic + * for two reasons: + * 1) confusing different literal columns leads to weird side + * effects (see 01101_literal_columns_clash); + * 2) the disambiguation mechanism in SyntaxAnalyzer, that, among + * other things, creates unique aliases for columns with same + * names from different tables, is applied before these temporary + * columns are created by ExpressionAnalyzer. + * Similar problems should also manifest for function columns, which + * are likewise created at a later stage by EA. + * In general, we need to have explicit separation between display + * names and identifiers for columns. This code is a workaround for + * a particular subclass of problems, and not a proper solution. + */ + if (auto as_literal = dynamic_cast(ast.get()); + as_literal) + { + source_name = as_literal->unique_column_name; + assert(!source_name.empty()); + } + + result_columns.emplace_back(source_name, result_name); step.required_output.push_back(result_columns.back().second); } } From ed5c6bff5eca46c8a0050b089759d80d814021d0 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 1 Apr 2020 15:21:13 +0300 Subject: [PATCH 035/752] fixpu --- dbms/src/Interpreters/ActionsVisitor.cpp | 2 +- .../queries/0_stateless/01101_literal_column_clash.reference | 2 ++ dbms/tests/queries/0_stateless/01101_literal_column_clash.sql | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index 94c6cc000f9..30f3fc754b4 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -623,7 +623,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, auto * existing_column = block.findByName(default_name); /* - * To approximate CSE, build all identical literals to a single temporary + * To approximate CSE, bind all identical literals to a single temporary * columns. 
We try to find the column by its default name, but after that * we have to check that it contains the correct data. This might not be * the case if it is a user-supplied column, or it is from under a join, diff --git a/dbms/tests/queries/0_stateless/01101_literal_column_clash.reference b/dbms/tests/queries/0_stateless/01101_literal_column_clash.reference index 0dc94464bfc..5cbe1f1eea3 100644 --- a/dbms/tests/queries/0_stateless/01101_literal_column_clash.reference +++ b/dbms/tests/queries/0_stateless/01101_literal_column_clash.reference @@ -1,3 +1,5 @@ 1 7 3 xyzabc 2 +1 3 +1 2 0 0 diff --git a/dbms/tests/queries/0_stateless/01101_literal_column_clash.sql b/dbms/tests/queries/0_stateless/01101_literal_column_clash.sql index 11e3b622277..3a2a71f126a 100644 --- a/dbms/tests/queries/0_stateless/01101_literal_column_clash.sql +++ b/dbms/tests/queries/0_stateless/01101_literal_column_clash.sql @@ -6,5 +6,8 @@ join (select '1' as sid) as t2 on t2.sid = cast(t1.iid as String); -- even simpler cases select cast(7 as String), * from (select 3 "'String'"); SELECT concat('xyz', 'abc'), * FROM (SELECT 2 AS "'xyz'"); +with 3 as "1" select 1, "1"; +-- https://github.com/ClickHouse/ClickHouse/issues/9953 +select 1, * from (select 2 x) a left join (select 1, 3 y) b on y = x; From f4e4aeda7fb7f47586ee53cad9fb3dae0b0969cf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 1 Apr 2020 17:08:48 +0300 Subject: [PATCH 036/752] Add SummingSortedTransform [part 4] --- .../Merges/CollapsingSortedTransform.cpp | 2 + .../Merges/ReplacingSortedTransform.cpp | 2 + .../Merges/SummingSortedTransform.cpp | 171 ++++++++++++++++-- .../Merges/SummingSortedTransform.h | 141 +++++++++------ .../Merges/VersionedCollapsingTransform.cpp | 2 + 5 files changed, 241 insertions(+), 77 deletions(-) diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp index 9fcb7cea116..009aed0983f 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -29,6 +29,8 @@ CollapsingSortedTransform::CollapsingSortedTransform( , description(std::move(description_)) , sign_column_number(header.getPositionByName(sign_column)) , out_row_sources_buf(out_row_sources_buf_) + , source_chunks(num_inputs) + , cursors(num_inputs) , chunk_allocator(num_inputs + max_row_refs) { } diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp index 65654a98764..e39b33a5a46 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp @@ -19,6 +19,8 @@ ReplacingSortedTransform::ReplacingSortedTransform( , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) + , source_chunks(num_inputs) + , cursors(num_inputs) , chunk_allocator(num_inputs + max_row_refs) { if (!version_column.empty()) diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index 7ae127c2314..87bf533d5c5 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -1,14 +1,23 @@ #include + #include #include -#include -#include -#include +#include #include +#include +#include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + 
extern const int CORRUPTED_DATA; +} + namespace { bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) @@ -159,7 +168,7 @@ namespace std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) { // Create aggregator to sum this column - SummingSortedTransform::AggregateDescription desc; + detail::AggregateDescription desc; desc.is_agg_func_type = is_agg_func; desc.column_numbers = {i}; @@ -202,7 +211,7 @@ namespace } DataTypes argument_types; - SummingSortedTransform::AggregateDescription desc; + detail::AggregateDescription desc; SummingSortedTransform::MapDescription map_desc; column_num_it = map.second.begin(); @@ -323,20 +332,52 @@ namespace chunk.setColumns(std::move(res_columns), num_rows); } + + void setRow(Row & row, SortCursor & cursor, const Block & header) + { + size_t num_columns = row.size(); + for (size_t i = 0; i < num_columns; ++i) + { + try + { + cursor->all_columns[i]->get(cursor->pos, row[i]); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + + /// Find out the name of the column and throw more informative exception. + + String column_name; + if (i < header.columns()) + { + column_name = header.safeGetByPosition(i).name; + break; + } + + throw Exception("MergingSortedBlockInputStream failed to read row " + toString(cursor->pos) + + " of column " + toString(i) + (column_name.empty() ? "" : " (" + column_name + ")"), + ErrorCodes::CORRUPTED_DATA); + } + } + } } SummingSortedTransform::SummingSortedTransform( - size_t num_inputs, const Block & header, - SortDescription description_, - /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. - const Names & column_names_to_sum, - size_t max_block_size) - : IMergingTransform(num_inputs, header, header, true) - , columns_definition(defineColumns(header, description_, column_names_to_sum)) - , merged_data(getMergedDataColumns(header, columns_definition), false, max_block_size) + size_t num_inputs, const Block & header, + SortDescription description_, + /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. + const Names & column_names_to_sum, + size_t max_block_size) + : IMergingTransform(num_inputs, header, header, true) + , columns_definition(defineColumns(header, description_, column_names_to_sum)) + , merged_data(getMergedDataColumns(header, columns_definition), false, max_block_size) + , description(std::move(description_)) + , source_chunks(num_inputs) + , cursors(num_inputs) { - size_t num_columns = header.columns(); - current_row.resize(num_columns); + current_row.resize(header.columns()); + merged_data.initAggregateDescription(columns_definition.columns_to_aggregate); } void SummingSortedTransform::initializeInputs() @@ -389,7 +430,103 @@ void SummingSortedTransform::work() prepareOutputChunk(merged_data); if (has_output_chunk) + { finalizeChunk(output_chunk, getOutputs().back().getHeader().columns(), columns_definition); + merged_data.initAggregateDescription(columns_definition.columns_to_aggregate); + } +} + +void SummingSortedTransform::insertCurrentRowIfNeeded() +{ + /// We have nothing to aggregate. It means that it could be non-zero, because we have columns_not_to_aggregate. 
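+    /// (Without this, a group whose only data lives in the non-aggregated
+    /// columns would be classified as all-zero and rolled back below.)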
+ if (columns_definition.columns_to_aggregate.empty()) + current_row_is_zero = false; + + for (auto & desc : columns_definition.columns_to_aggregate) + { + // Do not insert if the aggregation state hasn't been created + if (desc.created) + { + if (desc.is_agg_func_type) + { + current_row_is_zero = false; + } + else + { + try + { + desc.function->insertResultInto(desc.state.data(), *desc.merged_column); + + /// Update zero status of current row + if (desc.column_numbers.size() == 1) + { + // Flag row as non-empty if at least one column number if non-zero + current_row_is_zero = current_row_is_zero && desc.merged_column->isDefaultAt(desc.merged_column->size() - 1); + } + else + { + /// It is sumMapWithOverflow aggregate function. + /// Assume that the row isn't empty in this case (just because it is compatible with previous version) + current_row_is_zero = false; + } + } + catch (...) + { + desc.destroyState(); + throw; + } + } + desc.destroyState(); + } + else + desc.merged_column->insertDefault(); + } + + /// If it is "zero" row, then rollback the insertion + /// (at this moment we need rollback only cols from columns_to_aggregate) + if (current_row_is_zero) + { + for (auto & desc : columns_definition.columns_to_aggregate) + desc.merged_column->popBack(1); + + return; + } + + merged_data.insertRow(current_row, columns_definition.column_numbers_not_to_aggregate); +} + +void SummingSortedTransform::addRow(SortCursor & cursor) +{ + for (auto & desc : columns_definition.columns_to_aggregate) + { + if (!desc.created) + throw Exception("Logical error in SummingSortedBlockInputStream, there are no description", ErrorCodes::LOGICAL_ERROR); + + if (desc.is_agg_func_type) + { + // desc.state is not used for AggregateFunction types + auto & col = cursor->all_columns[desc.column_numbers[0]]; + assert_cast(*desc.merged_column).insertMergeFrom(*col, cursor->pos); + } + else + { + // Specialized case for unary functions + if (desc.column_numbers.size() == 1) + { + auto & col = cursor->all_columns[desc.column_numbers[0]]; + desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, nullptr); + } + else + { + // Gather all source columns into a vector + ColumnRawPtrs columns(desc.column_numbers.size()); + for (size_t i = 0; i < desc.column_numbers.size(); ++i) + columns[i] = cursor->all_columns[desc.column_numbers[i]]; + + desc.add_function(desc.function.get(), desc.state.data(), columns.data(), cursor->pos, nullptr); + } + } + } } void SummingSortedTransform::merge() @@ -403,7 +540,7 @@ void SummingSortedTransform::merge() SortCursor current = queue.current(); { - RowRef current_key; + detail::RowRef current_key; current_key.set(current); if (!has_previous_group) /// The first key encountered. @@ -431,7 +568,7 @@ void SummingSortedTransform::merge() return; } - setRow(current_row, current); + setRow(current_row, current, getInputs().front().getHeader()); /// Reset aggregation states for next row for (auto & desc : columns_definition.columns_to_aggregate) diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.h b/dbms/src/Processors/Merges/SummingSortedTransform.h index 662cc65d95d..678ff6587a7 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.h +++ b/dbms/src/Processors/Merges/SummingSortedTransform.h @@ -13,20 +13,9 @@ namespace DB { - - -class SummingSortedTransform : public IMergingTransform +namespace detail { -public: - - SummingSortedTransform( - size_t num_inputs, const Block & header, - SortDescription description_, - /// List of columns to be summed. 
If empty, all numeric columns that are not in the description are taken. - const Names & column_names_to_sum, - size_t max_block_size); - - /// Stores aggregation function, state, and columns to be used as function arguments + /// Stores aggregation function, state, and columns to be used as function arguments. struct AggregateDescription { /// An aggregate function 'sumWithOverflow' or 'sumMapWithOverflow' for summing. @@ -78,57 +67,31 @@ public: AggregateDescription(const AggregateDescription &) = delete; }; + /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. struct SummingMergedData : public MergedData { public: using MergedData::MergedData; + + void insertRow(const Row & row, const ColumnNumbers & column_numbers) + { + for (auto column_number :column_numbers) + columns[column_number]->insert(row[column_number]); + + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; + } + + /// Initialize aggregate descriptions with columns. + void initAggregateDescription(std::vector & columns_to_aggregate) + { + size_t num_columns = columns_to_aggregate.size(); + for (size_t column_number = 0; column_number < num_columns; ++column_number) + columns_to_aggregate[column_number].merged_column = columns[column_number].get(); + } }; - /// Stores numbers of key-columns and value-columns. - struct MapDescription - { - std::vector key_col_nums; - std::vector val_col_nums; - }; - - struct ColumnsDefinition - { - /// Columns with which values should be summed. - ColumnNumbers column_numbers_not_to_aggregate; - /// Columns which should be aggregated. - std::vector columns_to_aggregate; - /// Mapping for nested columns. - std::vector maps_to_sum; - - size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } - }; - - String getName() const override { return "SummingSortedTransform"; } - void work() override; - -protected: - void initializeInputs() override; - void consume(Chunk chunk, size_t input_number) override; - -private: - Row current_row; - bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. - - ColumnsDefinition columns_definition; - SummingMergedData merged_data; - - SortDescription description; - - /// Chunks currently being merged. - std::vector source_chunks; - SortCursorImpls cursors; - - /// In merging algorithm, we need to compare current sort key with the last one. - /// So, sorting columns for last row needed to be stored. - /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). - Chunk last_chunk; - ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. - struct RowRef { ColumnRawPtrs * sort_columns = nullptr; /// Point to sort_columns from SortCursor or last_chunk_sort_columns. @@ -158,15 +121,73 @@ private: return true; } }; +} - RowRef last_key; +class SummingSortedTransform : public IMergingTransform +{ +public: + + SummingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, + /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. + const Names & column_names_to_sum, + size_t max_block_size); + + /// Stores numbers of key-columns and value-columns. + struct MapDescription + { + std::vector key_col_nums; + std::vector val_col_nums; + }; + + struct ColumnsDefinition + { + /// Columns with which values should be summed. 
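+        /// (That is, the positions of columns that are not summed; they keep the
+        /// value from the first row of each group.)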
+ ColumnNumbers column_numbers_not_to_aggregate; + /// Columns which should be aggregated. + std::vector columns_to_aggregate; + /// Mapping for nested columns. + std::vector maps_to_sum; + + size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } + }; + + String getName() const override { return "SummingSortedTransform"; } + void work() override; + +protected: + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + +private: + Row current_row; + bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. + + ColumnsDefinition columns_definition; + detail::SummingMergedData merged_data; + + SortDescription description; + + /// Chunks currently being merged. + std::vector source_chunks; + SortCursorImpls cursors; + + /// In merging algorithm, we need to compare current sort key with the last one. + /// So, sorting columns for last row needed to be stored. + /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). + Chunk last_chunk; + ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. + + detail::RowRef last_key; SortingHeap queue; bool is_queue_initialized = false; - void insertRow(); void merge(); void updateCursor(Chunk chunk, size_t source_num); + void addRow(SortCursor & cursor); + void insertCurrentRowIfNeeded(); }; } diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp index 4042c146724..8b8b2bfa063 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -17,6 +17,8 @@ VersionedCollapsingTransform::VersionedCollapsingTransform( , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) + , source_chunks(num_inputs) + , cursors(num_inputs) , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 1) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer , current_keys(max_rows_in_queue) , chunk_allocator(num_inputs + max_rows_in_queue + 1) /// +1 just in case (for current_row) From 77e6714031c1221f3f18c9be608c44adf96133a9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 1 Apr 2020 17:28:20 +0300 Subject: [PATCH 037/752] Fix build. --- dbms/src/Processors/Merges/SummingSortedTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index 87bf533d5c5..0741dc1cd10 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -265,6 +265,8 @@ namespace def.maps_to_sum.emplace_back(std::move(map_desc)); } } + + return def; } MutableColumns getMergedDataColumns( From a0f163c7618c6f90534a20eafc1573b82d23ef01 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 1 Apr 2020 21:00:26 +0300 Subject: [PATCH 038/752] Add AggregatingSortedTransform. 
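
Merges several sorted inputs into one, collapsing rows that share a sorting
key into a single output row: plain AggregateFunction columns are combined
state-wise via ColumnAggregateFunction::insertMergeFrom, SimpleAggregateFunction
columns are folded through the function's add()/insertResultInto() cycle (with
a per-group Arena when the function allocates there), and the remaining columns
keep the values of the first row of each group.

A minimal construction sketch; hedged: only the constructor signature is taken
from this patch, the column name and the pipeline wiring are assumptions for
illustration, and every input must already be sorted by the description:

    SortDescription description{SortColumnDescription("key", 1, 1)};
    auto transform = std::make_shared<AggregatingSortedTransform>(
        /*num_inputs=*/2, header, description, /*max_block_size=*/8192);
    /// Attach two sorted input ports and one output port to the surrounding
    /// pipeline here; the exact helper for that is not part of this patch.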
--- .../Merges/AggregatingSortedTransform.cpp | 252 ++++++++++++++++++ .../Merges/AggregatingSortedTransform.h | 162 +++++++++++ .../Merges/CollapsingSortedTransform.h | 2 +- .../Merges/ReplacingSortedTransform.h | 4 +- dbms/src/Processors/Merges/RowRef.h | 56 ++-- .../Merges/SummingSortedTransform.cpp | 4 +- .../Merges/SummingSortedTransform.h | 217 +++++++-------- .../Merges/VersionedCollapsingTransform.h | 4 +- 8 files changed, 556 insertions(+), 145 deletions(-) create mode 100644 dbms/src/Processors/Merges/AggregatingSortedTransform.cpp create mode 100644 dbms/src/Processors/Merges/AggregatingSortedTransform.h diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp new file mode 100644 index 00000000000..23524dfc395 --- /dev/null +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -0,0 +1,252 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + AggregatingSortedTransform::ColumnsDefinition defineColumns( + const Block & header, const SortDescription & description) + { + AggregatingSortedTransform::ColumnsDefinition def = {}; + size_t num_columns = header.columns(); + + /// Fill in the column numbers that need to be aggregated. + for (size_t i = 0; i < num_columns; ++i) + { + const ColumnWithTypeAndName & column = header.safeGetByPosition(i); + + /// We leave only states of aggregate functions. + if (!dynamic_cast(column.type.get()) + && !dynamic_cast(column.type->getCustomName())) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + /// Included into PK? + auto it = description.begin(); + for (; it != description.end(); ++it) + if (it->column_name == column.name || (it->column_name.empty() && it->column_number == i)) + break; + + if (it != description.end()) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + if (auto simple_aggr = dynamic_cast(column.type->getCustomName())) + { + auto type = recursiveRemoveLowCardinality(column.type); + if (type.get() == column.type.get()) + type = nullptr; + + // simple aggregate function + AggregatingSortedTransform::SimpleAggregateDescription desc(simple_aggr->getFunction(), i, type); + if (desc.function->allocatesMemoryInArena()) + def.allocates_memory_in_arena = true; + + def.columns_to_simple_aggregate.emplace_back(std::move(desc)); + } + else + { + // standard aggregate function + def.columns_to_aggregate.emplace_back(i); + } + } + } +} + +AggregatingSortedTransform::AggregatingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, size_t max_block_size) + : IMergingTransform(num_inputs, header, header, true) + , columns_definition(defineColumns(header, description_)) + , merged_data(header.cloneEmptyColumns(), false, max_block_size) + , description(std::move(description_)) + , source_chunks(num_inputs) + , cursors(num_inputs) +{ + merged_data.initAggregateDescription(columns_definition); +} + +void AggregatingSortedTransform::initializeInputs() +{ + queue = SortingHeap(cursors); + is_queue_initialized = true; +} + +void AggregatingSortedTransform::consume(Chunk chunk, size_t input_number) +{ + updateCursor(std::move(chunk), input_number); + + if (is_queue_initialized) + queue.push(cursors[input_number]); +} + +void AggregatingSortedTransform::updateCursor(Chunk chunk, size_t source_num) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + for (auto & column : columns) + column = 
column->convertToFullColumnIfConst(); + + for (auto & desc : columns_definition.columns_to_simple_aggregate) + if (desc.type_to_convert) + columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); + + chunk.setColumns(std::move(columns), num_rows); + + auto & source_chunk = source_chunks[source_num]; + + if (source_chunk) + { + /// Extend lifetime of last chunk. + last_chunk = std::move(source_chunk); + last_chunk_sort_columns = std::move(cursors[source_num].all_columns); + + source_chunk = std::move(chunk); + cursors[source_num].reset(source_chunk.getColumns(), {}); + } + else + { + if (cursors[source_num].has_collation) + throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); + + source_chunk = std::move(chunk); + cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); + } +} + +void AggregatingSortedTransform::work() +{ + merge(); + prepareOutputChunk(merged_data); + + if (has_output_chunk) + { + size_t num_rows = output_chunk.getNumRows(); + auto columns = output_chunk.detachColumns(); + auto & header = getOutputs().back().getHeader(); + + for (auto & desc : columns_definition.columns_to_simple_aggregate) + { + if (desc.type_to_convert) + { + auto & from_type = header.getByPosition(desc.column_number).type; + auto & to_type = desc.type_to_convert; + columns[desc.column_number] = recursiveTypeConversion(columns[desc.column_number], from_type, to_type); + } + } + + output_chunk.setColumns(std::move(columns), num_rows); + + merged_data.initAggregateDescription(columns_definition); + } +} + +void AggregatingSortedTransform::merge() +{ + /// We take the rows in the correct order and put them in `merged_block`, while the rows are no more than `max_block_size` + while (queue.isValid()) + { + bool key_differs; + bool has_previous_group = !last_key.empty(); + + SortCursor current = queue.current(); + + { + detail::RowRef current_key; + current_key.set(current); + + if (!has_previous_group) /// The first key encountered. + key_differs = true; + else + key_differs = !last_key.hasEqualSortColumnsWith(current_key); + + last_key = current_key; + last_chunk_sort_columns.clear(); + } + + if (key_differs) + { + /// if there are enough rows accumulated and the last one is calculated completely + if (merged_data.hasEnoughRows()) + { + /// Write the simple aggregation result for the previous group. + insertSimpleAggregationResult(); + return; + } + + /// We will write the data for the group. We copy the values of ordinary columns. + merged_data.insertRow(current->all_columns, current->pos, + columns_definition.column_numbers_not_to_aggregate); + + /// Add the empty aggregation state to the aggregate columns. The state will be updated in the `addRow` function. + for (auto & column_to_aggregate : columns_definition.columns_to_aggregate) + column_to_aggregate.column->insertDefault(); + + /// Write the simple aggregation result for the previous group. + if (merged_data.mergedRows() > 0) + insertSimpleAggregationResult(); + + /// Reset simple aggregation states for next row + for (auto & desc : columns_definition.columns_to_simple_aggregate) + desc.createState(); + + if (columns_definition.allocates_memory_in_arena) + arena = std::make_unique(); + } + + addRow(current); + + if (!current->isLast()) + { + queue.next(); + } + else + { + /// We get the next block from the corresponding source, if there is one. 
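+            /// (removeTop() detaches the exhausted cursor from the heap; once the
+            /// requested chunk arrives, consume() pushes the cursor back and the
+            /// merge resumes from the same position.)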
+ queue.removeTop(); + requestDataForInput(current.impl->order); + return; + } + } + + /// Write the simple aggregation result for the previous group. + if (merged_data.mergedRows() > 0) + insertSimpleAggregationResult(); + + last_chunk_sort_columns.clear(); + is_finished = true; +} + +void AggregatingSortedTransform::addRow(SortCursor & cursor) +{ + for (auto & desc : columns_definition.columns_to_aggregate) + desc.column->insertMergeFrom(*cursor->all_columns[desc.column_number], cursor->pos); + + for (auto & desc : columns_definition.columns_to_simple_aggregate) + { + auto & col = cursor->all_columns[desc.column_number]; + desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, arena.get()); + } +} + +void AggregatingSortedTransform::insertSimpleAggregationResult() +{ + for (auto & desc : columns_definition.columns_to_simple_aggregate) + { + desc.function->insertResultInto(desc.state.data(), *desc.column); + desc.destroyState(); + } +} + +} diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.h b/dbms/src/Processors/Merges/AggregatingSortedTransform.h new file mode 100644 index 00000000000..613ac0baa58 --- /dev/null +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.h @@ -0,0 +1,162 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +class ColumnAggregateFunction; + +class AggregatingSortedTransform : public IMergingTransform +{ +public: + AggregatingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, size_t max_block_size); + + struct SimpleAggregateDescription; + + struct ColumnsDefinition + { + struct AggregateDescription + { + ColumnAggregateFunction * column = nullptr; + const size_t column_number = 0; + + AggregateDescription() = default; + explicit AggregateDescription(size_t col_number) : column_number(col_number) {} + }; + + /// Columns with which numbers should not be aggregated. + ColumnNumbers column_numbers_not_to_aggregate; + std::vector columns_to_aggregate; + std::vector columns_to_simple_aggregate; + + /// Does SimpleAggregateFunction allocates memory in arena? + bool allocates_memory_in_arena = false; + }; + + String getName() const override { return "AggregatingSortedTransform"; } + void work() override; + +protected: + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + +private: + + /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. + struct AggregatingMergedData : public MergedData + { + public: + using MergedData::MergedData; + + void insertRow(const ColumnRawPtrs & raw_columns, size_t row, const ColumnNumbers & column_numbers) + { + for (auto column_number :column_numbers) + columns[column_number]->insertFrom(*raw_columns[column_number], row); + + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; + } + + /// Initialize aggregate descriptions with columns. + void initAggregateDescription(ColumnsDefinition & def) + { + for (auto & desc : def.columns_to_simple_aggregate) + desc.column = columns[desc.column_number].get(); + + for (auto & desc : def.columns_to_aggregate) + desc.column = typeid_cast(columns[desc.column_number].get()); + } + }; + + ColumnsDefinition columns_definition; + AggregatingMergedData merged_data; + + SortDescription description; + + /// Chunks currently being merged. 
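+    /// (One slot per input: cursors[i] walks over source_chunks[i], which is
+    /// replaced in updateCursor() each time input i delivers a new chunk.)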
+ std::vector source_chunks; + SortCursorImpls cursors; + + /// In merging algorithm, we need to compare current sort key with the last one. + /// So, sorting columns for last row needed to be stored. + /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). + Chunk last_chunk; + ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. + + detail::RowRef last_key; + + SortingHeap queue; + bool is_queue_initialized = false; + + /// Memory pool for SimpleAggregateFunction + /// (only when allocates_memory_in_arena == true). + std::unique_ptr arena; + + void merge(); + void updateCursor(Chunk chunk, size_t source_num); + void addRow(SortCursor & cursor); + void insertSimpleAggregationResult(); + +public: + /// Stores information for aggregation of SimpleAggregateFunction columns + struct SimpleAggregateDescription + { + /// An aggregate function 'anyLast', 'sum'... + AggregateFunctionPtr function; + IAggregateFunction::AddFunc add_function = nullptr; + + size_t column_number = 0; + IColumn * column = nullptr; + const DataTypePtr type_to_convert; + + AlignedBuffer state; + bool created = false; + + SimpleAggregateDescription(AggregateFunctionPtr function_, const size_t column_number_, DataTypePtr type) + : function(std::move(function_)), column_number(column_number_), type_to_convert(std::move(type)) + { + add_function = function->getAddressOfAddFunction(); + state.reset(function->sizeOfData(), function->alignOfData()); + } + + void createState() + { + if (created) + return; + function->create(state.data()); + created = true; + } + + void destroyState() + { + if (!created) + return; + function->destroy(state.data()); + created = false; + } + + /// Explicitly destroy aggregation state if the stream is terminated + ~SimpleAggregateDescription() + { + destroyState(); + } + + SimpleAggregateDescription() = default; + SimpleAggregateDescription(SimpleAggregateDescription &&) = default; + SimpleAggregateDescription(const SimpleAggregateDescription &) = delete; + }; +}; + +} diff --git a/dbms/src/Processors/Merges/CollapsingSortedTransform.h b/dbms/src/Processors/Merges/CollapsingSortedTransform.h index 58c97f964bc..46e3fb2e693 100644 --- a/dbms/src/Processors/Merges/CollapsingSortedTransform.h +++ b/dbms/src/Processors/Merges/CollapsingSortedTransform.h @@ -64,7 +64,7 @@ private: SortingHeap queue; bool is_queue_initialized = false; - using RowRef = detail::RowRef; + using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 4; /// first_negative, last_positive, last, current. RowRef first_negative_row; RowRef last_positive_row; diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.h b/dbms/src/Processors/Merges/ReplacingSortedTransform.h index 4f4b71c5b13..d28bd239cfe 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.h +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.h @@ -13,7 +13,7 @@ namespace DB { -class ReplacingSortedTransform : public IMergingTransform +class ReplacingSortedTransform final : public IMergingTransform { public: ReplacingSortedTransform( @@ -50,7 +50,7 @@ private: SortingHeap queue; bool is_queue_initialized = false; - using RowRef = detail::RowRef; + using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 3; /// last, current, selected. RowRef last_row; /// RowRef next_key; /// Primary key of next row. 
diff --git a/dbms/src/Processors/Merges/RowRef.h b/dbms/src/Processors/Merges/RowRef.h index 67d32d11277..ac4be79f560 100644 --- a/dbms/src/Processors/Merges/RowRef.h +++ b/dbms/src/Processors/Merges/RowRef.h @@ -103,10 +103,46 @@ inline void intrusive_ptr_release(SharedChunk * ptr) } /// This class represents a row in a chunk. -/// RowRef hold shared pointer to this chunk, possibly extending its life time. +struct RowRef +{ + ColumnRawPtrs * sort_columns = nullptr; /// Point to sort_columns from SortCursor or last_chunk_sort_columns. + UInt64 row_num = 0; + + bool empty() const { return sort_columns == nullptr; } + void reset() { sort_columns = nullptr; } + + void set(SortCursor & cursor) + { + sort_columns = &cursor.impl->sort_columns; + row_num = cursor.impl->pos; + } + + static bool checkEquals(const ColumnRawPtrs * left, size_t left_row, const ColumnRawPtrs * right, size_t right_row) + { + auto size = left->size(); + for (size_t col_number = 0; col_number < size; ++col_number) + { + auto & cur_column = (*left)[col_number]; + auto & other_column = (*right)[col_number]; + + if (0 != cur_column->compareAt(left_row, right_row, *other_column, 1)) + return false; + } + + return true; + } + + bool hasEqualSortColumnsWith(const RowRef & other) + { + return checkEquals(sort_columns, row_num, other.sort_columns, other.row_num); + } +}; + +/// This class also represents a row in a chunk. +/// RowRefWithOwnedChunk hold shared pointer to this chunk, possibly extending its life time. /// It is needed, for example, in CollapsingTransform, where we need to store first negative row for current sort key. /// We do not copy data itself, because it may be potentially changed for each row. Performance for `set` is important. -struct RowRef +struct RowRefWithOwnedChunk { detail::SharedChunkPtr owned_chunk = nullptr; @@ -114,7 +150,7 @@ struct RowRef ColumnRawPtrs * sort_columns = nullptr; UInt64 row_num = 0; - void swap(RowRef & other) + void swap(RowRefWithOwnedChunk & other) { owned_chunk.swap(other.owned_chunk); std::swap(all_columns, other.all_columns); @@ -140,19 +176,9 @@ struct RowRef sort_columns = &owned_chunk->sort_columns; } - bool hasEqualSortColumnsWith(const RowRef & other) + bool hasEqualSortColumnsWith(const RowRefWithOwnedChunk & other) { - auto size = sort_columns->size(); - for (size_t col_number = 0; col_number < size; ++col_number) - { - auto & cur_column = (*sort_columns)[col_number]; - auto & other_column = (*other.sort_columns)[col_number]; - - if (0 != cur_column->compareAt(row_num, other.row_num, *other_column, 1)) - return false; - } - - return true; + return RowRef::checkEquals(sort_columns, row_num, other.sort_columns, other.row_num); } }; diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index 0741dc1cd10..99008025232 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -168,7 +168,7 @@ namespace std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) { // Create aggregator to sum this column - detail::AggregateDescription desc; + SummingSortedTransform::AggregateDescription desc; desc.is_agg_func_type = is_agg_func; desc.column_numbers = {i}; @@ -211,7 +211,7 @@ namespace } DataTypes argument_types; - detail::AggregateDescription desc; + SummingSortedTransform::AggregateDescription desc; SummingSortedTransform::MapDescription map_desc; column_num_it = map.second.begin(); diff --git 
a/dbms/src/Processors/Merges/SummingSortedTransform.h b/dbms/src/Processors/Merges/SummingSortedTransform.h index 678ff6587a7..e7915cd3c8c 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.h +++ b/dbms/src/Processors/Merges/SummingSortedTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -13,8 +14,100 @@ namespace DB { -namespace detail +class SummingSortedTransform final : public IMergingTransform { +public: + + SummingSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, + /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. + const Names & column_names_to_sum, + size_t max_block_size); + + struct AggregateDescription; + + /// Stores numbers of key-columns and value-columns. + struct MapDescription + { + std::vector key_col_nums; + std::vector val_col_nums; + }; + + struct ColumnsDefinition + { + /// Columns whose values should not be summed; they are passed through as-is. + ColumnNumbers column_numbers_not_to_aggregate; + /// Columns which should be aggregated. + std::vector columns_to_aggregate; + /// Mapping for nested columns. + std::vector maps_to_sum; + + size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } + }; + + /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. + struct SummingMergedData : public MergedData + { + public: + using MergedData::MergedData; + + void insertRow(const Row & row, const ColumnNumbers & column_numbers) + { + for (auto column_number : column_numbers) + columns[column_number]->insert(row[column_number]); + + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; + } + + /// Initialize aggregate descriptions with columns. + void initAggregateDescription(std::vector & columns_to_aggregate) + { + size_t num_columns = columns_to_aggregate.size(); + for (size_t column_number = 0; column_number < num_columns; ++column_number) + columns_to_aggregate[column_number].merged_column = columns[column_number].get(); + } + }; + + String getName() const override { return "SummingSortedTransform"; } + void work() override; + +protected: + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + +private: + Row current_row; + bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. + + ColumnsDefinition columns_definition; + SummingMergedData merged_data; + + SortDescription description; + + /// Chunks currently being merged. + std::vector source_chunks; + SortCursorImpls cursors; + + /// In merging algorithm, we need to compare current sort key with the last one. + /// So, sorting columns for the last row need to be stored. + /// In order to do it, we extend lifetime of last chunk and its sort columns (from corresponding sort cursor). + Chunk last_chunk; + ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. + + detail::RowRef last_key; + + SortingHeap queue; + bool is_queue_initialized = false; + + void merge(); + void updateCursor(Chunk chunk, size_t source_num); + void addRow(SortCursor & cursor); + void insertCurrentRowIfNeeded(); + +public: /// Stores aggregation function, state, and columns to be used as function arguments.
struct AggregateDescription { @@ -66,128 +159,6 @@ namespace detail AggregateDescription(AggregateDescription &&) = default; AggregateDescription(const AggregateDescription &) = delete; }; - - /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. - struct SummingMergedData : public MergedData - { - public: - using MergedData::MergedData; - - void insertRow(const Row & row, const ColumnNumbers & column_numbers) - { - for (auto column_number :column_numbers) - columns[column_number]->insert(row[column_number]); - - ++total_merged_rows; - ++merged_rows; - /// TODO: sum_blocks_granularity += block_size; - } - - /// Initialize aggregate descriptions with columns. - void initAggregateDescription(std::vector & columns_to_aggregate) - { - size_t num_columns = columns_to_aggregate.size(); - for (size_t column_number = 0; column_number < num_columns; ++column_number) - columns_to_aggregate[column_number].merged_column = columns[column_number].get(); - } - }; - - struct RowRef - { - ColumnRawPtrs * sort_columns = nullptr; /// Point to sort_columns from SortCursor or last_chunk_sort_columns. - UInt64 row_num = 0; - - bool empty() const { return sort_columns == nullptr; } - void reset() { sort_columns = nullptr; } - - void set(SortCursor & cursor) - { - sort_columns = &cursor.impl->sort_columns; - row_num = cursor.impl->pos; - } - - bool hasEqualSortColumnsWith(const RowRef & other) - { - auto size = sort_columns->size(); - for (size_t col_number = 0; col_number < size; ++col_number) - { - auto & cur_column = (*sort_columns)[col_number]; - auto & other_column = (*other.sort_columns)[col_number]; - - if (0 != cur_column->compareAt(row_num, other.row_num, *other_column, 1)) - return false; - } - - return true; - } - }; -} - -class SummingSortedTransform : public IMergingTransform -{ -public: - - SummingSortedTransform( - size_t num_inputs, const Block & header, - SortDescription description_, - /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. - const Names & column_names_to_sum, - size_t max_block_size); - - /// Stores numbers of key-columns and value-columns. - struct MapDescription - { - std::vector key_col_nums; - std::vector val_col_nums; - }; - - struct ColumnsDefinition - { - /// Columns with which values should be summed. - ColumnNumbers column_numbers_not_to_aggregate; - /// Columns which should be aggregated. - std::vector columns_to_aggregate; - /// Mapping for nested columns. - std::vector maps_to_sum; - - size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } - }; - - String getName() const override { return "SummingSortedTransform"; } - void work() override; - -protected: - void initializeInputs() override; - void consume(Chunk chunk, size_t input_number) override; - -private: - Row current_row; - bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. - - ColumnsDefinition columns_definition; - detail::SummingMergedData merged_data; - - SortDescription description; - - /// Chunks currently being merged. - std::vector source_chunks; - SortCursorImpls cursors; - - /// In merging algorithm, we need to compare current sort key with the last one. - /// So, sorting columns for last row needed to be stored. - /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). 
- Chunk last_chunk; - ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. - - detail::RowRef last_key; - - SortingHeap queue; - bool is_queue_initialized = false; - - void merge(); - void updateCursor(Chunk chunk, size_t source_num); - void addRow(SortCursor & cursor); - void insertCurrentRowIfNeeded(); }; } diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h index 722bd30feca..0dbdf8e2a40 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h @@ -15,7 +15,7 @@ namespace DB { -class VersionedCollapsingTransform : public IMergingTransform +class VersionedCollapsingTransform final : public IMergingTransform { public: /// Don't need version column. It's in primary key. @@ -53,7 +53,7 @@ private: SortingHeap queue; bool is_queue_initialized = false; - using RowRef = detail::RowRef; + using RowRef = detail::RowRefWithOwnedChunk; const size_t max_rows_in_queue; /// Rows with the same primary key and sign. FixedSizeDequeWithGaps current_keys; From 5b83ca2fb69ae5a5ad8cb00f6c9e839e09fad506 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 1 Apr 2020 21:06:55 +0300 Subject: [PATCH 039/752] Fix build. --- dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 23524dfc395..1c94bca4e3a 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -60,6 +60,8 @@ namespace def.columns_to_aggregate.emplace_back(i); } } + + return def; } } From 9d6c88c78e7fcd8c650b0b7f1461ee5c7649806c Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 2 Apr 2020 00:27:21 +0300 Subject: [PATCH 040/752] SplitTokenExtractor::next unit-tests --- .../tests/gtest_SplitTokenExtractor.cpp | 168 ++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp index e69de29bb2d..6be6650369e 100644 --- a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp +++ b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp @@ -0,0 +1,168 @@ +#include + +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace +{ +using namespace DB; +} + +struct SplitTokenExtractorTestCase +{ + const char * description; + const std::string source; + const std::vector tokens; +}; + +std::ostream & operator<<(std::ostream & ostr, const SplitTokenExtractorTestCase & test_case) +{ + return ostr << test_case.description; +} + +class SplitTokenExtractorTest : public ::testing::TestWithParam +{ +public: + void SetUp() override + { + const auto & param = GetParam(); + const auto & source = param.source; + data = std::make_unique>(source.data(), source.data() + source.size()); + } + + std::unique_ptr> data; +}; + +TEST_P(SplitTokenExtractorTest, next) +{ + const auto & param = GetParam(); + + SplitTokenExtractor token_extractor; + + size_t i = 0; + + size_t pos = 0; + size_t token_start = 0; + size_t token_len = 0; + + for (const auto & expected_token : param.tokens) + { + SCOPED_TRACE(++i); + EXPECT_TRUE(token_extractor.next(data->data(), data->size(), &pos, &token_start, &token_len)); + EXPECT_EQ(expected_token, param.source.substr(token_start, 
token_len)) + << " token_start:" << token_start << " token_len: " << token_len; + } +} + +#define BINARY_STRING(str) std::string{str, sizeof(str)-1} + +INSTANTIATE_TEST_SUITE_P(ShortSingleToken, + SplitTokenExtractorTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Empty input sequence produces no tokens.", + "", + {} + }, + { + "Short single token", + "foo", + {"foo"} + }, + { + "Short single token surrounded by whitespace", + "\t\vfoo\n\r", + {"foo"} + } + }) +); + +INSTANTIATE_TEST_SUITE_P(UTF8, + SplitTokenExtractorTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Single token with mixed ASCII and UTF-8 chars", + BINARY_STRING("abc\u0442" "123\u0447XYZ\u043A"), + {"abc\u0442" "123\u0447XYZ\u043A"} + }, + { + "Multiple UTF-8 tokens", + BINARY_STRING("\u043F\u0440\u0438\u0432\u0435\u0442, u043C\u0438\u0440!"), + {"\u043F\u0440\u0438\u0432\u0435\u0442", "u043C\u0438\u0440"} + }, + }) +); + +INSTANTIATE_TEST_SUITE_P(MultipleTokens, + SplitTokenExtractorTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Multiple tokens separated by whitespace", + BINARY_STRING("\nabc 123\tXYZ\r"), + { + "abc", "123", "XYZ" + } + }, + { + "Multiple tokens separated by non-printable chars", + BINARY_STRING("\0abc\1" "123\2XYZ\4"), + { + "abc", "123", "XYZ" + } + }, + { + "ASCII table is split into numeric, upper case and lower case letters", + + BINARY_STRING("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16" + "\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNO" + "PQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c" + "\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1" + "\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6" + "\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb" + "\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0" + "\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"), + { + "0123456789", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz" + } + } + }) +); + + +INSTANTIATE_TEST_SUITE_P(SIMD_Cases, + SplitTokenExtractorTest, + ::testing::ValuesIn(std::initializer_list{ + { + "First 16 bytes are empty, then a short token", + "                abcdef", + {"abcdef"} + }, + { + "Token crosses boundary of 16-byte chunk", + "            abcdef", + {"abcdef"} + }, + { + "Token ends at the end of 16-byte chunk", + "          abcdef", + {"abcdef"} + }, + { + "Token crosses boundaries of multiple 16-byte chunks", + "abcdefghijklmnopqrstuvwxyz", + {"abcdefghijklmnopqrstuvwxyz"} + }, + }) +); From 90cb6a25cf1d10d6e45d83112dcdc6fc713415c6 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 2 Apr 2020 00:28:02 +0300 Subject: [PATCH 041/752] Fixed compilation issues and fixed several bugs in SplitTokenExtractor::next * Handling all characters above 0x80 as symbols (fixes UTF8 tokens) * Properly handling tokens that end exactly on haystack boundary.
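The rule the SSE paths in the hunk below must reproduce is easiest to state in scalar form: a token is a maximal run of bytes that are ASCII alphanumerics or any byte >= 0x80 (i.e. part of a multi-byte UTF-8 sequence). A reference sketch of that rule (assumed equivalent; not code from this patch):

    // Scalar sketch of SplitTokenExtractor::next; the patch implements the
    // same rule with SSE2/SSE4.2 over 16-byte chunks.
    static bool isTokenChar(unsigned char c)
    {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z') || c >= 0x80;
    }

    bool nextScalar(const char * data, size_t len, size_t * pos, size_t * token_start, size_t * token_len)
    {
        while (*pos < len && !isTokenChar(data[*pos]))   // skip separators
            ++*pos;

        *token_start = *pos;

        while (*pos < len && isTokenChar(data[*pos]))    // consume the token,
            ++*pos;                                      // stopping at the haystack boundary

        *token_len = *pos - *token_start;
        return *token_len > 0;
    }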
--- .../MergeTree/MergeTreeIndexFullText.cpp | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 5e4bf15418c..68d67a0c787 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -19,6 +19,10 @@ #include +#include +#include +#include + namespace DB { @@ -609,16 +613,17 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size while (*pos < len) { -#if __SSE2__ +#if defined(__SSE2__) // NOTE: we assume that `data` string is padded from the right with 15 zero-bytes. const __m128i haystack = _mm_loadu_si128(reinterpret_cast(data + *pos)); const size_t haystack_length = 16; -#if __SSE4_2__ +#if defined(__SSE4_2__) // With the help of https://www.strchr.com/strcmp_and_strlen_using_sse_4.2 - static const auto alnum_chars_ranges = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 'Z', 'A', 'z', 'a', '9', '0'); + static const auto alnum_chars_ranges = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + '\xFF', '\x80', 'z', 'a', 'Z', 'A', '9', '0'); // Every bit represents if `haystack` character is in the ranges (1) or not (0) - const auto result_bitmask = _mm_cvtsi128_si32(_mm_cmpestrm(alnum_chars_ranges, 6, haystack, haystack_length, _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS)); + const int result_bitmask = _mm_cvtsi128_si32(_mm_cmpestrm(alnum_chars_ranges, 8, haystack, haystack_length, _SIDD_CMP_RANGES)); #else // NOTE: -1 and +1 required since SSE2 has no `>=` and `<=` instructions on packed 8-bit integers (epi8). static const auto number_begin = _mm_set1_epi8('0' - 1); @@ -627,13 +632,16 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size static const auto alpha_lower_end = _mm_set1_epi8('z' + 1); static const auto alpha_upper_begin = _mm_set1_epi8('A' - 1); static const auto alpha_upper_end = _mm_set1_epi8('Z' + 1); + static const auto zero = _mm_set1_epi8(0); // every bit represents if `haystack` character `c` satisfies condition: - // (c > '0' - 1 && c < '9' + 1) || (c > 'a' - 1 && c < 'z' + 1) || (c > 'A' - 1 && c < 'Z' + 1) - const int result_bitmask = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128( - _mm_and_si128(_mm_cmpgt_epi8(haystack, number_begin), _mm_cmplt_epi8(haystack, number_end)), - _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_lower_begin), _mm_cmplt_epi8(haystack, alpha_lower_end))), - _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_upper_begin), _mm_cmplt_epi8(haystack, alpha_upper_end)))); + // (c < 0) || (c > '0' - 1 && c < '9' + 1) || (c > 'a' - 1 && c < 'z' + 1) || (c > 'A' - 1 && c < 'Z' + 1) + // < 0 since _mm_cmplt_epi8 treats chars as SIGNED, and hence all chars > 0x80 are negative.
+ const int result_bitmask = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(_mm_or_si128( + _mm_cmplt_epi8(haystack, zero), + _mm_and_si128(_mm_cmpgt_epi8(haystack, number_begin), _mm_cmplt_epi8(haystack, number_end))), + _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_lower_begin), _mm_cmplt_epi8(haystack, alpha_lower_end))), + _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_upper_begin), _mm_cmplt_epi8(haystack, alpha_upper_end)))); #endif // NOTE: __builtin_ctz family explicitly state that result is UNDEFINED if argument is 0 if (result_bitmask == 0) @@ -649,12 +657,15 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size const auto start = getTrailingZeroBits(result_bitmask); if (*token_len == 0) *token_start = *pos + start; + else if (start != 0) + // token is not continued in this haystack + return true; const auto l = getTrailingZeroBits(~(result_bitmask >> start)); *token_len += l; *pos += start + l; - if (start + l == 16) + if (start + l == haystack_length) // check if there are leftovers in next `haystack` continue; From f17fd7969ca01550fe1932aaaacb36c6255074c4 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 2 Apr 2020 00:32:12 +0300 Subject: [PATCH 042/752] Minor: excluded superfluous includes --- dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp index 6be6650369e..2cd20a70821 100644 --- a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp +++ b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp @@ -10,10 +10,6 @@ #include #include -#include -#include -#include - namespace { using namespace DB; From 1176b5777e5b43473bd98b341ac798d89774a39b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 2 Apr 2020 00:57:00 +0300 Subject: [PATCH 043/752] added settings parsing and initialisation to DictionaryStructure --- dbms/src/Dictionaries/DictionaryStructure.cpp | 32 + dbms/src/Dictionaries/DictionaryStructure.h | 7 + dbms/src/Dictionaries/DirectDictionary.cpp | 582 ------------------ dbms/src/Dictionaries/DirectDictionary.h | 294 --------- dbms/src/Dictionaries/DirectDictionary.inc.h | 406 ------------ 5 files changed, 39 insertions(+), 1282 deletions(-) delete mode 100644 dbms/src/Dictionaries/DirectDictionary.cpp delete mode 100644 dbms/src/Dictionaries/DirectDictionary.h delete mode 100644 dbms/src/Dictionaries/DirectDictionary.inc.h diff --git a/dbms/src/Dictionaries/DictionaryStructure.cpp b/dbms/src/Dictionaries/DictionaryStructure.cpp index f8b8fbd6aab..132b9effcb7 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.cpp +++ b/dbms/src/Dictionaries/DictionaryStructure.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int TYPE_MISMATCH; extern const int BAD_ARGUMENTS; + extern const int NO_ELEMENTS_IN_CONFIG; } namespace @@ -193,6 +195,10 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration } attributes = getAttributes(config, config_prefix); + + settings = Settings(); + getSettings(config, config_prefix, settings); + if (attributes.empty()) throw Exception{"Dictionary has no attributes defined", ErrorCodes::BAD_ARGUMENTS}; } @@ -354,4 +360,30 @@ std::vector DictionaryStructure::getAttributes( return res_attributes; } + +void DictionaryStructure::getSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Settings & settings) +{ + Poco::Util::AbstractConfiguration::Keys config_elems; + config.keys(config_prefix, config_elems); + + + for (const auto & config_elem : config_elems) + { + if (startsWith(config_elem, "settings")) + { + /* do not break after this `if`: there can be multiple settings sections */ + + const auto prefix = config_prefix + '.' + config_elem; + Poco::Util::AbstractConfiguration::Keys setting_keys; + config.keys(prefix, setting_keys); + settings.loadSettingsFromConfig(prefix, config); + + } + } + +} + } diff --git a/dbms/src/Dictionaries/DictionaryStructure.h b/dbms/src/Dictionaries/DictionaryStructure.h index 2893dea2e4f..9a7428959a6 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.h +++ b/dbms/src/Dictionaries/DictionaryStructure.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -101,6 +102,7 @@ struct DictionaryStructure final std::optional id; std::optional> key; std::vector attributes; + Settings settings; std::optional range_min; std::optional range_max; bool has_expressions = false; @@ -118,6 +120,11 @@ private: const std::string & config_prefix, const bool hierarchy_allowed = true, const bool allow_null_values = true); + + void getSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Settings & settings); }; } diff --git a/dbms/src/Dictionaries/DirectDictionary.cpp b/dbms/src/Dictionaries/DirectDictionary.cpp deleted file mode 100644 index 9e0a77ebc91..00000000000 --- a/dbms/src/Dictionaries/DirectDictionary.cpp +++ /dev/null @@ -1,582 +0,0 @@ -#include "DirectDictionary.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "DirectDictionary.inc.h" -#include "DictionaryBlockInputStream.h" -#include "DictionaryFactory.h" - - -/* - * - * TODO: CHANGE EVENTS TO DIRECT DICTIONARY EVENTS (WTF? WHERE R THEY DECLARED????)
- * -*/ - -namespace ProfileEvents -{ - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; -} - -namespace CurrentMetrics -{ - extern const Metric DictCacheRequests; -} - - -namespace DB -{ - namespace ErrorCodes - { - extern const int TYPE_MISMATCH; - extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; - extern const int LOGICAL_ERROR; - extern const int TOO_SMALL_BUFFER_SIZE; - } - - /* - * deleted inline size_t DirectDictionary::getCellIdx(const Key id) const - * - */ - - - DirectDictionary::DirectDictionary( - const std::string & name_, - const DictionaryStructure & dict_struct_, - DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_ - ) - : name{name_} - , dict_struct(dict_struct_) - , source_ptr{std::move(source_ptr_)} - , dict_lifetime(dict_lifetime_) - , log(&Logger::get("ExternalDictionaries")) - , rnd_engine(randomSeed()) - { - if (!this->source_ptr->supportsSelectiveLoad()) - throw Exception{name + ": source cannot be used with DirectDictionary", ErrorCodes::UNSUPPORTED_METHOD}; - - createAttributes(); - } - - - void DirectDictionary::toParent(const PaddedPODArray & ids, PaddedPODArray & out) const - { - const auto null_value = std::get(hierarchical_attribute->null_values); - - getItemsNumberImpl(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; }); - } - - -/// Allow to use single value in same way as array. - static inline DirectDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) - { - return arr[idx]; - } - static inline DirectDictionary::Key getAt(const DirectDictionary::Key & value, const size_t) - { - return value; - } - - - template - void DirectDictionary::isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const - { - /// Transform all children to parents until ancestor id or null_value will be reached. - - size_t out_size = out.size(); - memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated" - - const auto null_value = std::get(hierarchical_attribute->null_values); - - PaddedPODArray children(out_size, 0); - PaddedPODArray parents(child_ids.begin(), child_ids.end()); - - while (true) - { - size_t out_idx = 0; - size_t parents_idx = 0; - size_t new_children_idx = 0; - - while (out_idx < out_size) - { - /// Already calculated - if (out[out_idx] != 0xFF) - { - ++out_idx; - continue; - } - - /// No parent - if (parents[parents_idx] == null_value) - { - out[out_idx] = 0; - } - /// Found ancestor - else if (parents[parents_idx] == getAt(ancestor_ids, parents_idx)) - { - out[out_idx] = 1; - } - /// Loop detected - else if (children[new_children_idx] == parents[parents_idx]) - { - out[out_idx] = 1; - } - /// Found intermediate parent, add this value to search at next loop iteration - else - { - children[new_children_idx] = parents[parents_idx]; - ++new_children_idx; - } - - ++out_idx; - ++parents_idx; - } - - if (new_children_idx == 0) - break; - - /// Transform all children to its parents. 
- children.resize(new_children_idx); - parents.resize(new_children_idx); - - toParent(children, parents); - } - } - - void DirectDictionary::isInVectorVector( - const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const - { - isInImpl(child_ids, ancestor_ids, out); - } - - void DirectDictionary::isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const - { - isInImpl(child_ids, ancestor_id, out); - } - - void DirectDictionary::isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const - { - /// Special case with single child value. - - const auto null_value = std::get(hierarchical_attribute->null_values); - - PaddedPODArray child(1, child_id); - PaddedPODArray parent(1); - std::vector ancestors(1, child_id); - - /// Iteratively find all ancestors for child. - while (true) - { - toParent(child, parent); - - if (parent[0] == null_value) - break; - - child[0] = parent[0]; - ancestors.push_back(parent[0]); - } - - /// Assuming short hierarchy, so linear search is Ok. - for (size_t i = 0, out_size = out.size(); i < out_size; ++i) - out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end(); - } - - void DirectDictionary::getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const - { - auto & attribute = getAttribute(attribute_name); - checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); - - const auto null_value = StringRef{std::get(attribute.null_values)}; - - getItemsString(attribute, ids, out, [&](const size_t) { return null_value; }); - } - - void DirectDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const - { - auto & attribute = getAttribute(attribute_name); - checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); - - getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); }); - } - - void DirectDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const - { - auto & attribute = getAttribute(attribute_name); - checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); - - getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; }); - } - - -/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag -/// true false found and valid -/// false true not found (something outdated, maybe our cell) -/// false false not found (other id stored with valid data) -/// true true impossible -/// -/// todo: split this func to two: find_for_get and find_for_set - DirectDictionary::FindResult DirectDictionary::findCellIdx(const Key & id, const CellMetadata::time_point_t now) const - { - auto pos = getCellIdx(id); - auto oldest_id = pos; - auto oldest_time = CellMetadata::time_point_t::max(); - const auto stop = pos + max_collision_length; - for (; pos < stop; ++pos) - { - const auto cell_idx = pos & size_overlap_mask; - const auto & cell = cells[cell_idx]; - - if (cell.id != id) - { - /// maybe we already found nearest expired cell (try minimize collision_length on insert) - if (oldest_time > now && oldest_time > cell.expiresAt()) - { - oldest_time = cell.expiresAt(); - oldest_id = cell_idx; - } - continue; - } - - if 
(cell.expiresAt() < now) - { - return {cell_idx, false, true}; - } - - return {cell_idx, true, false}; - } - - return {oldest_id, false, false}; - } - - - /* - * deleted most part of has, that stood for - * looking for a key in cache - * - * TODO: check whether we need last two arguments - * in update function (seems like no) - * - */ - - void DirectDictionary::has(const PaddedPODArray & ids, PaddedPODArray & out) const - { - std::vector required_ids(ids.size()); - std::copy(std::begin(ids), std::end(ids), std::begin(required_ids)); - - /// request new values - update( - required_ids, - [&](const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = true; - }, - [&](const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = false; - }); - } - - - void DirectDictionary::createAttributes() - { - const auto attributes_size = dict_struct.attributes.size(); - attributes.reserve(attributes_size); - - bytes_allocated += size * sizeof(CellMetadata); - bytes_allocated += attributes_size * sizeof(attributes.front()); - - for (const auto & attribute : dict_struct.attributes) - { - attribute_index_by_name.emplace(attribute.name, attributes.size()); - attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); - - if (attribute.hierarchical) - { - hierarchical_attribute = &attributes.back(); - - if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64) - throw Exception{name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH}; - } - } - } - - DirectDictionary::Attribute DirectDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) - { - Attribute attr{type, {}, {}}; - - switch (type) - { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ - bytes_allocated += size * sizeof(TYPE); \ - break; - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - case AttributeUnderlyingType::utString: - attr.null_values = null_value.get(); - attr.arrays = std::make_unique>(size); - bytes_allocated += size * sizeof(StringRef); - if (!string_arena) - string_arena = std::make_unique(); - break; - } - - return attr; - } - - void DirectDictionary::setDefaultAttributeValue(Attribute & attribute, const Key idx) const - { - switch (attribute.type) - { - case AttributeUnderlyingType::utUInt8: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utUInt16: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utUInt32: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utUInt64: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utUInt128: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utInt8: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utInt16: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utInt32: - 
std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utInt64: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utFloat32: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utFloat64: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - - case AttributeUnderlyingType::utDecimal32: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utDecimal64: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - case AttributeUnderlyingType::utDecimal128: - std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); - break; - - case AttributeUnderlyingType::utString: - { - const auto & null_value_ref = std::get(attribute.null_values); - auto & string_ref = std::get>(attribute.arrays)[idx]; - - if (string_ref.data != null_value_ref.data()) - { - if (string_ref.data) - string_arena->free(const_cast(string_ref.data), string_ref.size); - - string_ref = StringRef{null_value_ref}; - } - - break; - } - } - } - - void DirectDictionary::setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const - { - switch (attribute.type) - { - case AttributeUnderlyingType::utUInt8: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utUInt16: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utUInt32: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utUInt64: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utUInt128: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utInt8: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utInt16: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utInt32: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utInt64: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utFloat32: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utFloat64: - std::get>(attribute.arrays)[idx] = value.get(); - break; - - case AttributeUnderlyingType::utDecimal32: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utDecimal64: - std::get>(attribute.arrays)[idx] = value.get(); - break; - case AttributeUnderlyingType::utDecimal128: - std::get>(attribute.arrays)[idx] = value.get(); - break; - - case AttributeUnderlyingType::utString: - { - const auto & string = value.get(); - auto & string_ref = std::get>(attribute.arrays)[idx]; - const auto & null_value_ref = std::get(attribute.null_values); - - /// free memory unless it points to a null_value - if (string_ref.data && string_ref.data != null_value_ref.data()) - string_arena->free(const_cast(string_ref.data), string_ref.size); - - const auto str_size = string.size(); - if (str_size != 0) - { - auto string_ptr = string_arena->alloc(str_size + 1); - std::copy(string.data(), string.data() + str_size + 1, string_ptr); - string_ref = StringRef{string_ptr, str_size}; - } - else - string_ref = {}; - - break; - } - } - } - - DirectDictionary::Attribute & DirectDictionary::getAttribute(const std::string & attribute_name) 
const - { - const auto it = attribute_index_by_name.find(attribute_name); - if (it == std::end(attribute_index_by_name)) - throw Exception{name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS}; - - return attributes[it->second]; - } - - /* - * I've deleted: - * bool CacheDictionary::isEmptyCell(const UInt64 idx) const - * and - * PaddedPODArray CacheDictionary::getCachedIds() const - */ - - BlockInputStreamPtr DirectDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const - { - using BlockInputStreamType = DictionaryBlockInputStream; - - /* deleted pre-last argument getCachedIds() from this return (will something break then?) */ - return std::make_shared(shared_from_this(), max_block_size, column_names); - } - - std::exception_ptr DirectDictionary::getLastException() const - { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - return last_exception; - } - - void registerDictionaryDirect(DictionaryFactory & factory) - { - auto create_layout = [=](const std::string & name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr) -> DictionaryPtr - { - if (dict_struct.key) - throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; - - if (dict_struct.range_min || dict_struct.range_max) - throw Exception{name - + ": elements .structure.range_min and .structure.range_max should be defined only " - "for a dictionary of layout 'range_hashed'", - ErrorCodes::BAD_ARGUMENTS}; - const auto & layout_prefix = config_prefix + ".layout"; - - /* - * - * seems like this stands only for cache dictionaries - * - const auto size = config.getInt(layout_prefix + ".cache.size_in_cells"); - if (size == 0) - throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; - - */ - - const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - if (require_nonempty) - throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", - ErrorCodes::BAD_ARGUMENTS}; - - const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - - /* deleted last argument (size) in this return */ - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); - }; - factory.registerLayout("direct", create_layout, false); - } - - -} diff --git a/dbms/src/Dictionaries/DirectDictionary.h b/dbms/src/Dictionaries/DirectDictionary.h deleted file mode 100644 index 1431adccf02..00000000000 --- a/dbms/src/Dictionaries/DirectDictionary.h +++ /dev/null @@ -1,294 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "DictionaryStructure.h" -#include "IDictionary.h" -#include "IDictionarySource.h" - - -namespace DB -{ - class DirectDictionary final : public IDictionary - { - public: - /* Removed last argument (size_) */ - DirectDictionary( - const std::string & name_, - const DictionaryStructure & dict_struct_, - DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_ - ); - - std::string getName() const override { return name; } - - std::string getTypeName() const override { return "Cache"; } - - size_t getBytesAllocated() const override { return bytes_allocated + (string_arena ? 
string_arena->size() : 0); } - - size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } - - double getHitRate() const override - { - return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); - } - - size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - - double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } - - bool isCached() const override { return false; } - - std::shared_ptr clone() const override - { - return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, size); - } - - const IDictionarySource * getSource() const override { return source_ptr.get(); } - - const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } - - const DictionaryStructure & getStructure() const override { return dict_struct; } - - bool isInjective(const std::string & attribute_name) const override - { - return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; - } - - bool hasHierarchy() const override { return hierarchical_attribute; } - - void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override; - - void isInVectorVector( - const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; - void isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const override; - void isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; - - std::exception_ptr getLastException() const override; - - template - using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; - -#define DECLARE(TYPE) \ - void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const; - DECLARE(UInt8) - DECLARE(UInt16) - DECLARE(UInt32) - DECLARE(UInt64) - DECLARE(UInt128) - DECLARE(Int8) - DECLARE(Int16) - DECLARE(Int32) - DECLARE(Int64) - DECLARE(Float32) - DECLARE(Float64) - DECLARE(Decimal32) - DECLARE(Decimal64) - DECLARE(Decimal128) -#undef DECLARE - - void getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const; - -#define DECLARE(TYPE) \ - void get##TYPE( \ - const std::string & attribute_name, \ - const PaddedPODArray & ids, \ - const PaddedPODArray & def, \ - ResultArrayType & out) const; - DECLARE(UInt8) - DECLARE(UInt16) - DECLARE(UInt32) - DECLARE(UInt64) - DECLARE(UInt128) - DECLARE(Int8) - DECLARE(Int16) - DECLARE(Int32) - DECLARE(Int64) - DECLARE(Float32) - DECLARE(Float64) - DECLARE(Decimal32) - DECLARE(Decimal64) - DECLARE(Decimal128) -#undef DECLARE - - void - getString(const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) - const; - -#define DECLARE(TYPE) \ - void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const; - DECLARE(UInt8) - DECLARE(UInt16) - DECLARE(UInt32) - DECLARE(UInt64) - DECLARE(UInt128) - DECLARE(Int8) - DECLARE(Int16) - DECLARE(Int32) - DECLARE(Int64) - DECLARE(Float32) - DECLARE(Float64) - DECLARE(Decimal32) - DECLARE(Decimal64) - DECLARE(Decimal128) -#undef DECLARE - - void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const; - - void has(const 
PaddedPODArray & ids, PaddedPODArray & out) const override; - - BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; - - private: - template - using ContainerType = Value[]; - template - using ContainerPtrType = std::unique_ptr>; - - struct CellMetadata final - { - using time_point_t = std::chrono::system_clock::time_point; - using time_point_rep_t = time_point_t::rep; - using time_point_urep_t = std::make_unsigned_t; - - static constexpr UInt64 EXPIRES_AT_MASK = std::numeric_limits::max(); - static constexpr UInt64 IS_DEFAULT_MASK = ~EXPIRES_AT_MASK; - - UInt64 id; - /// Stores both expiration time and `is_default` flag in the most significant bit - time_point_urep_t data; - - /// Sets expiration time, resets `is_default` flag to false - time_point_t expiresAt() const { return ext::safe_bit_cast(data & EXPIRES_AT_MASK); } - void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } - - bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } - void setDefault() { data |= IS_DEFAULT_MASK; } - }; - - struct Attribute final - { - AttributeUnderlyingType type; - std::variant< - UInt8, - UInt16, - UInt32, - UInt64, - UInt128, - Int8, - Int16, - Int32, - Int64, - Decimal32, - Decimal64, - Decimal128, - Float32, - Float64, - String> - null_values; - std::variant< - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType> - arrays; - }; - - void createAttributes(); - - Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); - - template - void getItemsNumberImpl( - Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; - - template - void getItemsString(Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; - - template - void update(const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const; - - PaddedPODArray getCachedIds() const; - - bool isEmptyCell(const UInt64 idx) const; - - size_t getCellIdx(const Key id) const; - - void setDefaultAttributeValue(Attribute & attribute, const Key idx) const; - - void setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const; - - Attribute & getAttribute(const std::string & attribute_name) const; - - struct FindResult - { - const size_t cell_idx; - const bool valid; - const bool outdated; - }; - - FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; - - template - void isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; - - const std::string name; - const DictionaryStructure dict_struct; - mutable DictionarySourcePtr source_ptr; - const DictionaryLifetime dict_lifetime; - Logger * const log; - - mutable std::shared_mutex rw_lock; - - /// Actual size will be increased to match power of 2 - const size_t size; - - /// all bits to 1 mask (size - 1) (0b1000 - 1 = 0b111) - const size_t size_overlap_mask; - - /// Max tries to find cell, overlaped with mask: if size = 16 and start_cell=10: will try cells: 10,11,12,13,14,15,0,1,2,3 - static constexpr size_t max_collision_length = 10; - - const size_t zero_cell_idx{getCellIdx(0)}; 
- std::map attribute_index_by_name; - mutable std::vector attributes; - mutable std::vector cells; - Attribute * hierarchical_attribute = nullptr; - std::unique_ptr string_arena; - - mutable std::exception_ptr last_exception; - mutable size_t error_count = 0; - mutable std::chrono::system_clock::time_point backoff_end_time; - - mutable pcg64 rnd_engine; - - mutable size_t bytes_allocated = 0; - mutable std::atomic element_count{0}; - mutable std::atomic hit_count{0}; - mutable std::atomic query_count{0}; - }; - -} diff --git a/dbms/src/Dictionaries/DirectDictionary.inc.h b/dbms/src/Dictionaries/DirectDictionary.inc.h deleted file mode 100644 index 68010b0fe19..00000000000 --- a/dbms/src/Dictionaries/DirectDictionary.inc.h +++ /dev/null @@ -1,406 +0,0 @@ -#include "CacheDictionary.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace ProfileEvents -{ - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; -} - -namespace CurrentMetrics -{ - extern const Metric DictCacheRequests; -} - -namespace DB -{ - namespace ErrorCodes - { - extern const int TYPE_MISMATCH; - } - - template - void CacheDictionary::getItemsNumberImpl( - Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const - { - /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } - std::unordered_map> outdated_ids; - auto & attribute_array = std::get>(attribute.arrays); - const auto rows = ext::size(ids); - - size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; - - { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - - const auto now = std::chrono::system_clock::now(); - /// fetch up-to-date values, decide which ones require update - for (const auto row : ext::range(0, rows)) - { - const auto id = ids[row]; - - /** cell should be updated if either: - * 1. ids do not match, - * 2. cell has expired, - * 3. explicit defaults were specified and cell was set default. */ - - const auto find_result = findCellIdx(id, now); - if (!find_result.valid) - { - outdated_ids[id].push_back(row); - if (find_result.outdated) - ++cache_expired; - else - ++cache_not_found; - } - else - { - ++cache_hit; - const auto & cell_idx = find_result.cell_idx; - const auto & cell = cells[cell_idx]; - out[row] = cell.isDefault() ? 
get_default(row) : static_cast(attribute_array[cell_idx]); - } - } - } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - - query_count.fetch_add(rows, std::memory_order_relaxed); - hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); - - if (outdated_ids.empty()) - return; - - std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); - - /// request new values - update( - required_ids, - [&](const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; - - for (const size_t row : outdated_ids[id]) - out[row] = static_cast(attribute_value); - }, - [&](const auto id, const auto) - { - for (const size_t row : outdated_ids[id]) - out[row] = get_default(row); - }); - } - - template - void CacheDictionary::getItemsString( - Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const - { - const auto rows = ext::size(ids); - - /// save on some allocations - out->getOffsets().reserve(rows); - - auto & attribute_array = std::get>(attribute.arrays); - - auto found_outdated_values = false; - - /// perform optimistic version, fallback to pessimistic if failed - { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - - const auto now = std::chrono::system_clock::now(); - /// fetch up-to-date values, discard on fail - for (const auto row : ext::range(0, rows)) - { - const auto id = ids[row]; - - const auto find_result = findCellIdx(id, now); - if (!find_result.valid) - { - found_outdated_values = true; - break; - } - else - { - const auto & cell_idx = find_result.cell_idx; - const auto & cell = cells[cell_idx]; - const auto string_ref = cell.isDefault() ? get_default(row) : attribute_array[cell_idx]; - out->insertData(string_ref.data, string_ref.size); - } - } - } - - /// optimistic code completed successfully - if (!found_outdated_values) - { - query_count.fetch_add(rows, std::memory_order_relaxed); - hit_count.fetch_add(rows, std::memory_order_release); - return; - } - - /// now onto the pessimistic one, discard possible partial results from the optimistic path - out->getChars().resize_assume_reserved(0); - out->getOffsets().resize_assume_reserved(0); - - /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } - std::unordered_map> outdated_ids; - /// we are going to store every string separately - std::unordered_map map; - - size_t total_length = 0; - size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0; - { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - - const auto now = std::chrono::system_clock::now(); - for (const auto row : ext::range(0, ids.size())) - { - const auto id = ids[row]; - - const auto find_result = findCellIdx(id, now); - if (!find_result.valid) - { - outdated_ids[id].push_back(row); - if (find_result.outdated) - ++cache_expired; - else - ++cache_not_found; - } - else - { - ++cache_hit; - const auto & cell_idx = find_result.cell_idx; - const auto & cell = cells[cell_idx]; - const auto string_ref = cell.isDefault() ? 
get_default(row) : attribute_array[cell_idx]; - - if (!cell.isDefault()) - map[id] = String{string_ref}; - - total_length += string_ref.size + 1; - } - } - } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired); - ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found); - ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit); - - query_count.fetch_add(rows, std::memory_order_relaxed); - hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release); - - /// request new values - if (!outdated_ids.empty()) - { - std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); - - update( - required_ids, - [&](const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; - - map[id] = String{attribute_value}; - total_length += (attribute_value.size + 1) * outdated_ids[id].size(); - }, - [&](const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - total_length += get_default(row).size + 1; - }); - } - - out->getChars().reserve(total_length); - - for (const auto row : ext::range(0, ext::size(ids))) - { - const auto id = ids[row]; - const auto it = map.find(id); - - const auto string_ref = it != std::end(map) ? StringRef{it->second} : get_default(row); - out->insertData(string_ref.data, string_ref.size); - } - } - - template - void CacheDictionary::update( - const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const - { - CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); - - std::unordered_map remaining_ids{requested_ids.size()}; - for (const auto id : requested_ids) - remaining_ids.insert({id, 0}); - - const auto now = std::chrono::system_clock::now(); - - const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; - - if (now > backoff_end_time) - { - try - { - if (error_count) - { - /// Recover after error: we have to clone the source here because - /// it could keep connections which should be reset after error. 
- source_ptr = source_ptr->clone(); - } - - Stopwatch watch; - auto stream = source_ptr->loadIds(requested_ids); - stream->readPrefix(); - - while (const auto block = stream->read()) - { - const auto id_column = typeid_cast(block.safeGetByPosition(0).column.get()); - if (!id_column) - throw Exception{name + ": id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; - - const auto & ids = id_column->getData(); - - /// cache column pointers - const auto column_ptrs = ext::map( - ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); }); - - for (const auto i : ext::range(0, ids.size())) - { - const auto id = ids[i]; - - const auto find_result = findCellIdx(id, now); - const auto & cell_idx = find_result.cell_idx; - - auto & cell = cells[cell_idx]; - - for (const auto attribute_idx : ext::range(0, attributes.size())) - { - const auto & attribute_column = *column_ptrs[attribute_idx]; - auto & attribute = attributes[attribute_idx]; - - setAttributeValue(attribute, cell_idx, attribute_column[i]); - } - - /// if cell id is zero and zero does not map to this cell, then the cell is unused - if (cell.id == 0 && cell_idx != zero_cell_idx) - element_count.fetch_add(1, std::memory_order_relaxed); - - cell.id = id; - if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) - { - std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; - cell.setExpiresAt(now + std::chrono::seconds{distribution(rnd_engine)}); - } - else - cell.setExpiresAt(std::chrono::time_point::max()); - - /// inform caller - on_cell_updated(id, cell_idx); - /// mark corresponding id as found - remaining_ids[id] = 1; - } - } - - stream->readSuffix(); - - error_count = 0; - last_exception = std::exception_ptr{}; - backoff_end_time = std::chrono::system_clock::time_point{}; - - ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); - } - catch (...) - { - ++error_count; - last_exception = std::current_exception(); - backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count)); - - tryLogException(last_exception, log, "Could not update cache dictionary '" + getName() + - "', next update is scheduled at " + DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(backoff_end_time))); - } - } - - size_t not_found_num = 0, found_num = 0; - - /// Check which ids have not been found and require setting null_value - for (const auto & id_found_pair : remaining_ids) - { - if (id_found_pair.second) - { - ++found_num; - continue; - } - ++not_found_num; - - const auto id = id_found_pair.first; - - const auto find_result = findCellIdx(id, now); - const auto & cell_idx = find_result.cell_idx; - auto & cell = cells[cell_idx]; - - if (error_count) - { - if (find_result.outdated) - { - /// We have expired data for that `id` so we can continue using it. - bool was_default = cell.isDefault(); - cell.setExpiresAt(backoff_end_time); - if (was_default) - cell.setDefault(); - if (was_default) - on_id_not_found(id, cell_idx); - else - on_cell_updated(id, cell_idx); - continue; - } - /// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`. 
- std::rethrow_exception(last_exception); - } - - /// Check if cell had not been occupied before and increment element counter if it hadn't - if (cell.id == 0 && cell_idx != zero_cell_idx) - element_count.fetch_add(1, std::memory_order_relaxed); - - cell.id = id; - - if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) - { - std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; - cell.setExpiresAt(now + std::chrono::seconds{distribution(rnd_engine)}); - } - else - cell.setExpiresAt(std::chrono::time_point::max()); - - /// Set null_value for each attribute - cell.setDefault(); - for (auto & attribute : attributes) - setDefaultAttributeValue(attribute, cell_idx); - - /// inform caller that the cell has not been found - on_id_not_found(id, cell_idx); - } - - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); - ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); - ProfileEvents::increment(ProfileEvents::DictCacheRequests); - } - -} From 5f561af792eac397c5561c8c2b5087d4520c29f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 2 Apr 2020 01:10:22 +0300 Subject: [PATCH 044/752] some fix for errors with shadowing name --- dbms/src/Dictionaries/DictionaryStructure.cpp | 4 ++-- dbms/src/Dictionaries/DictionaryStructure.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/DictionaryStructure.cpp b/dbms/src/Dictionaries/DictionaryStructure.cpp index 132b9effcb7..33196f627d8 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.cpp +++ b/dbms/src/Dictionaries/DictionaryStructure.cpp @@ -364,7 +364,7 @@ std::vector DictionaryStructure::getAttributes( void DictionaryStructure::getSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - Settings & settings) + Settings & dict_settings) { Poco::Util::AbstractConfiguration::Keys config_elems; config.keys(config_prefix, config_elems); @@ -379,7 +379,7 @@ void DictionaryStructure::getSettings( const auto prefix = config_prefix + '.' 
+ config_elem; Poco::Util::AbstractConfiguration::Keys setting_keys; config.keys(prefix, setting_keys); - settings.loadSettingsFromConfig(prefix, config); + dict_settings.loadSettingsFromConfig(prefix, config); } } diff --git a/dbms/src/Dictionaries/DictionaryStructure.h b/dbms/src/Dictionaries/DictionaryStructure.h index 9a7428959a6..cd9d41f67ee 100644 --- a/dbms/src/Dictionaries/DictionaryStructure.h +++ b/dbms/src/Dictionaries/DictionaryStructure.h @@ -124,7 +124,7 @@ private: void getSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - Settings & settings); + Settings & dict_settings); }; } From 338c18ae3bb3d50695cd2b941286ea0e3aa647d7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 2 Apr 2020 13:02:51 +0300 Subject: [PATCH 045/752] review fixes --- dbms/src/Interpreters/ActionsVisitor.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index 30f3fc754b4..bb12664c8d5 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -200,14 +200,10 @@ static String getUniqueName(ActionsVisitor::Data & data, const String & prefix) auto & block = data.getSampleBlock(); auto result = prefix; - if (block.has(result)) + while (block.has(result)) { - do - { - result = prefix + "_" + toString(data.next_unique_suffix); - ++data.next_unique_suffix; - } - while (block.has(result)); + result = prefix + "_" + toString(data.next_unique_suffix); + ++data.next_unique_suffix; } return result; From 3cacef6b4478860639728b7d5cd64d16ea394bcc Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 2 Apr 2020 13:53:13 +0300 Subject: [PATCH 046/752] Fixed final garbage token Also, added getTrailingZeroBitsUnsafe(), updated comments and cleaned up test cases --- dbms/src/Common/BitHelpers.h | 15 +++++--- .../MergeTree/MergeTreeIndexFullText.cpp | 34 ++++++++++++------- .../tests/gtest_SplitTokenExtractor.cpp | 24 +++++++++---- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/dbms/src/Common/BitHelpers.h b/dbms/src/Common/BitHelpers.h index ba6a4c60a49..bc6d7413def 100644 --- a/dbms/src/Common/BitHelpers.h +++ b/dbms/src/Common/BitHelpers.h @@ -53,12 +53,10 @@ inline size_t getLeadingZeroBits(T x) } } +// Unsafe since __builtin_ctz()-family explicitly state that result is undefined on x == 0 template -inline size_t getTrailingZeroBits(T x) +inline size_t getTrailingZeroBitsUnsafe(T x) { - if (!x) - return sizeof(x) * 8; - if constexpr (sizeof(T) <= sizeof(unsigned int)) { return __builtin_ctz(x); @@ -73,6 +71,15 @@ inline size_t getTrailingZeroBits(T x) } } +template +inline size_t getTrailingZeroBits(T x) +{ + if (!x) + return sizeof(x) * 8; + + return getTrailingZeroBitsUnsafe(x); +} + /** Returns a mask that has '1' for `bits` LSB set: * maskLowBits(3) => 00000111 */ diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 68d67a0c787..af979010dc0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -614,7 +614,7 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size while (*pos < len) { #if defined(__SSE2__) - // NOTE: we assume that `data` string is padded from the right with 15 zero-bytes. + // NOTE: we assume that `data` string is padded from the right with 15 bytes. 
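+        // For reference, the bitmask below marks every byte `c` satisfying roughly this
+        // scalar predicate (bytes are compared as SIGNED chars, so any byte >= 0x80,
+        // i.e. a UTF-8 lead or continuation byte, is negative and counts as a token char):
+        //
+        //     bool is_token_char = (c < 0)
+        //         || (c >= '0' && c <= '9')
+        //         || (c >= 'a' && c <= 'z')
+        //         || (c >= 'A' && c <= 'Z');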
const __m128i haystack = _mm_loadu_si128(reinterpret_cast(data + *pos)); const size_t haystack_length = 16; @@ -632,40 +632,40 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size static const auto alpha_lower_end = _mm_set1_epi8('z' + 1); static const auto alpha_upper_begin = _mm_set1_epi8('A' - 1); static const auto alpha_upper_end = _mm_set1_epi8('Z' + 1); - static const auto zero = _mm_set1_epi8(0); + static const auto zero = _mm_set1_epi8(0); // every bit represents if `haystack` character `c` statisfies condition: // (c < 0) || (c > '0' - 1 && c < '9' + 1) || (c > 'a' - 1 && c < 'z' + 1) || (c > 'A' - 1 && c < 'Z' + 1) - // < 0 since _mm_cmplt_epi8 threats chars as SIGNED, and hence all chars > 0x80 are negative. + // < 0 since _mm_cmplt_epi8 threats chars as SIGNED, and so all chars > 0x80 are negative. const int result_bitmask = _mm_movemask_epi8(_mm_or_si128(_mm_or_si128(_mm_or_si128( _mm_cmplt_epi8(haystack, zero), _mm_and_si128(_mm_cmpgt_epi8(haystack, number_begin), _mm_cmplt_epi8(haystack, number_end))), _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_lower_begin), _mm_cmplt_epi8(haystack, alpha_lower_end))), _mm_and_si128(_mm_cmpgt_epi8(haystack, alpha_upper_begin), _mm_cmplt_epi8(haystack, alpha_upper_end)))); #endif - // NOTE: __builtin_ctz family explicitly state that result is UNDEFINED if argument is 0 if (result_bitmask == 0) { - // end of token started on previous haystack if (*token_len != 0) + // end of token started on previous haystack return true; *pos += haystack_length; continue; } - const auto start = getTrailingZeroBits(result_bitmask); + const auto token_start_pos_in_current_haystack = getTrailingZeroBitsUnsafe(result_bitmask); if (*token_len == 0) - *token_start = *pos + start; - else if (start != 0) - // token is not continued in this haystack + // new token + *token_start = *pos + token_start_pos_in_current_haystack; + else if (token_start_pos_in_current_haystack != 0) + // end of token starting in one of previous haystacks return true; - const auto l = getTrailingZeroBits(~(result_bitmask >> start)); - *token_len += l; + const auto token_bytes_in_current_haystack = getTrailingZeroBitsUnsafe(~(result_bitmask >> token_start_pos_in_current_haystack)); + *token_len += token_bytes_in_current_haystack; - *pos += start + l; - if (start + l == haystack_length) + *pos += token_start_pos_in_current_haystack + token_bytes_in_current_haystack; + if (token_start_pos_in_current_haystack + token_bytes_in_current_haystack == haystack_length) // check if there are leftovers in next `haystack` continue; @@ -686,6 +686,14 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size } #endif } + +#if defined(__SSE2__) + // Could happen only if string is not padded with zeroes, and we accidentally hopped over end of data. 
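+    // If the token began inside the padding, there is no token at all; otherwise
+    // clamp the token length so that it does not cover the padding bytes.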
+ if (*token_start > len) + return false; + *token_len = len - *token_start; +#endif + return *token_len > 0; } diff --git a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp index 2cd20a70821..b8686f962bc 100644 --- a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp +++ b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp @@ -55,13 +55,16 @@ TEST_P(SplitTokenExtractorTest, next) for (const auto & expected_token : param.tokens) { SCOPED_TRACE(++i); - EXPECT_TRUE(token_extractor.next(data->data(), data->size(), &pos, &token_start, &token_len)); + ASSERT_TRUE(token_extractor.next(data->data(), data->size(), &pos, &token_start, &token_len)); EXPECT_EQ(expected_token, param.source.substr(token_start, token_len)) << " token_start:" << token_start << " token_len: " << token_len; } + + ASSERT_FALSE(token_extractor.next(data->data(), data->size(), &pos, &token_start, &token_len)); } -#define BINARY_STRING(str) std::string{str, sizeof(str)-1} +// Helper to allow strings with embedded '\0' chars. +#define BINARY_STRING(str) std::string{str, sizeof(str) - 1} INSTANTIATE_TEST_SUITE_P(ShortSingleToken, SplitTokenExtractorTest, @@ -89,13 +92,13 @@ INSTANTIATE_TEST_SUITE_P(UTF8, ::testing::ValuesIn(std::initializer_list{ { "Single token with mixed ASCII and UTF-8 chars", - BINARY_STRING("abc\u0442" "123\u0447XYZ\u043A"), + "abc\u0442" "123\u0447XYZ\u043A", {"abc\u0442" "123\u0447XYZ\u043A"} }, { "Multiple UTF-8 tokens", - BINARY_STRING("\u043F\u0440\u0438\u0432\u0435\u0442, u043C\u0438\u0440!"), - {"\u043F\u0440\u0438\u0432\u0435\u0442", "u043C\u0438\u0440"} + "\u043F\u0440\u0438\u0432\u0435\u0442, \u043C\u0438\u0440!", + {"\u043F\u0440\u0438\u0432\u0435\u0442", "\u043C\u0438\u0440"} }, }) ); @@ -105,7 +108,7 @@ INSTANTIATE_TEST_SUITE_P(MultipleTokens, ::testing::ValuesIn(std::initializer_list{ { "Multiple tokens separated by whitespace", - BINARY_STRING("\nabc 123\tXYZ\r"), + "\nabc 123\tXYZ\r", { "abc", "123", "XYZ" } @@ -130,7 +133,14 @@ INSTANTIATE_TEST_SUITE_P(MultipleTokens, "\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0" "\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"), { - "0123456789", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz" + "0123456789", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz", + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c" + "\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1" + "\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6" + "\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb" + "\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0" + "\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" } } }) From 20fc52f02be78b774927302a0139c73667d50ff3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Apr 2020 17:33:30 +0300 Subject: [PATCH 047/752] Added GraphiteRollupSortedTransform. 
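The transform merges several sorted inputs into one, rounding Time according
to the matched retention and aggregating Value according to the matched
pattern. A rough wiring sketch; `pipes`, `sort_description` and
`graphite_params` are placeholder names assumed to be prepared by the caller,
and the block size is an arbitrary value; only the constructor signature
comes from this patch:

    auto rollup = std::make_shared<GraphiteRollupSortedTransform>(
        pipes.size(),              // num_inputs: one input port per sorted source
        pipes.at(0).getHeader(),   // common header of all inputs
        sort_description,          // sorting key, e.g. (Path, Time)
        8192,                      // max_block_size (assumed value)
        graphite_params,           // Graphite::Params parsed from <graphite_rollup>
        time(nullptr));            // time_of_merge: used to select retention precision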
--- .../Merges/GraphiteRollupSortedTransform.cpp | 385 ++++++++++++++++++ .../Merges/GraphiteRollupSortedTransform.h | 275 +++++++++++++ 2 files changed, 660 insertions(+) create mode 100644 dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp create mode 100644 dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h diff --git a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp new file mode 100644 index 00000000000..64654ecc73f --- /dev/null +++ b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp @@ -0,0 +1,385 @@ +#include + +namespace DB +{ + +static GraphiteRollupSortedTransform::ColumnsDefinition defineColumns( + const Block & header, const Graphite::Params & params) +{ + GraphiteRollupSortedTransform::ColumnsDefinition def; + + def.path_column_num = header.getPositionByName(params.path_column_name); + def.time_column_num = header.getPositionByName(params.time_column_name); + def.value_column_num = header.getPositionByName(params.value_column_name); + def.version_column_num = header.getPositionByName(params.version_column_name); + + size_t num_columns = header.columns(); + for (size_t i = 0; i < num_columns; ++i) + if (i != def.time_column_num && i != def.value_column_num && i != def.version_column_num) + def.unmodified_column_numbers.push_back(i); + + return def; +} + +GraphiteRollupSortedTransform::GraphiteRollupSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, size_t max_block_size, + Graphite::Params params_, time_t time_of_merge_) + : IMergingTransform(num_inputs, header, header, true) + , merged_data(header.cloneEmptyColumns(), false, max_block_size) + , description(std::move(description_)) + , source_chunks(num_inputs) + , cursors(num_inputs) + , params(std::move(params_)), time_of_merge(time_of_merge_) + , chunk_allocator(num_inputs + max_row_refs) +{ + size_t max_size_of_aggregate_state = 0; + size_t max_alignment_of_aggregate_state = 1; + + for (const auto & pattern : params.patterns) + { + if (pattern.function) + { + max_size_of_aggregate_state = std::max(max_size_of_aggregate_state, pattern.function->sizeOfData()); + max_alignment_of_aggregate_state = std::max(max_alignment_of_aggregate_state, pattern.function->alignOfData()); + } + } + + merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state); + columns_definition = defineColumns(header, params); +} + +Graphite::RollupRule GraphiteRollupSortedTransform::selectPatternForPath(StringRef path) const +{ + const Graphite::Pattern * first_match = &undef_pattern; + + for (const auto & pattern : params.patterns) + { + if (!pattern.regexp) + { + /// Default pattern + if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) + { + /// There is only default pattern for both retention and aggregation + return std::pair(&pattern, &pattern); + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + else if (pattern.regexp->match(path.data, path.size)) + { + /// General pattern with matched path + if (pattern.type == pattern.TypeAll) + { + /// Only for not default patterns with both function and retention parameters + return std::pair(&pattern, &pattern); + } + if (first_match->type == first_match->TypeUndef) + { + 
first_match = &pattern; + continue; + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + } + + return {nullptr, nullptr}; +} + +UInt32 GraphiteRollupSortedTransform::selectPrecision(const Graphite::Retentions & retentions, time_t time) const +{ + static_assert(is_signed_v, "time_t must be signed type"); + + for (const auto & retention : retentions) + { + if (time_of_merge - time >= static_cast(retention.age)) + return retention.precision; + } + + /// No rounding. + return 1; +} + +/** Round the unix timestamp to seconds precision. + * In this case, the date should not change. The date is calculated using the local time zone. + * + * If the rounding value is less than an hour, + * then, assuming that time zones that differ from UTC by a non-integer number of hours are not supported, + * just simply round the unix timestamp down to a multiple of 3600. + * And if the rounding value is greater, + * then we will round down the number of seconds from the beginning of the day in the local time zone. + * + * Rounding to more than a day is not supported. + */ +static time_t roundTimeToPrecision(const DateLUTImpl & date_lut, time_t time, UInt32 precision) +{ + if (precision <= 3600) + { + return time / precision * precision; + } + else + { + time_t date = date_lut.toDate(time); + time_t remainder = time - date; + return date + remainder / precision * precision; + } +} + +void GraphiteRollupSortedTransform::initializeInputs() +{ + queue = SortingHeap(cursors); + is_queue_initialized = true; +} + +void GraphiteRollupSortedTransform::consume(Chunk chunk, size_t input_number) +{ + updateCursor(std::move(chunk), input_number); + + if (is_queue_initialized) + queue.push(cursors[input_number]); +} + +void GraphiteRollupSortedTransform::updateCursor(Chunk chunk, size_t source_num) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); + + auto & source_chunk = source_chunks[source_num]; + + if (source_chunk) + { + source_chunk = chunk_allocator.alloc(std::move(chunk)); + cursors[source_num].reset(source_chunk->getColumns(), {}); + } + else + { + if (cursors[source_num].has_collation) + throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); + + source_chunk = chunk_allocator.alloc(std::move(chunk)); + cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); + } + + source_chunk->all_columns = cursors[source_num].all_columns; + source_chunk->sort_columns = cursors[source_num].sort_columns; +} + +void GraphiteRollupSortedTransform::work() +{ + merge(); + prepareOutputChunk(merged_data); +} + +void GraphiteRollupSortedTransform::merge() +{ + const DateLUTImpl & date_lut = DateLUT::instance(); + + /// Take rows in needed order and put them into `merged_data` until we get `max_block_size` rows. + /// + /// Variables starting with current_* refer to the rows previously popped from the queue that will + /// contribute towards current output row. + /// Variables starting with next_* refer to the row at the top of the queue. 
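+    /// One iteration of the loop below, in outline:
+    ///  1. Look at the top cursor; if its (path, time) key differs from the previous
+    ///     row's, accumulate the finished subgroup into the aggregate state.
+    ///  2. If the key still differs after time rounding, flush the finished group
+    ///     (finishCurrentGroup) and start a new one with the freshly selected rule.
+    ///  3. Within a subgroup keep only the row with the maximum version; on equal
+    ///     versions the last row wins.
+    ///  4. Advance the cursor, or request the next chunk from its source when exhausted.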
+ + while (queue.isValid()) + { + SortCursor current = queue.current(); + + StringRef next_path = current->all_columns[columns_definition.path_column_num]->getDataAt(current->pos); + bool new_path = is_first || next_path != current_group_path; + + is_first = false; + + time_t next_row_time = current->all_columns[columns_definition.time_column_num]->getUInt(current->pos); + /// Is new key before rounding. + bool is_new_key = new_path || next_row_time != current_time; + + if (is_new_key) + { + /// Accumulate the row that has maximum version in the previous group of rows with the same key: + if (merged_data.wasGroupStarted()) + accumulateRow(current_subgroup_newest_row); + + Graphite::RollupRule next_rule = merged_data.currentRule(); + if (new_path) + next_rule = selectPatternForPath(next_path); + + const Graphite::RetentionPattern * retention_pattern = std::get<0>(next_rule); + time_t next_time_rounded; + if (retention_pattern) + { + UInt32 precision = selectPrecision(retention_pattern->retentions, next_row_time); + next_time_rounded = roundTimeToPrecision(date_lut, next_row_time, precision); + } + else + { + /// If no pattern has matched - take the value as-is. + next_time_rounded = next_row_time; + } + + /// Key will be new after rounding. It means new result row. + bool will_be_new_key = new_path || next_time_rounded != current_time_rounded; + + if (will_be_new_key) + { + if (merged_data.wasGroupStarted()) + { + finishCurrentGroup(); + + /// We have enough rows - return, but don't advance the loop. At the beginning of the + /// next call to merge() the same next_cursor will be processed once more and + /// the next output row will be created from it. + if (merged_data.hasEnoughRows()) + return; + } + + /// At this point previous row has been fully processed, so we can advance the loop + /// (substitute current_* values for next_*, advance the cursor). + + startNextGroup(current, next_rule); + + current_time_rounded = next_time_rounded; + } + + current_time = next_row_time; + } + + /// Within all rows with same key, we should leave only one row with maximum version; + /// and for rows with same maximum version - only last row. + if (is_new_key + || current->all_columns[columns_definition.version_column_num]->compareAt( + current->pos, current_subgroup_newest_row.row_num, + *(*current_subgroup_newest_row.all_columns)[columns_definition.version_column_num], + /* nan_direction_hint = */ 1) >= 0) + { + current_subgroup_newest_row.set(current, source_chunks[current.impl->order]); + + /// Small hack: group and subgroups have the same path, so we can set current_group_path here instead of startNextGroup + /// But since we keep in memory current_subgroup_newest_row's block, we could use StringRef for current_group_path and don't + /// make deep copy of the path. + current_group_path = next_path; + } + + if (!current->isLast()) + { + queue.next(); + } + else + { + /// We get the next block from the appropriate source, if there is one. + queue.removeTop(); + requestDataForInput(current.impl->order); + return; + } + } + + /// Write result row for the last group. 
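+    /// (Reached when the queue is drained: the last subgroup has been collected but
+    /// not yet accumulated and flushed.)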
+ if (merged_data.wasGroupStarted()) + { + accumulateRow(current_subgroup_newest_row); + finishCurrentGroup(); + } + + is_finished = true; +} + +void GraphiteRollupSortedTransform::startNextGroup(SortCursor & cursor, Graphite::RollupRule next_rule) +{ + merged_data.startNextGroup(cursor->all_columns, cursor->pos, next_rule, columns_definition); +} + +void GraphiteRollupSortedTransform::finishCurrentGroup() +{ + merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition); +} + +void GraphiteRollupSortedTransform::accumulateRow(RowRef & row) +{ + merged_data.accumulateRow(row, columns_definition); +} + +void GraphiteRollupSortedTransform::GraphiteRollupMergedData::startNextGroup( + const ColumnRawPtrs & raw_columns, size_t row, + Graphite::RollupRule next_rule, ColumnsDefinition & def) +{ + const Graphite::AggregationPattern * aggregation_pattern = std::get<1>(next_rule); + + /// Copy unmodified column values (including path column). + for (size_t j : def.unmodified_column_numbers) + columns[j]->insertFrom(*raw_columns[j], row); + + if (aggregation_pattern) + { + aggregation_pattern->function->create(place_for_aggregate_state.data()); + aggregate_state_created = true; + } + + current_rule = next_rule; + was_group_started = true; +} + +void GraphiteRollupSortedTransform::GraphiteRollupMergedData::insertRow( + time_t time, RowRef & row, ColumnsDefinition & def) +{ + /// Insert calculated values of the columns `time`, `value`, `version`. + columns[def.time_column_num]->insert(time); + auto & row_ref_version_column = (*row.all_columns)[def.version_column_num]; + columns[def.version_column_num]->insertFrom(*row_ref_version_column, row.row_num); + + auto & value_column = columns[def.value_column_num]; + const Graphite::AggregationPattern * aggregation_pattern = std::get<1>(current_rule); + if (aggregate_state_created) + { + aggregation_pattern->function->insertResultInto(place_for_aggregate_state.data(), *value_column); + aggregation_pattern->function->destroy(place_for_aggregate_state.data()); + aggregate_state_created = false; + } + else + value_column->insertFrom(*(*row.all_columns)[def.value_column_num], row.row_num); + + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; + + was_group_started = false; +} + +void GraphiteRollupSortedTransform::GraphiteRollupMergedData::accumulateRow(RowRef & row, ColumnsDefinition & def) +{ + const Graphite::AggregationPattern * aggregation_pattern = std::get<1>(current_rule); + if (aggregate_state_created) + { + auto & column = (*row.all_columns)[def.value_column_num]; + aggregation_pattern->function->add(place_for_aggregate_state.data(), &column, row.row_num, nullptr); + } +} + +GraphiteRollupSortedTransform::GraphiteRollupMergedData::~GraphiteRollupMergedData() +{ + if (aggregate_state_created) + std::get<1>(current_rule)->function->destroy(place_for_aggregate_state.data()); +} + +} diff --git a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h new file mode 100644 index 00000000000..dd0ba6d9493 --- /dev/null +++ b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -0,0 +1,275 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +/** Intended for implementation of "rollup" - aggregation (rounding) of older data + * for a table with Graphite data (Graphite is the system for time series monitoring). 
+ * + * Table with graphite data has at least the following columns (accurate to the name): + * Path, Time, Value, Version + * + * Path - name of metric (sensor); + * Time - time of measurement; + * Value - value of measurement; + * Version - a number, that for equal pairs of Path and Time, need to leave only record with maximum version. + * + * Each row in a table correspond to one value of one sensor. + * + * Pattern should contain function, retention scheme, or both of them. The order of patterns does mean as well: + * * Aggregation OR retention patterns should be first + * * Then aggregation AND retention full patterns have to be placed + * * default pattern without regexp must be the last + * + * Rollup rules are specified in the following way: + * + * pattern + * regexp + * function + * pattern + * regexp + * age -> precision + * age -> precision + * ... + * pattern + * regexp + * function + * age -> precision + * age -> precision + * ... + * pattern + * ... + * default + * function + * age -> precision + * ... + * + * regexp - pattern for sensor name + * default - if no pattern has matched + * + * age - minimal data age (in seconds), to start rounding with specified precision. + * precision - rounding precision (in seconds) + * + * function - name of aggregate function to be applied for values, that time was rounded to same. + * + * Example: + * + * + * + * \.max$ + * max + * + * + * click_cost + * any + * + * 0 + * 5 + * + * + * 86400 + * 60 + * + * + * + * max + * + * 0 + * 60 + * + * + * 3600 + * 300 + * + * + * 86400 + * 3600 + * + * + * + */ +namespace Graphite +{ + struct Retention + { + UInt32 age; + UInt32 precision; + }; + + using Retentions = std::vector; + + struct Pattern + { + std::shared_ptr regexp; + std::string regexp_str; + AggregateFunctionPtr function; + Retentions retentions; /// Must be ordered by 'age' descending. + enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically + }; + + using Patterns = std::vector; + using RetentionPattern = Pattern; + using AggregationPattern = Pattern; + + struct Params + { + String config_name; + String path_column_name; + String time_column_name; + String value_column_name; + String version_column_name; + Graphite::Patterns patterns; + }; + + using RollupRule = std::pair; +} + +/** Merges several sorted ports into one. + * + * For each group of consecutive identical values of the `path` column, + * and the same `time` values, rounded to some precision + * (where rounding accuracy depends on the template set for `path` + * and the amount of time elapsed from `time` to the specified time), + * keeps one line, + * performing the rounding of time, + * merge `value` values using the specified aggregate functions, + * as well as keeping the maximum value of the `version` column. + */ +class GraphiteRollupSortedTransform : public IMergingTransform +{ +public: + GraphiteRollupSortedTransform( + size_t num_inputs, const Block & header, + SortDescription description_, size_t max_block_size, + Graphite::Params params_, time_t time_of_merge_); + + String getName() const override { return "GraphiteRollupSortedTransform"; } + void work() override; + + struct ColumnsDefinition + { + size_t path_column_num; + size_t time_column_num; + size_t value_column_num; + size_t version_column_num; + + /// All columns other than 'time', 'value', 'version'. They are unmodified during rollup. 
+ ColumnNumbers unmodified_column_numbers; + }; + + using RowRef = detail::RowRefWithOwnedChunk; + + /// Specialization for SummingSortedTransform. + class GraphiteRollupMergedData : public MergedData + { + public: + using MergedData::MergedData; + ~GraphiteRollupMergedData(); + + void startNextGroup(const ColumnRawPtrs & raw_columns, size_t row, + Graphite::RollupRule next_rule, ColumnsDefinition & def); + void insertRow(time_t time, RowRef & row, ColumnsDefinition & def); + void accumulateRow(RowRef & row, ColumnsDefinition & def); + bool wasGroupStarted() const { return was_group_started; } + + const Graphite::RollupRule & currentRule() const { return current_rule; } + void allocMemForAggregates(size_t size, size_t alignment) { place_for_aggregate_state.reset(size, alignment); } + + private: + Graphite::RollupRule current_rule = {nullptr, nullptr}; + AlignedBuffer place_for_aggregate_state; + bool aggregate_state_created = false; /// Invariant: if true then current_rule is not NULL. + bool was_group_started = false; + }; + +protected: + void initializeInputs() override; + void consume(Chunk chunk, size_t input_number) override; + +private: + Logger * log = &Logger::get("GraphiteRollupSortedBlockInputStream"); + + GraphiteRollupMergedData merged_data; + SortDescription description; + + /// Chunks currently being merged. + using SourceChunks = std::vector; + SourceChunks source_chunks; + SortCursorImpls cursors; + + SortingHeap queue; + bool is_queue_initialized = false; + + const Graphite::Params params; + ColumnsDefinition columns_definition; + + time_t time_of_merge; + + /// No data has been read. + bool is_first = true; + + /* | path | time | rounded_time | version | value | unmodified | + * ----------------------------------------------------------------------------------- + * | A | 11 | 10 | 1 | 1 | a | | + * | A | 11 | 10 | 3 | 2 | b |> subgroup(A, 11) | + * | A | 11 | 10 | 2 | 3 | c | |> group(A, 10) + * ----------------------------------------------------------------------------------|> + * | A | 12 | 10 | 0 | 4 | d | |> Outputs (A, 10, avg(2, 5), a) + * | A | 12 | 10 | 1 | 5 | e |> subgroup(A, 12) | + * ----------------------------------------------------------------------------------- + * | A | 21 | 20 | 1 | 6 | f | + * | B | 11 | 10 | 1 | 7 | g | + * ... + */ + + /// Path name of current bucket + StringRef current_group_path; + + static constexpr size_t max_row_refs = 2; /// current_subgroup_newest_row, current_row. + /// Last row with maximum version for current primary key (time bucket). + RowRef current_subgroup_newest_row; + + detail::SharedChunkAllocator chunk_allocator; + + /// Time of last read row + time_t current_time = 0; + time_t current_time_rounded = 0; + + const Graphite::Pattern undef_pattern = + { /// temporary empty pattern for selectPatternForPath + .regexp = nullptr, + .regexp_str = "", + .function = nullptr, + .retentions = DB::Graphite::Retentions(), + .type = undef_pattern.TypeUndef, + }; + + Graphite::RollupRule selectPatternForPath(StringRef path) const; + UInt32 selectPrecision(const Graphite::Retentions & retentions, time_t time) const; + + /// Insert the values into the resulting columns, which will not be changed in the future. + void startNextGroup(SortCursor & cursor, Graphite::RollupRule next_rule); + + /// Insert the calculated `time`, `value`, `version` values into the resulting columns by the last group of rows. + void finishCurrentGroup(); + + /// Update the state of the aggregate function with the new `value`. 
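+    /// Does nothing when the matched rule carries no aggregation function.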
+ void accumulateRow(RowRef & row); + + void merge(); + void updateCursor(Chunk chunk, size_t source_num); +}; + +} From 2daef385af8862a29c02181ae8ca01d700660408 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Apr 2020 19:28:50 +0300 Subject: [PATCH 048/752] Use SortedTransforms in merge. --- .../Merges/AggregatingSortedTransform.h | 2 +- .../Merges/GraphiteRollupSortedTransform.cpp | 2 +- .../Merges/GraphiteRollupSortedTransform.h | 2 +- .../Merges/ReplacingSortedTransform.cpp | 2 +- .../Merges/ReplacingSortedTransform.h | 2 +- .../Merges/SummingSortedTransform.cpp | 2 +- .../Merges/SummingSortedTransform.h | 2 +- .../Merges/VersionedCollapsingTransform.cpp | 2 +- .../Merges/VersionedCollapsingTransform.h | 10 +-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 81 +++++++++++-------- ...ream.cpp => MergeTreeSequentialSource.cpp} | 72 ++++++----------- ...utStream.h => MergeTreeSequentialSource.h} | 27 +++---- 13 files changed, 95 insertions(+), 113 deletions(-) rename dbms/src/Storages/MergeTree/{MergeTreeSequentialBlockInputStream.cpp => MergeTreeSequentialSource.cpp} (65%) rename dbms/src/Storages/MergeTree/{MergeTreeSequentialBlockInputStream.h => MergeTreeSequentialSource.h} (73%) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.h b/dbms/src/Processors/Merges/AggregatingSortedTransform.h index 613ac0baa58..5ebc41d6ccf 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.h +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.h @@ -19,7 +19,7 @@ class AggregatingSortedTransform : public IMergingTransform { public: AggregatingSortedTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size); struct SimpleAggregateDescription; diff --git a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp index 64654ecc73f..acec6587f2c 100644 --- a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp +++ b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp @@ -22,7 +22,7 @@ static GraphiteRollupSortedTransform::ColumnsDefinition defineColumns( } GraphiteRollupSortedTransform::GraphiteRollupSortedTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size, Graphite::Params params_, time_t time_of_merge_) : IMergingTransform(num_inputs, header, header, true) diff --git a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h index dd0ba6d9493..4dd394198ad 100644 --- a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -151,7 +151,7 @@ class GraphiteRollupSortedTransform : public IMergingTransform { public: GraphiteRollupSortedTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size, Graphite::Params params_, time_t time_of_merge_); diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp index e39b33a5a46..d45e6f04aa6 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.cpp @@ -10,7 +10,7 @@ namespace ErrorCodes } ReplacingSortedTransform::ReplacingSortedTransform( - 
size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, const String & version_column, size_t max_block_size, WriteBuffer * out_row_sources_buf_, diff --git a/dbms/src/Processors/Merges/ReplacingSortedTransform.h b/dbms/src/Processors/Merges/ReplacingSortedTransform.h index d28bd239cfe..ff447ee8da6 100644 --- a/dbms/src/Processors/Merges/ReplacingSortedTransform.h +++ b/dbms/src/Processors/Merges/ReplacingSortedTransform.h @@ -17,7 +17,7 @@ class ReplacingSortedTransform final : public IMergingTransform { public: ReplacingSortedTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, const String & version_column, size_t max_block_size, WriteBuffer * out_row_sources_buf_ = nullptr, diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index 99008025232..b9359d2543f 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -366,7 +366,7 @@ namespace } SummingSortedTransform::SummingSortedTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. const Names & column_names_to_sum, diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.h b/dbms/src/Processors/Merges/SummingSortedTransform.h index e7915cd3c8c..390f1113985 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.h +++ b/dbms/src/Processors/Merges/SummingSortedTransform.h @@ -19,7 +19,7 @@ class SummingSortedTransform final : public IMergingTransform public: SummingSortedTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. const Names & column_names_to_sum, diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp index 8b8b2bfa063..1e5043bb5b9 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -8,7 +8,7 @@ namespace DB static const size_t MAX_ROWS_IN_MULTIVERSION_QUEUE = 8192; VersionedCollapsingTransform::VersionedCollapsingTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, const String & sign_column_, size_t max_block_size, WriteBuffer * out_row_sources_buf_, diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h index 0dbdf8e2a40..04a18142072 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.h @@ -20,11 +20,11 @@ class VersionedCollapsingTransform final : public IMergingTransform public: /// Don't need version column. It's in primary key. 
VersionedCollapsingTransform( - size_t num_inputs, const Block & header, - SortDescription description_, const String & sign_column_, - size_t max_block_size, - WriteBuffer * out_row_sources_buf_ = nullptr, - bool use_average_block_sizes = false); + const Block & header, size_t num_inputs, + SortDescription description_, const String & sign_column_, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_ = nullptr, + bool use_average_block_sizes = false); String getName() const override { return "VersionedCollapsingTransform"; } void work() override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 2279618c9a0..129ecaea27e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 0c8c39b074c..b5356fbce88 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1,6 +1,6 @@ #include "MergeTreeDataMergerMutator.h" -#include +#include #include #include #include @@ -12,16 +12,19 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -660,7 +663,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /** Read from all parts, merge and write into a new one. * In passing, we calculate expression for sorting. */ - BlockInputStreams src_streams; + Pipes pipes; UInt64 watch_prev_elapsed = 0; /// We count total amount of bytes in parts @@ -687,18 +690,24 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor for (const auto & part : parts) { - auto input = std::make_unique( + auto input = std::make_unique( data, part, merging_column_names, read_with_direct_io, true); input->setProgressCallback( MergeProgressCallback(merge_entry, watch_prev_elapsed, horizontal_stage_progress)); - BlockInputStreamPtr stream = std::move(input); - if (data.hasPrimaryKey() || data.hasSkipIndices()) - stream = std::make_shared( - std::make_shared(stream, data.sorting_key_and_skip_indices_expr)); + Pipe pipe(std::move(input)); - src_streams.emplace_back(stream); + if (data.hasPrimaryKey() || data.hasSkipIndices()) + { + auto expr = std::make_shared(pipe.getHeader(), data.sorting_key_and_skip_indices_expr); + pipe.addSimpleTransform(std::move(expr)); + + auto materializing = std::make_shared(pipe.getHeader()); + pipe.addSimpleTransform(std::move(materializing)); + } + + pipes.emplace_back(std::move(pipe)); } Names sort_columns = data.sorting_key_columns; @@ -706,14 +715,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); - Block header = src_streams.at(0)->getHeader(); + Block header = pipes.at(0).getHeader(); for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); /// The order of the streams is important: when the key is matched, the elements go in the order of the source stream number. 
/// In the merged part, the lines with the same key must be in the ascending order of the identifier of original part, /// that is going in insertion order. - std::shared_ptr merged_stream; + ProcessorPtr merged_transform; /// If merge is vertical we cannot calculate it bool blocks_are_granules_size = (merge_alg == MergeAlgorithm::Vertical); @@ -722,45 +731,48 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - merged_stream = std::make_unique( - src_streams, sort_description, merge_block_size, 0, rows_sources_write_buf.get(), true, blocks_are_granules_size); + merged_transform = std::make_unique( + header, pipes.size(), sort_description, merge_block_size, 0, rows_sources_write_buf.get(), true, blocks_are_granules_size); break; case MergeTreeData::MergingParams::Collapsing: - merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.sign_column, + merged_transform = std::make_unique( + header, pipes.size(), sort_description, data.merging_params.sign_column, merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; case MergeTreeData::MergingParams::Summing: - merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.columns_to_sum, merge_block_size); + merged_transform = std::make_unique( + header, pipes.size(), sort_description, data.merging_params.columns_to_sum, merge_block_size); break; case MergeTreeData::MergingParams::Aggregating: - merged_stream = std::make_unique( - src_streams, sort_description, merge_block_size); + merged_transform = std::make_unique( + header, pipes.size(), sort_description, merge_block_size); break; case MergeTreeData::MergingParams::Replacing: - merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.version_column, + merged_transform = std::make_unique( + header, pipes.size(), sort_description, data.merging_params.version_column, merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; case MergeTreeData::MergingParams::Graphite: - merged_stream = std::make_unique( - src_streams, sort_description, merge_block_size, + merged_transform = std::make_unique( + header, pipes.size(), sort_description, merge_block_size, data.merging_params.graphite_params, time_of_merge); break; case MergeTreeData::MergingParams::VersionedCollapsing: - merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.sign_column, + merged_transform = std::make_unique( + header, pipes.size(), sort_description, data.merging_params.sign_column, merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; } + Pipe merged_pipe(std::move(pipes), std::move(merged_transform)); + BlockInputStreamPtr merged_stream = std::make_shared(std::move(merged_pipe)); + if (deduplicate) merged_stream = std::make_shared(merged_stream, SizeLimits(), 0 /*limit_hint*/, Names()); @@ -857,13 +869,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor MergeStageProgress column_progress(progress_before, column_sizes->columnWeight(column_name)); for (size_t part_num = 0; part_num < parts.size(); ++part_num) { - auto column_part_stream = std::make_shared( + auto column_part_source = std::make_shared( data, parts[part_num], column_names, read_with_direct_io, true); - column_part_stream->setProgressCallback( + column_part_source->setProgressCallback( MergeProgressCallback(merge_entry, 
watch_prev_elapsed, column_progress)); - column_part_streams[part_num] = std::move(column_part_stream); + column_part_streams[part_num] = std::make_shared( + Pipe(std::move(column_part_source))); } rows_sources_read_buf.seek(0, 0); diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp similarity index 65% rename from dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 9e0737810df..b2ac3d995f9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -8,16 +8,17 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } -MergeTreeSequentialBlockInputStream::MergeTreeSequentialBlockInputStream( +MergeTreeSequentialSource::MergeTreeSequentialSource( const MergeTreeData & storage_, - const MergeTreeData::DataPartPtr & data_part_, + MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet) - : storage(storage_) - , data_part(data_part_) - , columns_to_read(columns_to_read_) + : SourceWithProgress(storage_.getSampleBlockForColumns(columns_to_read)) + , storage(storage_) + , data_part(std::move(data_part_)) + , columns_to_read(std::move(columns_to_read_)) , read_with_direct_io(read_with_direct_io_) , mark_cache(storage.global_context.getMarkCache()) { @@ -33,8 +34,6 @@ MergeTreeSequentialBlockInputStream::MergeTreeSequentialBlockInputStream( addTotalRowsApprox(data_part->rows_count); - header = storage.getSampleBlockForColumns(columns_to_read); - /// Add columns because we don't want to read empty blocks injectRequiredColumns(storage, data_part, columns_to_read); NamesAndTypesList columns_for_reader; @@ -62,33 +61,11 @@ MergeTreeSequentialBlockInputStream::MergeTreeSequentialBlockInputStream( /* uncompressed_cache = */ nullptr, mark_cache.get(), reader_settings); } - -void MergeTreeSequentialBlockInputStream::fixHeader(Block & header_block) const -{ - /// Types may be different during ALTER (when this stream is used to perform an ALTER). 
- for (const auto & name_type : data_part->getColumns()) - { - if (header_block.has(name_type.name)) - { - auto & elem = header_block.getByName(name_type.name); - if (!elem.type->equals(*name_type.type)) - { - elem.type = name_type.type; - elem.column = elem.type->createColumn(); - } - } - } -} - -Block MergeTreeSequentialBlockInputStream::getHeader() const -{ - return header; -} - -Block MergeTreeSequentialBlockInputStream::readImpl() +Chunk MergeTreeSequentialSource::generate() try { - Block res; + auto & header = getPort().getHeader(); + if (!isCancelled() && current_row < data_part->rows_count) { size_t rows_to_read = data_part->index_granularity.getMarkRows(current_mark); @@ -96,15 +73,15 @@ try auto & sample = reader->getColumns(); Columns columns(sample.size()); - size_t rows_readed = reader->readRows(current_mark, continue_reading, rows_to_read, columns); + size_t rows_read = reader->readRows(current_mark, continue_reading, rows_to_read, columns); - if (rows_readed) + if (rows_read) { - current_row += rows_readed; - current_mark += (rows_to_read == rows_readed); + current_row += rows_read; + current_mark += (rows_to_read == rows_read); bool should_evaluate_missing_defaults = false; - reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_readed); + reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read); if (should_evaluate_missing_defaults) { @@ -113,20 +90,21 @@ try reader->performRequiredConversions(columns); - res = header.cloneEmpty(); - /// Reorder columns and fill result block. size_t num_columns = sample.size(); + Columns res_columns; + res_columns.reserve(num_columns); + auto it = sample.begin(); for (size_t i = 0; i < num_columns; ++i) { - if (res.has(it->name)) - res.getByName(it->name).column = std::move(columns[i]); + if (header.has(it->name)) + res_columns.emplace_back(std::move(columns[i])); ++it; } - res.checkNumberOfRows(); + return Chunk(std::move(res_columns), rows_read); } } else @@ -134,7 +112,7 @@ try finish(); } - return res; + return {}; } catch (...) { @@ -144,8 +122,7 @@ catch (...) throw; } - -void MergeTreeSequentialBlockInputStream::finish() +void MergeTreeSequentialSource::finish() { /** Close the files (before destroying the object). 
* When many sources are created, but simultaneously reading only a few of them, @@ -155,7 +132,6 @@ void MergeTreeSequentialBlockInputStream::finish() data_part.reset(); } - -MergeTreeSequentialBlockInputStream::~MergeTreeSequentialBlockInputStream() = default; +MergeTreeSequentialSource::~MergeTreeSequentialSource() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.h similarity index 73% rename from dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeSequentialSource.h index fd57a39fd7a..dac559913aa 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include @@ -9,40 +9,33 @@ namespace DB { /// Lightweight (in terms of logic) stream for reading single part from MergeTree -class MergeTreeSequentialBlockInputStream : public IBlockInputStream +class MergeTreeSequentialSource : public SourceWithProgress { public: - MergeTreeSequentialBlockInputStream( + MergeTreeSequentialSource( const MergeTreeData & storage_, - const MergeTreeData::DataPartPtr & data_part_, + MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet = false ); - ~MergeTreeSequentialBlockInputStream() override; + ~MergeTreeSequentialSource() override; - String getName() const override { return "MergeTreeSequentialBlockInputStream"; } - - Block getHeader() const override; - - /// Closes readers and unlock part locks - void finish(); + String getName() const override { return "MergeTreeSequentialSource"; } size_t getCurrentMark() const { return current_mark; } size_t getCurrentRow() const { return current_row; } protected: - Block readImpl() override; + Chunk generate() override; private: const MergeTreeData & storage; - Block header; - /// Data part will not be removed if the pointer owns it MergeTreeData::DataPartPtr data_part; @@ -52,7 +45,7 @@ private: /// Should read using direct IO bool read_with_direct_io; - Logger * log = &Logger::get("MergeTreeSequentialBlockInputStream"); + Logger * log = &Logger::get("MergeTreeSequentialSource"); std::shared_ptr mark_cache; using MergeTreeReaderPtr = std::unique_ptr; @@ -65,8 +58,8 @@ private: size_t current_row = 0; private: - void fixHeader(Block & header_block) const; - + /// Closes readers and unlock part locks + void finish(); }; } From 4510b25df2e982492306fd6909c583fe9e86d3b2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Apr 2020 19:39:35 +0300 Subject: [PATCH 049/752] Try fix build. --- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index fbc42de5517..0a9869adfd9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include From 87a456839b392637a0ee9e1c5a97f24201732851 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Apr 2020 19:40:50 +0300 Subject: [PATCH 050/752] Try fix build. 
--- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 0a9869adfd9..38055a6b67b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include From dd40c7dc726d12adaf2036947a71f4c770f503e3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Apr 2020 20:02:08 +0300 Subject: [PATCH 051/752] Try fix build. --- dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 1c94bca4e3a..2aebd029f0a 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -66,7 +66,7 @@ namespace } AggregatingSortedTransform::AggregatingSortedTransform( - size_t num_inputs, const Block & header, + const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) : IMergingTransform(num_inputs, header, header, true) , columns_definition(defineColumns(header, description_)) From d16a435531ed740d08275c149d68e3a34d744bb1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Apr 2020 21:33:23 +0300 Subject: [PATCH 052/752] Try fix tests. --- dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index b2ac3d995f9..19bc6ddf790 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -15,7 +15,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet) - : SourceWithProgress(storage_.getSampleBlockForColumns(columns_to_read)) + : SourceWithProgress(storage_.getSampleBlockForColumns(columns_to_read_)) , storage(storage_) , data_part(std::move(data_part_)) , columns_to_read(std::move(columns_to_read_)) From e50ebd5ee12a5bc226ac08aef0822feb1f0ec27a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Fri, 3 Apr 2020 01:35:22 +0300 Subject: [PATCH 053/752] added support of custom settings to FileDictionarySource --- dbms/Dictionaries/DictionarySourceFactory.cpp | 7 ++-- dbms/Dictionaries/DictionaryStructure.cpp | 32 +------------------ dbms/Dictionaries/FileDictionarySource.cpp | 15 +++++++-- dbms/Dictionaries/FileDictionarySource.h | 8 ++--- 4 files changed, 22 insertions(+), 40 deletions(-) diff --git a/dbms/Dictionaries/DictionarySourceFactory.cpp b/dbms/Dictionaries/DictionarySourceFactory.cpp index fa3b3017ad2..8431e065dd4 100644 --- a/dbms/Dictionaries/DictionarySourceFactory.cpp +++ b/dbms/Dictionaries/DictionarySourceFactory.cpp @@ -84,10 +84,11 @@ DictionarySourcePtr DictionarySourceFactory::create( { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); - if (keys.size() != 1) - throw Exception{name + ": element dictionary.source should have exactly one child element", + + if (keys.empty() || keys.size() > 2) + throw Exception{name + ": element dictionary.source 
should have one or two child elements", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG}; - + const auto & source_type = keys.front(); const auto found = registered_sources.find(source_type); diff --git a/dbms/Dictionaries/DictionaryStructure.cpp b/dbms/Dictionaries/DictionaryStructure.cpp index 33196f627d8..fe4b1474e84 100644 --- a/dbms/Dictionaries/DictionaryStructure.cpp +++ b/dbms/Dictionaries/DictionaryStructure.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -196,9 +195,6 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration attributes = getAttributes(config, config_prefix); - settings = Settings(); - getSettings(config, config_prefix, settings); - if (attributes.empty()) throw Exception{"Dictionary has no attributes defined", ErrorCodes::BAD_ARGUMENTS}; } @@ -360,30 +356,4 @@ std::vector DictionaryStructure::getAttributes( return res_attributes; } - -void DictionaryStructure::getSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Settings & dict_settings) -{ - Poco::Util::AbstractConfiguration::Keys config_elems; - config.keys(config_prefix, config_elems); - - - for (const auto & config_elem : config_elems) - { - if (startsWith(config_elem, "settings")) - { - /* i won't do break after this if in case there can be multiple settings sections */ - - const auto prefix = config_prefix + '.' + config_elem; - Poco::Util::AbstractConfiguration::Keys setting_keys; - config.keys(prefix, setting_keys); - dict_settings.loadSettingsFromConfig(prefix, config); - - } - } - -} - -} +} \ No newline at end of file diff --git a/dbms/Dictionaries/FileDictionarySource.cpp b/dbms/Dictionaries/FileDictionarySource.cpp index 5e7a784183c..7871b4a2a20 100644 --- a/dbms/Dictionaries/FileDictionarySource.cpp +++ b/dbms/Dictionaries/FileDictionarySource.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes FileDictionarySource::FileDictionarySource( const std::string & filepath_, const std::string & format_, - Block & sample_block_, const Context & context_, bool check_config) + Block & sample_block_, Context & context_, bool check_config) : filepath{filepath_} , format{format_} , sample_block{sample_block_} @@ -83,7 +83,18 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory) const auto filepath = config.getString(config_prefix + ".file.path"); const auto format = config.getString(config_prefix + ".file.format"); - return std::make_unique(filepath, format, sample_block, context, check_config); + Context context_local_copy(context); + if (config.has(config_prefix + ".settings")) + { + const auto prefix = config_prefix + ".settings"; + Settings settings; + + settings.loadSettingsFromConfig(prefix, config); + // const_cast(context).setSettings(settings); + context_local_copy.setSettings(settings); + } + + return std::make_unique(filepath, format, sample_block, context_local_copy, check_config); }; factory.registerSource("file", create_table_source); diff --git a/dbms/Dictionaries/FileDictionarySource.h b/dbms/Dictionaries/FileDictionarySource.h index 3d00c026e07..e22906633db 100644 --- a/dbms/Dictionaries/FileDictionarySource.h +++ b/dbms/Dictionaries/FileDictionarySource.h @@ -3,7 +3,7 @@ #include #include "IDictionarySource.h" #include - +#include namespace DB { @@ -11,14 +11,14 @@ namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } -class Context; +// class Context; /// Allows loading dictionaries from a file with given format, does not support "random access" class FileDictionarySource final : 
public IDictionarySource { public: FileDictionarySource(const std::string & filepath_, const std::string & format_, - Block & sample_block_, const Context & context_, bool check_config); + Block & sample_block_, Context & context_, bool check_config); FileDictionarySource(const FileDictionarySource & other); @@ -62,7 +62,7 @@ private: const std::string filepath; const std::string format; Block sample_block; - const Context & context; + const Context context; Poco::Timestamp last_modification; }; From 9f7d1f992025618e22692d38ca06b0824e222f01 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Apr 2020 12:30:20 +0300 Subject: [PATCH 054/752] Fix style. --- dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 5 +++++ dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp | 5 +++++ dbms/src/Processors/Merges/SummingSortedTransform.cpp | 2 -- dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp | 5 +++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 2aebd029f0a..3de4e7773f0 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace { AggregatingSortedTransform::ColumnsDefinition defineColumns( diff --git a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp index acec6587f2c..5ada52790a3 100644 --- a/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp +++ b/dbms/src/Processors/Merges/GraphiteRollupSortedTransform.cpp @@ -3,6 +3,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + static GraphiteRollupSortedTransform::ColumnsDefinition defineColumns( const Block & header, const Graphite::Params & params) { diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index b9359d2543f..af97999a4b9 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -623,6 +623,4 @@ void SummingSortedTransform::merge() is_finished = true; } - - } diff --git a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp index 1e5043bb5b9..c11cf46a6ca 100644 --- a/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp +++ b/dbms/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + static const size_t MAX_ROWS_IN_MULTIVERSION_QUEUE = 8192; VersionedCollapsingTransform::VersionedCollapsingTransform( From a3534046d1fd2dcc01f60d0f51b48f7dc1bbbdc4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Apr 2020 14:12:44 +0300 Subject: [PATCH 055/752] Try fix summing. 
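
The bug: insertRow() indexed the merged block's columns with the source-row
positions from column_numbers, which apparently only works when the merged block
holds every source column; a source position is otherwise not a valid index into
it. The fix below instead writes the listed source values into the last
column_numbers.size() output columns, in order. A compilable sketch of the
corrected mapping (simplified types; the tail-alignment of the output columns is
an assumption read off the arithmetic in the patch):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Stand-in for insertRow(): copy the row values at the given source
    // positions into the trailing output columns, in order.
    void insertTailAligned(std::vector<std::vector<int>> & columns,
                           const std::vector<int> & row,
                           const std::vector<size_t> & column_numbers)
    {
        size_t next_column = columns.size() - column_numbers.size();
        for (size_t column_number : column_numbers)
        {
            columns[next_column].push_back(row[column_number]);
            ++next_column;
        }
    }

    int main()
    {
        std::vector<std::vector<int>> columns(3);
        std::vector<int> row = {10, 11, 12, 13, 14, 15, 16, 17};

        insertTailAligned(columns, row, {0, 5, 7});

        // row[0] -> columns[0], row[5] -> columns[1], row[7] -> columns[2];
        // the old code would have written to columns[5] and columns[7] instead.
        assert(columns[0].back() == 10);
        assert(columns[1].back() == 15);
        assert(columns[2].back() == 17);
    }
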
--- dbms/src/Processors/Merges/SummingSortedTransform.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.h b/dbms/src/Processors/Merges/SummingSortedTransform.h index 390f1113985..2e07ae12115 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.h +++ b/dbms/src/Processors/Merges/SummingSortedTransform.h @@ -54,8 +54,12 @@ public: void insertRow(const Row & row, const ColumnNumbers & column_numbers) { - for (auto column_number :column_numbers) - columns[column_number]->insert(row[column_number]); + size_t next_column = columns.size() - column_numbers.size(); + for (auto column_number : column_numbers) + { + columns[next_column]->insert(row[column_number]); + ++next_column; + } ++total_merged_rows; ++merged_rows; From 704a94f022f50f5e6aedea9ed9581cd411df150b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Apr 2020 14:57:24 +0300 Subject: [PATCH 056/752] Try fix summing. --- .../Merges/AggregatingSortedTransform.cpp | 21 ++++++++---------- .../Merges/SummingSortedTransform.cpp | 22 ++++++++----------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 3de4e7773f0..239d8b95cdf 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -168,19 +168,13 @@ void AggregatingSortedTransform::merge() bool has_previous_group = !last_key.empty(); SortCursor current = queue.current(); + detail::RowRef current_key; + current_key.set(current); - { - detail::RowRef current_key; - current_key.set(current); - - if (!has_previous_group) /// The first key encountered. - key_differs = true; - else - key_differs = !last_key.hasEqualSortColumnsWith(current_key); - - last_key = current_key; - last_chunk_sort_columns.clear(); - } + if (!has_previous_group) /// The first key encountered. + key_differs = true; + else + key_differs = !last_key.hasEqualSortColumnsWith(current_key); if (key_differs) { @@ -189,6 +183,7 @@ void AggregatingSortedTransform::merge() { /// Write the simple aggregation result for the previous group. insertSimpleAggregationResult(); + last_key.reset(); return; } @@ -216,6 +211,8 @@ void AggregatingSortedTransform::merge() if (!current->isLast()) { + last_key = current_key; + last_chunk_sort_columns.clear(); queue.next(); } else diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index af97999a4b9..efdd7f85472 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -540,22 +540,16 @@ void SummingSortedTransform::merge() bool has_previous_group = !last_key.empty(); SortCursor current = queue.current(); + detail::RowRef current_key; + current_key.set(current); + if (!has_previous_group) /// The first key encountered. { - detail::RowRef current_key; - current_key.set(current); - - if (!has_previous_group) /// The first key encountered. 
- { - key_differs = true; - current_row_is_zero = true; - } - else - key_differs = !last_key.hasEqualSortColumnsWith(current_key); - - last_key = current_key; - last_chunk_sort_columns.clear(); + key_differs = true; + current_row_is_zero = true; } + else + key_differs = !last_key.hasEqualSortColumnsWith(current_key); if (key_differs) { @@ -605,6 +599,8 @@ void SummingSortedTransform::merge() if (!current->isLast()) { + last_key = current_key; + last_chunk_sort_columns.clear(); queue.next(); } else From 0fe5528842d9946b2b2c200f7a6127d5140cde74 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Apr 2020 15:15:08 +0300 Subject: [PATCH 057/752] Try fix summing. --- .../Merges/AggregatingSortedTransform.cpp | 21 ++++++++++-------- .../Merges/SummingSortedTransform.cpp | 22 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 239d8b95cdf..3de4e7773f0 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -168,13 +168,19 @@ void AggregatingSortedTransform::merge() bool has_previous_group = !last_key.empty(); SortCursor current = queue.current(); - detail::RowRef current_key; - current_key.set(current); - if (!has_previous_group) /// The first key encountered. - key_differs = true; - else - key_differs = !last_key.hasEqualSortColumnsWith(current_key); + { + detail::RowRef current_key; + current_key.set(current); + + if (!has_previous_group) /// The first key encountered. + key_differs = true; + else + key_differs = !last_key.hasEqualSortColumnsWith(current_key); + + last_key = current_key; + last_chunk_sort_columns.clear(); + } if (key_differs) { @@ -183,7 +189,6 @@ void AggregatingSortedTransform::merge() { /// Write the simple aggregation result for the previous group. insertSimpleAggregationResult(); - last_key.reset(); return; } @@ -211,8 +216,6 @@ void AggregatingSortedTransform::merge() if (!current->isLast()) { - last_key = current_key; - last_chunk_sort_columns.clear(); queue.next(); } else diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index efdd7f85472..af97999a4b9 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -540,16 +540,22 @@ void SummingSortedTransform::merge() bool has_previous_group = !last_key.empty(); SortCursor current = queue.current(); - detail::RowRef current_key; - current_key.set(current); - if (!has_previous_group) /// The first key encountered. { - key_differs = true; - current_row_is_zero = true; + detail::RowRef current_key; + current_key.set(current); + + if (!has_previous_group) /// The first key encountered. + { + key_differs = true; + current_row_is_zero = true; + } + else + key_differs = !last_key.hasEqualSortColumnsWith(current_key); + + last_key = current_key; + last_chunk_sort_columns.clear(); } - else - key_differs = !last_key.hasEqualSortColumnsWith(current_key); if (key_differs) { @@ -599,8 +605,6 @@ void SummingSortedTransform::merge() if (!current->isLast()) { - last_key = current_key; - last_chunk_sort_columns.clear(); queue.next(); } else From 47eccf233289eb7f1a4c00904733f8716acc65b0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Apr 2020 15:32:12 +0300 Subject: [PATCH 058/752] Try fix summing. 
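
Context for this change: RowRef used to hold a ColumnRawPtrs * aimed at the
cursor's sort_columns vector, so after updateCursor() moves that vector away and
reset() refills the cursor, the stored pointer silently observes the new chunk's
columns. Keeping the raw element pointer plus an explicit length instead
apparently relies on a std::vector property: move-constructing a vector transfers
its heap buffer, so a data() pointer captured earlier stays valid inside the
destination vector (patch 059 below completes the fix by moving sort_columns,
the vector that pointer was taken from, rather than all_columns). A
self-contained illustration of that invariant, with plain ints standing in for
the IColumn pointers:

    #include <cassert>
    #include <utility>
    #include <vector>

    int main()
    {
        std::vector<int> sort_columns = {1, 2, 3};
        int * raw = sort_columns.data();      // what RowRef::set() captures

        // Like updateCursor(): hand the old chunk's columns to a keep-alive slot.
        std::vector<int> last_chunk_sort_columns = std::move(sort_columns);

        // The heap buffer travelled with the move, so the captured pointer is
        // still valid and still sees the old data...
        assert(raw == last_chunk_sort_columns.data());
        assert(raw[2] == 3);

        // ...even after the moved-from vector is refilled, as cursor reset()
        // does with the next chunk's columns.
        sort_columns = {7, 8, 9};
        assert(raw[0] == 1);
    }
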
--- dbms/src/Processors/Merges/RowRef.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/dbms/src/Processors/Merges/RowRef.h b/dbms/src/Processors/Merges/RowRef.h index ac4be79f560..48ce92dbdbd 100644 --- a/dbms/src/Processors/Merges/RowRef.h +++ b/dbms/src/Processors/Merges/RowRef.h @@ -105,7 +105,8 @@ inline void intrusive_ptr_release(SharedChunk * ptr) /// This class represents a row in a chunk. struct RowRef { - ColumnRawPtrs * sort_columns = nullptr; /// Point to sort_columns from SortCursor or last_chunk_sort_columns. + const IColumn ** sort_columns = nullptr; /// Point to sort_columns from SortCursor or last_chunk_sort_columns. + size_t num_columns = 0; UInt64 row_num = 0; bool empty() const { return sort_columns == nullptr; } @@ -113,19 +114,19 @@ struct RowRef void set(SortCursor & cursor) { - sort_columns = &cursor.impl->sort_columns; + sort_columns = cursor.impl->sort_columns.data(); + num_columns = cursor.impl->sort_columns.size(); row_num = cursor.impl->pos; } - static bool checkEquals(const ColumnRawPtrs * left, size_t left_row, const ColumnRawPtrs * right, size_t right_row) + static bool checkEquals(size_t size, const IColumn ** lhs, size_t lhs_row, const IColumn ** rhs, size_t rhs_row) { - auto size = left->size(); for (size_t col_number = 0; col_number < size; ++col_number) { - auto & cur_column = (*left)[col_number]; - auto & other_column = (*right)[col_number]; + auto & cur_column = lhs[col_number]; + auto & other_column = rhs[col_number]; - if (0 != cur_column->compareAt(left_row, right_row, *other_column, 1)) + if (0 != cur_column->compareAt(lhs_row, rhs_row, *other_column, 1)) return false; } @@ -134,7 +135,7 @@ struct RowRef bool hasEqualSortColumnsWith(const RowRef & other) { - return checkEquals(sort_columns, row_num, other.sort_columns, other.row_num); + return checkEquals(num_columns, sort_columns, row_num, other.sort_columns, other.row_num); } }; @@ -178,7 +179,8 @@ struct RowRefWithOwnedChunk bool hasEqualSortColumnsWith(const RowRefWithOwnedChunk & other) { - return RowRef::checkEquals(sort_columns, row_num, other.sort_columns, other.row_num); + return RowRef::checkEquals(sort_columns->size(), sort_columns->data(), row_num, + other.sort_columns->data(), other.row_num); } }; From c53b902d6c34001539cb43359f6ce119fe0aa3d6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 3 Apr 2020 17:09:01 +0300 Subject: [PATCH 059/752] Try fix summing. --- dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 2 +- dbms/src/Processors/Merges/SummingSortedTransform.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 3de4e7773f0..b3a1be9f253 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -117,7 +117,7 @@ void AggregatingSortedTransform::updateCursor(Chunk chunk, size_t source_num) { /// Extend lifetime of last chunk. 
last_chunk = std::move(source_chunk); - last_chunk_sort_columns = std::move(cursors[source_num].all_columns); + last_chunk_sort_columns = std::move(cursors[source_num].sort_columns); source_chunk = std::move(chunk); cursors[source_num].reset(source_chunk.getColumns(), {}); diff --git a/dbms/src/Processors/Merges/SummingSortedTransform.cpp b/dbms/src/Processors/Merges/SummingSortedTransform.cpp index af97999a4b9..f34a3b479d7 100644 --- a/dbms/src/Processors/Merges/SummingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/SummingSortedTransform.cpp @@ -411,7 +411,7 @@ void SummingSortedTransform::updateCursor(Chunk chunk, size_t source_num) { /// Extend lifetime of last chunk. last_chunk = std::move(source_chunk); - last_chunk_sort_columns = std::move(cursors[source_num].all_columns); + last_chunk_sort_columns = std::move(cursors[source_num].sort_columns); source_chunk = std::move(chunk); cursors[source_num].reset(source_chunk.getColumns(), {}); From 6ecfc03cbb9cddfadecc96d1817b7386eb418ecc Mon Sep 17 00:00:00 2001 From: Avogar Date: Fri, 3 Apr 2020 23:44:13 +0300 Subject: [PATCH 060/752] Change parsing msgpack data. --- .../Formats/Impl/MsgPackRowInputFormat.cpp | 37 ++++++++++++------ .../Formats/Impl/MsgPackRowInputFormat.h | 3 +- .../01098_msgpack_format.reference | 2 + .../0_stateless/01098_msgpack_format.sh | 31 +++++++++++++-- .../0_stateless/data_msgpack/all_types.msgpk | Bin 200 -> 0 bytes .../data_msgpack/nested_arrays.msgpk | 1 - 6 files changed, 56 insertions(+), 18 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/data_msgpack/all_types.msgpk delete mode 100644 dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 0b3fb3d58ed..32f89c816c5 100644 --- a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -24,21 +24,34 @@ namespace ErrorCodes } MsgPackRowInputFormat::MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) - : IRowInputFormat(header_, in_, std::move(params_)), data_types(header_.getDataTypes()) {} + : IRowInputFormat(header_, in_, std::move(params_)), buf(in_), data_types(header_.getDataTypes()) {} bool MsgPackRowInputFormat::readObject() { - if (in.eof() && unpacker.nonparsed_size() == 0) + if (buf.eof()) return false; - while (!unpacker.next(object_handle)) + PeekableReadBufferCheckpoint checkpoint{buf}; + size_t offset; + bool need_more_data = true; + while (need_more_data) { - if (in.eof()) - throw Exception("Unexpected end of file while parsing MsgPack object.", ErrorCodes::INCORRECT_DATA); - unpacker.reserve_buffer(in.available()); - memcpy(unpacker.buffer(), in.position(), in.available()); - unpacker.buffer_consumed(in.available()); - in.position() += in.available(); + offset = 0; + try + { + object_handle = msgpack::unpack(buf.position(), buf.buffer().end() - buf.position(), offset); + need_more_data = false; + } + catch (msgpack::insufficient_bytes &) + { + buf.position() = buf.buffer().end(); + if (buf.eof()) + throw Exception("Unexpected end of file while parsing msgpack object.", ErrorCodes::INCORRECT_DATA); + buf.position() = buf.buffer().end(); + buf.makeContinuousMemoryFromCheckpointToPos(); + buf.rollbackToCheckpoint(); + } } + buf.position() += offset; return true; } @@ -168,9 +181,9 @@ bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & void 
registerInputFormatProcessorMsgPack(FormatFactory & factory) { factory.registerInputFormatProcessor("MsgPack", []( - ReadBuffer &buf, - const Block &sample, - const RowInputFormatParams ¶ms, + ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams & params, const FormatSettings &) { return std::make_shared(sample, buf, params); diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 7daac811374..8ed23a1e0f4 100644 --- a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -20,8 +21,8 @@ private: bool readObject(); void insertObject(IColumn & column, DataTypePtr type, const msgpack::object & object); + PeekableReadBuffer buf; DataTypes data_types; - msgpack::unpacker unpacker; msgpack::object_handle object_handle; }; diff --git a/dbms/tests/queries/0_stateless/01098_msgpack_format.reference b/dbms/tests/queries/0_stateless/01098_msgpack_format.reference index aab048208bc..8059526a38f 100644 --- a/dbms/tests/queries/0_stateless/01098_msgpack_format.reference +++ b/dbms/tests/queries/0_stateless/01098_msgpack_format.reference @@ -6,3 +6,5 @@ 42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 [42] [[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] [[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] +[0,1,2,3,42,253,254,255] +[255,254,253,42,3,2,1,0] diff --git a/dbms/tests/queries/0_stateless/01098_msgpack_format.sh b/dbms/tests/queries/0_stateless/01098_msgpack_format.sh index 2aaf2dfd527..afebd6de3dc 100755 --- a/dbms/tests/queries/0_stateless/01098_msgpack_format.sh +++ b/dbms/tests/queries/0_stateless/01098_msgpack_format.sh @@ -4,26 +4,49 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS msgpack"; + $CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime, datetime64 DateTime64, array Array(UInt32)) ENGINE = Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', 18980, 1639872000, 1639872000000, [1,2,3,4,5]), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', 20000, 1839882000, 1639872891123, [5,4,3,2,1]),(42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', 42, 42, 42, [42])"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack FORMAT MsgPack" > $CURDIR/data_msgpack/all_types.msgpk; +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack FORMAT MsgPack" > $CURDIR/tmp_msgpac_test_all_types.msgpk; -cat $CURDIR/data_msgpack/all_types.msgpk | $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack FORMAT MsgPack"; +cat $CURDIR/tmp_msgpac_test_all_types.msgpk | $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack FORMAT MsgPack"; + +rm $CURDIR/tmp_msgpac_test_all_types.msgpk $CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack"; $CLICKHOUSE_CLIENT --query="DROP TABLE msgpack"; + $CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (array1 Array(Array(UInt32)), array2 Array(Array(Array(String)))) ENGINE = Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES ([[1,2,3], [1001, 2002], [3167]], [[['one'], ['two']], [['three']],[['four'], ['five']]])"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack FORMAT MsgPack" > $CURDIR/data_msgpack/nested_arrays.msgpk; +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack FORMAT MsgPack" > $CURDIR/tmp_msgpack_test_nested_arrays.msgpk; -cat $CURDIR/data_msgpack/nested_arrays.msgpk | $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack FORMAT MsgPack"; +cat $CURDIR/tmp_msgpack_test_nested_arrays.msgpk | $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack FORMAT MsgPack"; +rm $CURDIR/tmp_msgpack_test_nested_arrays.msgpk; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE msgpack"; + + +$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (array Array(UInt8)) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES ([0, 1, 2, 3, 42, 253, 254, 255]), ([255, 254, 253, 42, 3, 2, 1, 0])"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack FORMAT MsgPack" > $CURDIR/tmp_msgpack_type_conversion.msgpk; + +$CLICKHOUSE_CLIENT --query="DROP TABLE msgpack"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (array Array(Int64)) ENGINE = Memory"; + +cat $CURDIR/tmp_msgpack_type_conversion.msgpk | $CLICKHOUSE_CLIENT --query="INSERT INTO msgpack FORMAT MsgPack"; +rm $CURDIR/tmp_msgpack_type_conversion.msgpk; $CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack"; diff --git a/dbms/tests/queries/0_stateless/data_msgpack/all_types.msgpk b/dbms/tests/queries/0_stateless/data_msgpack/all_types.msgpk deleted file mode 100644 index efefdf32a55f96112d8952e725c2023f9687cde3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 200 zcmX@}|Lp(&=l%o1c?JdsagVYW3>O+MHZWXj0CFz_g$*9;swCwUf}BZ jJe8G&nTb&g3Qjp`t`j-!;A(oPKStx*A`>H^_(UxL8(vuZ diff --git a/dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk b/dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk 
deleted file mode 100644 index 761ef1d5b6c..00000000000 --- a/dbms/tests/queries/0_stateless/data_msgpack/nested_arrays.msgpk +++ /dev/null @@ -1 +0,0 @@ -ґ _onetwothreefourfive \ No newline at end of file From 64a4640e0ec6590b969435906a1cdce6d62417a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Sat, 4 Apr 2020 00:32:06 +0300 Subject: [PATCH 061/752] added support of custom settings to all sources that needed having them --- .../ClickHouseDictionarySource.cpp | 7 ++++-- dbms/Dictionaries/DictionarySourceHelpers.cpp | 22 ++++++++++++++++++- dbms/Dictionaries/DictionarySourceHelpers.h | 12 +++++++++- .../ExecutableDictionarySource.cpp | 6 +++-- .../Dictionaries/ExecutableDictionarySource.h | 4 ++-- dbms/Dictionaries/FileDictionarySource.cpp | 14 +++--------- dbms/Dictionaries/FileDictionarySource.h | 2 +- dbms/Dictionaries/HTTPDictionarySource.cpp | 4 +++- dbms/Dictionaries/HTTPDictionarySource.h | 3 ++- dbms/Dictionaries/XDBCDictionarySource.cpp | 8 +++++++ 10 files changed, 60 insertions(+), 22 deletions(-) diff --git a/dbms/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/Dictionaries/ClickHouseDictionarySource.cpp index 0894a655724..45895ae93b2 100644 --- a/dbms/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/Dictionaries/ClickHouseDictionarySource.cpp @@ -12,7 +12,7 @@ #include "readInvalidateQuery.h" #include "writeParenthesisedString.h" #include "DictionaryFactory.h" - +#include "DictionarySourceHelpers.h" namespace DB { @@ -216,7 +216,10 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) const Context & context, bool /* check_config */) -> DictionarySourcePtr { - return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context); + Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); + /// Note that processors are not supported yet (see constructor), + /// hence it is not possible to override experimental_use_processors setting + return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context_local_copy); }; factory.registerSource("clickhouse", create_table_source); } diff --git a/dbms/Dictionaries/DictionarySourceHelpers.cpp b/dbms/Dictionaries/DictionarySourceHelpers.cpp index d01c7560832..7dfa67b5167 100644 --- a/dbms/Dictionaries/DictionarySourceHelpers.cpp +++ b/dbms/Dictionaries/DictionarySourceHelpers.cpp @@ -6,7 +6,9 @@ #include #include #include "DictionaryStructure.h" - +#include +#include +#include namespace DB { @@ -50,4 +52,22 @@ void formatKeys( out->flush(); } +Context copyContextAndApplySettings( + const std::string & config_prefix, + const Context & context, + const Poco::Util::AbstractConfiguration & config) +{ + Context local_context(context); + if (config.has(config_prefix + ".settings")) + { + const auto prefix = config_prefix + ".settings"; + Settings settings; + + settings.loadSettingsFromConfig(prefix, config); + local_context.setSettings(settings); + } + + return local_context; +} + } diff --git a/dbms/Dictionaries/DictionarySourceHelpers.h b/dbms/Dictionaries/DictionarySourceHelpers.h index 99d5df5bda7..7dc5f319432 100644 --- a/dbms/Dictionaries/DictionarySourceHelpers.h +++ b/dbms/Dictionaries/DictionarySourceHelpers.h @@ -3,7 +3,8 @@ #include #include #include - +#include +#include namespace DB { @@ -11,6 +12,9 @@ class IBlockOutputStream; using BlockOutputStreamPtr = std::shared_ptr; struct DictionaryStructure; 
+class Context; + + /// Write keys to block output stream. @@ -24,4 +28,10 @@ void formatKeys( const Columns & key_columns, const std::vector & requested_rows); +/// Used for applying settings to copied context in some register[...]Source functions +Context copyContextAndApplySettings( + const std::string & config_prefix, + const Context & context, + const Poco::Util::AbstractConfiguration & config); + } diff --git a/dbms/Dictionaries/ExecutableDictionarySource.cpp b/dbms/Dictionaries/ExecutableDictionarySource.cpp index d9903f759c0..6f4cd747b87 100644 --- a/dbms/Dictionaries/ExecutableDictionarySource.cpp +++ b/dbms/Dictionaries/ExecutableDictionarySource.cpp @@ -15,7 +15,7 @@ #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" #include "registerDictionaries.h" - +#include "DictionarySourceHelpers.h" namespace DB { @@ -232,9 +232,11 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) if (check_config) throw Exception("Dictionaries with Executable dictionary source is not allowed", ErrorCodes::DICTIONARY_ACCESS_DENIED); + Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); + return std::make_unique( dict_struct, config, config_prefix + ".executable", - sample_block, context); + sample_block, context_local_copy); }; factory.registerSource("executable", create_table_source); } diff --git a/dbms/Dictionaries/ExecutableDictionarySource.h b/dbms/Dictionaries/ExecutableDictionarySource.h index 879248663dc..f28d71ca5e3 100644 --- a/dbms/Dictionaries/ExecutableDictionarySource.h +++ b/dbms/Dictionaries/ExecutableDictionarySource.h @@ -3,7 +3,7 @@ #include "DictionaryStructure.h" #include "IDictionarySource.h" #include - +#include namespace Poco { class Logger; } @@ -56,7 +56,7 @@ private: const std::string update_field; const std::string format; Block sample_block; - const Context & context; + Context context; }; } diff --git a/dbms/Dictionaries/FileDictionarySource.cpp b/dbms/Dictionaries/FileDictionarySource.cpp index 7871b4a2a20..0f20ab8edc4 100644 --- a/dbms/Dictionaries/FileDictionarySource.cpp +++ b/dbms/Dictionaries/FileDictionarySource.cpp @@ -7,6 +7,7 @@ #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" #include "registerDictionaries.h" +#include "DictionarySourceHelpers.h" namespace DB { @@ -21,7 +22,7 @@ namespace ErrorCodes FileDictionarySource::FileDictionarySource( const std::string & filepath_, const std::string & format_, - Block & sample_block_, Context & context_, bool check_config) + Block & sample_block_, const Context & context_, bool check_config) : filepath{filepath_} , format{format_} , sample_block{sample_block_} @@ -83,16 +84,7 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory) const auto filepath = config.getString(config_prefix + ".file.path"); const auto format = config.getString(config_prefix + ".file.format"); - Context context_local_copy(context); - if (config.has(config_prefix + ".settings")) - { - const auto prefix = config_prefix + ".settings"; - Settings settings; - - settings.loadSettingsFromConfig(prefix, config); - // const_cast(context).setSettings(settings); - context_local_copy.setSettings(settings); - } + Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); return std::make_unique(filepath, format, sample_block, context_local_copy, check_config); }; diff --git a/dbms/Dictionaries/FileDictionarySource.h b/dbms/Dictionaries/FileDictionarySource.h index e22906633db..5155e3d81e1 100644 --- 
a/dbms/Dictionaries/FileDictionarySource.h +++ b/dbms/Dictionaries/FileDictionarySource.h @@ -18,7 +18,7 @@ class FileDictionarySource final : public IDictionarySource { public: FileDictionarySource(const std::string & filepath_, const std::string & format_, - Block & sample_block_, Context & context_, bool check_config); + Block & sample_block_, const Context & context_, bool check_config); FileDictionarySource(const FileDictionarySource & other); diff --git a/dbms/Dictionaries/HTTPDictionarySource.cpp b/dbms/Dictionaries/HTTPDictionarySource.cpp index 87d5381f30f..61f16797ce0 100644 --- a/dbms/Dictionaries/HTTPDictionarySource.cpp +++ b/dbms/Dictionaries/HTTPDictionarySource.cpp @@ -202,9 +202,11 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) if (dict_struct.has_expressions) throw Exception{"Dictionary source of type `http` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR}; + Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); + return std::make_unique( dict_struct, config, config_prefix + ".http", - sample_block, context, check_config); + sample_block, context_local_copy, check_config); }; factory.registerSource("http", create_table_source); } diff --git a/dbms/Dictionaries/HTTPDictionarySource.h b/dbms/Dictionaries/HTTPDictionarySource.h index d04a6f1a789..e7920132e83 100644 --- a/dbms/Dictionaries/HTTPDictionarySource.h +++ b/dbms/Dictionaries/HTTPDictionarySource.h @@ -7,6 +7,7 @@ #include #include "DictionaryStructure.h" #include "IDictionarySource.h" +#include namespace Poco { @@ -64,7 +65,7 @@ private: std::string update_field; const std::string format; Block sample_block; - const Context & context; + Context context; ConnectionTimeouts timeouts; }; diff --git a/dbms/Dictionaries/XDBCDictionarySource.cpp b/dbms/Dictionaries/XDBCDictionarySource.cpp index 8647c8ccac5..149764dda03 100644 --- a/dbms/Dictionaries/XDBCDictionarySource.cpp +++ b/dbms/Dictionaries/XDBCDictionarySource.cpp @@ -19,6 +19,9 @@ #include #include "registerDictionaries.h" +/// For custom settings support +// #include "DictionarySourceHelpers.h" + #if USE_POCO_SQLODBC || USE_POCO_DATAODBC # include #endif @@ -269,6 +272,11 @@ void registerDictionarySourceJDBC(DictionarySourceFactory & factory) bool /* check_config */) -> DictionarySourcePtr { throw Exception{"Dictionary source of type `jdbc` is disabled until consistent support for nullable fields.", ErrorCodes::SUPPORT_IS_DISABLED}; + + /// Here is the support for custom settings. Good luck with fixing Nullable's segfault :) + /// NOTE: you should store the field context locally (const Context & -> Context) to make it work properly! 
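+ ///
+ /// For reference, the per-source <settings> element consumed by
+ /// copyContextAndApplySettings() sits next to the source-type element
+ /// (illustrative config only, not part of this patch):
+ ///
+ ///     <source>
+ ///         <file><path>/path/to/dict.csv</path><format>CSV</format></file>
+ ///         <settings>
+ ///             <format_csv_delimiter>|</format_csv_delimiter>
+ ///         </settings>
+ ///     </source>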
+ // Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); + // BridgeHelperPtr bridge = std::make_shared>(config, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".connection_string")); // return std::make_unique(dict_struct, config, config_prefix + ".jdbc", sample_block, context, bridge); }; From fbc11d3bf96665aef17d88ea3d1a29680b472193 Mon Sep 17 00:00:00 2001 From: Avogar Date: Sat, 4 Apr 2020 17:02:14 +0300 Subject: [PATCH 062/752] Move files --- dbms/CMakeLists.txt | 602 ------------------ .../metric_log/202004_1_521_104/checksums.txt | Bin 8834 -> 0 bytes .../metric_log/202004_1_521_104/columns.txt | 213 ------- .../metric_log/202004_1_521_104/count.txt | 1 - .../202004_1_521_104/minmax_event_date.idx | 1 - .../metric_log/202004_1_521_104/partition.dat | Bin 4 -> 0 bytes .../metric_log/202004_1_521_104/primary.idx | 1 - .../metric_log/202004_1_526_105/checksums.txt | Bin 8832 -> 0 bytes .../metric_log/202004_1_526_105/columns.txt | 213 ------- .../metric_log/202004_1_526_105/count.txt | 1 - .../202004_1_526_105/minmax_event_date.idx | 1 - .../metric_log/202004_1_526_105/partition.dat | Bin 4 -> 0 bytes .../metric_log/202004_1_526_105/primary.idx | 1 - .../metric_log/202004_522_522_0/checksums.txt | Bin 7190 -> 0 bytes .../metric_log/202004_522_522_0/columns.txt | 213 ------- .../metric_log/202004_522_522_0/count.txt | 1 - .../202004_522_522_0/minmax_event_date.idx | 1 - .../metric_log/202004_522_522_0/partition.dat | Bin 4 -> 0 bytes .../metric_log/202004_522_522_0/primary.idx | 1 - .../metric_log/202004_523_523_0/checksums.txt | Bin 6974 -> 0 bytes .../metric_log/202004_523_523_0/columns.txt | 213 ------- .../metric_log/202004_523_523_0/count.txt | 1 - .../202004_523_523_0/minmax_event_date.idx | 1 - .../metric_log/202004_523_523_0/partition.dat | Bin 4 -> 0 bytes .../metric_log/202004_523_523_0/primary.idx | 1 - .../metric_log/202004_524_524_0/checksums.txt | Bin 6745 -> 0 bytes .../metric_log/202004_524_524_0/columns.txt | 213 ------- .../metric_log/202004_524_524_0/count.txt | 1 - .../202004_524_524_0/minmax_event_date.idx | 1 - .../metric_log/202004_524_524_0/partition.dat | Bin 4 -> 0 bytes .../metric_log/202004_524_524_0/primary.idx | 1 - .../metric_log/202004_525_525_0/checksums.txt | Bin 6553 -> 0 bytes .../metric_log/202004_525_525_0/columns.txt | 213 ------- .../metric_log/202004_525_525_0/count.txt | 1 - .../202004_525_525_0/minmax_event_date.idx | 1 - .../metric_log/202004_525_525_0/partition.dat | Bin 4 -> 0 bytes .../metric_log/202004_525_525_0/primary.idx | 1 - .../metric_log/202004_526_526_0/checksums.txt | Bin 6663 -> 0 bytes .../metric_log/202004_526_526_0/columns.txt | 213 ------- .../metric_log/202004_526_526_0/count.txt | 1 - .../202004_526_526_0/minmax_event_date.idx | 1 - .../metric_log/202004_526_526_0/partition.dat | Bin 4 -> 0 bytes .../metric_log/202004_526_526_0/primary.idx | 1 - .../202004_5465_5997_129/checksums.txt | Bin 1284 -> 0 bytes .../text_log/202004_5465_5997_129/columns.txt | 14 - .../text_log/202004_5465_5997_129/count.txt | 1 - .../minmax_event_date.idx | 1 - .../202004_5465_5997_129/partition.dat | Bin 4 -> 0 bytes .../text_log/202004_5465_5997_129/primary.idx | 2 - .../text_log/202004_5998_5998_0/checksums.txt | Bin 1183 -> 0 bytes .../text_log/202004_5998_5998_0/columns.txt | 14 - .../text_log/202004_5998_5998_0/count.txt | 1 - .../202004_5998_5998_0/minmax_event_date.idx | 1 - .../text_log/202004_5998_5998_0/partition.dat | Bin 4 -> 0 bytes 
.../text_log/202004_5998_5998_0/primary.idx | 1 - .../text_log/202004_5999_5999_0/checksums.txt | Bin 1178 -> 0 bytes .../text_log/202004_5999_5999_0/columns.txt | 14 - .../text_log/202004_5999_5999_0/count.txt | 1 - .../202004_5999_5999_0/minmax_event_date.idx | 1 - .../text_log/202004_5999_5999_0/partition.dat | Bin 4 -> 0 bytes .../text_log/202004_5999_5999_0/primary.idx | 1 - .../text_log/202004_6000_6000_0/checksums.txt | Bin 1180 -> 0 bytes .../text_log/202004_6000_6000_0/columns.txt | 14 - .../text_log/202004_6000_6000_0/count.txt | 1 - .../202004_6000_6000_0/minmax_event_date.idx | 1 - .../text_log/202004_6000_6000_0/partition.dat | Bin 4 -> 0 bytes .../text_log/202004_6000_6000_0/primary.idx | 1 - .../text_log/202004_6001_6001_0/checksums.txt | Bin 1179 -> 0 bytes .../text_log/202004_6001_6001_0/columns.txt | 14 - .../text_log/202004_6001_6001_0/count.txt | 1 - .../202004_6001_6001_0/minmax_event_date.idx | 1 - .../text_log/202004_6001_6001_0/partition.dat | Bin 4 -> 0 bytes .../text_log/202004_6001_6001_0/primary.idx | 1 - src/CMakeLists.txt | 2 + .../Formats/Impl/MsgPackRowInputFormat.cpp | 0 .../Formats/Impl/MsgPackRowInputFormat.h | 0 .../Formats/Impl/MsgPackRowOutputFormat.cpp | 0 .../Formats/Impl/MsgPackRowOutputFormat.h | 0 .../01098_msgpack_format.reference | 0 .../0_stateless/01098_msgpack_format.sh | 0 80 files changed, 2 insertions(+), 2200 deletions(-) delete mode 100644 dbms/CMakeLists.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_521_104/checksums.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_521_104/columns.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_521_104/count.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_521_104/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_521_104/partition.dat delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_521_104/primary.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_526_105/checksums.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_526_105/columns.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_526_105/count.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_526_105/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_526_105/partition.dat delete mode 100644 dbms/programs/server/data/system/metric_log/202004_1_526_105/primary.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_522_522_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_522_522_0/columns.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_522_522_0/count.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_522_522_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_522_522_0/partition.dat delete mode 100644 dbms/programs/server/data/system/metric_log/202004_522_522_0/primary.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_523_523_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_523_523_0/columns.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_523_523_0/count.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_523_523_0/minmax_event_date.idx delete mode 100644 
dbms/programs/server/data/system/metric_log/202004_523_523_0/partition.dat delete mode 100644 dbms/programs/server/data/system/metric_log/202004_523_523_0/primary.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_524_524_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_524_524_0/columns.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_524_524_0/count.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_524_524_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_524_524_0/partition.dat delete mode 100644 dbms/programs/server/data/system/metric_log/202004_524_524_0/primary.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_525_525_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_525_525_0/columns.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_525_525_0/count.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_525_525_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_525_525_0/partition.dat delete mode 100644 dbms/programs/server/data/system/metric_log/202004_525_525_0/primary.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_526_526_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_526_526_0/columns.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_526_526_0/count.txt delete mode 100644 dbms/programs/server/data/system/metric_log/202004_526_526_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/metric_log/202004_526_526_0/partition.dat delete mode 100644 dbms/programs/server/data/system/metric_log/202004_526_526_0/primary.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_5465_5997_129/checksums.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5465_5997_129/columns.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5465_5997_129/count.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5465_5997_129/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_5465_5997_129/partition.dat delete mode 100644 dbms/programs/server/data/system/text_log/202004_5465_5997_129/primary.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_5998_5998_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5998_5998_0/columns.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5998_5998_0/count.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5998_5998_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_5998_5998_0/partition.dat delete mode 100644 dbms/programs/server/data/system/text_log/202004_5998_5998_0/primary.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_5999_5999_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5999_5999_0/columns.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5999_5999_0/count.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_5999_5999_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_5999_5999_0/partition.dat delete mode 100644 
dbms/programs/server/data/system/text_log/202004_5999_5999_0/primary.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_6000_6000_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_6000_6000_0/columns.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_6000_6000_0/count.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_6000_6000_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_6000_6000_0/partition.dat delete mode 100644 dbms/programs/server/data/system/text_log/202004_6000_6000_0/primary.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_6001_6001_0/checksums.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_6001_6001_0/columns.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_6001_6001_0/count.txt delete mode 100644 dbms/programs/server/data/system/text_log/202004_6001_6001_0/minmax_event_date.idx delete mode 100644 dbms/programs/server/data/system/text_log/202004_6001_6001_0/partition.dat delete mode 100644 dbms/programs/server/data/system/text_log/202004_6001_6001_0/primary.idx rename {dbms/src => src}/Processors/Formats/Impl/MsgPackRowInputFormat.cpp (100%) rename {dbms/src => src}/Processors/Formats/Impl/MsgPackRowInputFormat.h (100%) rename {dbms/src => src}/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp (100%) rename {dbms/src => src}/Processors/Formats/Impl/MsgPackRowOutputFormat.h (100%) rename {dbms/tests => tests}/queries/0_stateless/01098_msgpack_format.reference (100%) rename {dbms/tests => tests}/queries/0_stateless/01098_msgpack_format.sh (100%) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt deleted file mode 100644 index cf22c741dd2..00000000000 --- a/dbms/CMakeLists.txt +++ /dev/null @@ -1,602 +0,0 @@ -set(ConfigIncludePath ${CMAKE_CURRENT_BINARY_DIR}/includes/configs CACHE INTERNAL "Path to generated configuration files.") -include_directories(${ConfigIncludePath}) - -if (USE_INCLUDE_WHAT_YOU_USE) - set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) -endif () - -if (USE_CLANG_TIDY) - set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") -endif () - -if(COMPILER_PIPE) - set(MAX_COMPILER_MEMORY 2500) -else() - set(MAX_COMPILER_MEMORY 1500) -endif() -if(MAKE_STATIC_LIBRARIES) - set(MAX_LINKER_MEMORY 3500) -else() - set(MAX_LINKER_MEMORY 2500) -endif() -include(../cmake/limit_jobs.cmake) - -set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h) -set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h) - -include (cmake/version.cmake) -message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}") -configure_file (src/Common/config.h.in ${CONFIG_COMMON}) -configure_file (src/Common/config_version.h.in ${CONFIG_VERSION}) -configure_file (src/Core/config_core.h.in ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include/config_core.h) - -if (NOT MSVC) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") -endif () - -if (USE_DEBUG_HELPERS) - set (INCLUDE_DEBUG_HELPERS "-I${ClickHouse_SOURCE_DIR}/base -include ${ClickHouse_SOURCE_DIR}/dbms/src/Core/iostream_debug_helpers.h") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${INCLUDE_DEBUG_HELPERS}") -endif () - -# Add some warnings that are not available even with -Wall -Wextra -Wpedantic. - -option (WEVERYTHING "Enables -Weverything option with some exceptions. This is intended for exploration of new compiler warnings that may be found to be useful. Only makes sense for clang." 
ON) - -if (COMPILER_CLANG) - add_warning(pedantic) - no_warning(gnu-anonymous-struct) - no_warning(nested-anon-types) - no_warning(vla-extension) - no_warning(zero-length-array) - - add_warning(comma) - add_warning(conditional-uninitialized) - add_warning(covered-switch-default) - add_warning(deprecated) - add_warning(embedded-directive) - add_warning(empty-init-stmt) # linux-only - add_warning(extra-semi-stmt) # linux-only - add_warning(extra-semi) - add_warning(gnu-case-range) - add_warning(inconsistent-missing-destructor-override) - add_warning(newline-eof) - add_warning(old-style-cast) - add_warning(range-loop-analysis) - add_warning(redundant-parens) - add_warning(reserved-id-macro) - add_warning(shadow-field) # clang 8+ - add_warning(shadow-uncaptured-local) - add_warning(shadow) - add_warning(string-plus-int) # clang 8+ - add_warning(undef) - add_warning(unreachable-code-return) - add_warning(unreachable-code) - add_warning(unused-exception-parameter) - add_warning(unused-macros) - add_warning(unused-member-function) - add_warning(zero-as-null-pointer-constant) - - if (WEVERYTHING) - add_warning(everything) - no_warning(c++98-compat-pedantic) - no_warning(c++98-compat) - no_warning(c99-extensions) - no_warning(conversion) - no_warning(ctad-maybe-unsupported) # clang 9+, linux-only - no_warning(deprecated-dynamic-exception-spec) - no_warning(disabled-macro-expansion) - no_warning(documentation-unknown-command) - no_warning(double-promotion) - no_warning(exit-time-destructors) - no_warning(float-equal) - no_warning(global-constructors) - no_warning(gnu-anonymous-struct) - no_warning(missing-prototypes) - no_warning(missing-variable-declarations) - no_warning(nested-anon-types) - no_warning(packed) - no_warning(padded) - no_warning(return-std-move-in-c++11) # clang 7+ - no_warning(shift-sign-overflow) - no_warning(sign-conversion) - no_warning(switch-enum) - no_warning(undefined-func-template) - no_warning(unused-template) - no_warning(vla-extension) - no_warning(vla) - no_warning(weak-template-vtables) - no_warning(weak-vtables) - no_warning(zero-length-array) - - # TODO Enable conversion, sign-conversion, double-promotion warnings. - endif () -elseif (COMPILER_GCC) - # Add compiler options only to c++ compiler - function(add_cxx_compile_options option) - add_compile_options("$<$,CXX>:${option}>") - endfunction() - # Warn about boolean expression compared with an integer value different from true/false - add_cxx_compile_options(-Wbool-compare) - # Warn whenever a pointer is cast such that the required alignment of the target is increased. - add_cxx_compile_options(-Wcast-align) - # Warn whenever a pointer is cast so as to remove a type qualifier from the target type. - add_cxx_compile_options(-Wcast-qual) - # Warn when deleting a pointer to incomplete type, which may cause undefined behavior at runtime - add_cxx_compile_options(-Wdelete-incomplete) - # Warn if a requested optimization pass is disabled. 
Code is too big or too complex - add_cxx_compile_options(-Wdisabled-optimization) - # Warn about duplicated conditions in an if-else-if chain - add_cxx_compile_options(-Wduplicated-cond) - # Warn about a comparison between values of different enumerated types - add_cxx_compile_options(-Wenum-compare) - # Warn about uninitialized variables that are initialized with themselves - add_cxx_compile_options(-Winit-self) - # Warn about logical not used on the left hand side operand of a comparison - add_cxx_compile_options(-Wlogical-not-parentheses) - # Warn about suspicious uses of logical operators in expressions - add_cxx_compile_options(-Wlogical-op) - # Warn if there exists a path from the function entry to a use of the variable that is uninitialized. - add_cxx_compile_options(-Wmaybe-uninitialized) - # Warn when the indentation of the code does not reflect the block structure - add_cxx_compile_options(-Wmisleading-indentation) - # Warn if a global function is defined without a previous declaration - disabled because of build times - # add_cxx_compile_options(-Wmissing-declarations) - # Warn if a user-supplied include directory does not exist - add_cxx_compile_options(-Wmissing-include-dirs) - # Obvious - add_cxx_compile_options(-Wnon-virtual-dtor) - # Obvious - add_cxx_compile_options(-Wno-return-local-addr) - # This warning is disabled due to false positives if compiled with libc++: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90037 - #add_cxx_compile_options(-Wnull-dereference) - # Obvious - add_cxx_compile_options(-Wodr) - # Obvious - add_cxx_compile_options(-Wold-style-cast) - # Warn when a function declaration hides virtual functions from a base class - # add_cxx_compile_options(-Woverloaded-virtual) - # Warn about placement new expressions with undefined behavior - add_cxx_compile_options(-Wplacement-new=2) - # Warn about anything that depends on the “size of” a function type or of void - add_cxx_compile_options(-Wpointer-arith) - # Warn if anything is declared more than once in the same scope - add_cxx_compile_options(-Wredundant-decls) - # Member initialization reordering - add_cxx_compile_options(-Wreorder) - # Obvious - add_cxx_compile_options(-Wshadow) - # Warn if left shifting a negative value - add_cxx_compile_options(-Wshift-negative-value) - # Warn about a definition of an unsized deallocation function - add_cxx_compile_options(-Wsized-deallocation) - # Warn when the sizeof operator is applied to a parameter that is declared as an array in a function definition - add_cxx_compile_options(-Wsizeof-array-argument) - # Warn for suspicious length parameters to certain string and memory built-in functions if the argument uses sizeof - add_cxx_compile_options(-Wsizeof-pointer-memaccess) - - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9) - # Warn about overriding virtual functions that are not marked with the override keyword - add_cxx_compile_options(-Wsuggest-override) - endif () - - # Warn whenever a switch statement has an index of boolean type and the case values are outside the range of a boolean type - add_cxx_compile_options(-Wswitch-bool) - # Warn if a self-comparison always evaluates to true or false - add_cxx_compile_options(-Wtautological-compare) - # Warn about trampolines generated for pointers to nested functions - add_cxx_compile_options(-Wtrampolines) - # Obvious - add_cxx_compile_options(-Wunused) - # Warn if vector operation is not implemented via SIMD capabilities of the architecture - add_cxx_compile_options(-Wvector-operation-performance) -endif () - -if 
(COMPILER_GCC) - # If we leave this optimization enabled, gcc-7 replaces a pair of SSE intrinsics (16 byte load, store) with a call to memcpy. - # It leads to slow code. This is compiler bug. It looks like this: - # - # (gdb) bt - #0 memcpy (destination=0x7faa6e9f1638, source=0x7faa81d9e9a8, size=16) at ../libs/libmemcpy/memcpy.h:11 - #1 0x0000000005341c5f in _mm_storeu_si128 (__B=..., __P=) at /usr/lib/gcc/x86_64-linux-gnu/7/include/emmintrin.h:720 - #2 memcpySmallAllowReadWriteOverflow15Impl (n=, src=, dst=) at ../dbms/src/Common/memcpySmall.h:37 - - add_definitions ("-fno-tree-loop-distribute-patterns") -endif () - -add_subdirectory (src) - -set(dbms_headers) -set(dbms_sources) - -add_headers_and_sources(clickhouse_common_io src/Common) -add_headers_and_sources(clickhouse_common_io src/Common/HashTable) -add_headers_and_sources(clickhouse_common_io src/IO) -list (REMOVE_ITEM clickhouse_common_io_sources src/Common/malloc.cpp src/Common/new_delete.cpp) - -if(USE_RDKAFKA) - add_headers_and_sources(dbms src/Storages/Kafka) -endif() - - -list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) -list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON}) - -list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp src/Functions/FunctionHelpers.cpp src/Functions/extractTimeZoneFromFunctionArguments.cpp) -list (APPEND dbms_headers src/Functions/IFunctionImpl.h src/Functions/FunctionFactory.h src/Functions/FunctionHelpers.h src/Functions/extractTimeZoneFromFunctionArguments.h) - -list (APPEND dbms_sources - src/AggregateFunctions/AggregateFunctionFactory.cpp - src/AggregateFunctions/AggregateFunctionCombinatorFactory.cpp - src/AggregateFunctions/AggregateFunctionState.cpp - src/AggregateFunctions/parseAggregateFunctionParameters.cpp) - -list (APPEND dbms_headers - src/AggregateFunctions/IAggregateFunction.h - src/AggregateFunctions/IAggregateFunctionCombinator.h - src/AggregateFunctions/AggregateFunctionFactory.h - src/AggregateFunctions/AggregateFunctionCombinatorFactory.h - src/AggregateFunctions/AggregateFunctionState.h - src/AggregateFunctions/FactoryHelpers.h - src/AggregateFunctions/parseAggregateFunctionParameters.h) - -list (APPEND dbms_sources src/TableFunctions/ITableFunction.cpp src/TableFunctions/TableFunctionFactory.cpp) -list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions/TableFunctionFactory.h) -list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp src/Dictionaries/getDictionaryConfigurationFromAST.cpp) -list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h src/Dictionaries/getDictionaryConfigurationFromAST.h) - -if (NOT ENABLE_SSL) - list (REMOVE_ITEM clickhouse_common_io_sources src/Common/OpenSSLHelpers.cpp) - list (REMOVE_ITEM clickhouse_common_io_headers src/Common/OpenSSLHelpers.h) -endif () - -add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) - -add_library (clickhouse_malloc OBJECT src/Common/malloc.cpp) -set_source_files_properties(src/Common/malloc.cpp PROPERTIES COMPILE_FLAGS "-fno-builtin") - -add_library (clickhouse_new_delete STATIC src/Common/new_delete.cpp) -target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io) - -if (OS_FREEBSD) - target_compile_definitions (clickhouse_common_io PUBLIC CLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST) 
-
-add_subdirectory(src/Common/ZooKeeper)
-add_subdirectory(src/Common/Config)
-
-set (all_modules)
-macro(add_object_library name common_path)
-    if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
-        add_headers_and_sources(dbms ${common_path})
-    else ()
-        list (APPEND all_modules ${name})
-        add_headers_and_sources(${name} ${common_path})
-        add_library(${name} SHARED ${${name}_sources} ${${name}_headers})
-        target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all)
-    endif ()
-endmacro()
-
-add_object_library(clickhouse_access src/Access)
-add_object_library(clickhouse_core src/Core)
-add_object_library(clickhouse_compression src/Compression)
-add_object_library(clickhouse_datastreams src/DataStreams)
-add_object_library(clickhouse_datatypes src/DataTypes)
-add_object_library(clickhouse_databases src/Databases)
-add_object_library(clickhouse_disks src/Disks)
-add_object_library(clickhouse_interpreters src/Interpreters)
-add_object_library(clickhouse_interpreters_clusterproxy src/Interpreters/ClusterProxy)
-add_object_library(clickhouse_columns src/Columns)
-add_object_library(clickhouse_storages src/Storages)
-add_object_library(clickhouse_storages_distributed src/Storages/Distributed)
-add_object_library(clickhouse_storages_mergetree src/Storages/MergeTree)
-add_object_library(clickhouse_storages_liveview src/Storages/LiveView)
-add_object_library(clickhouse_client src/Client)
-add_object_library(clickhouse_formats src/Formats)
-add_object_library(clickhouse_processors src/Processors)
-add_object_library(clickhouse_processors_executors src/Processors/Executors)
-add_object_library(clickhouse_processors_formats src/Processors/Formats)
-add_object_library(clickhouse_processors_formats_impl src/Processors/Formats/Impl)
-add_object_library(clickhouse_processors_transforms src/Processors/Transforms)
-add_object_library(clickhouse_processors_sources src/Processors/Sources)
-
-
-if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
-    add_library (dbms STATIC ${dbms_headers} ${dbms_sources})
-    set (all_modules dbms)
-else()
-    add_library (dbms SHARED ${dbms_headers} ${dbms_sources})
-    target_link_libraries (dbms PUBLIC ${all_modules})
-    list (APPEND all_modules dbms)
-    # force all split libs to be linked
-    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
-endif ()
-
-macro (dbms_target_include_directories)
-    foreach (module ${all_modules})
-        target_include_directories (${module} ${ARGN})
-    endforeach ()
-endmacro ()
-
-macro (dbms_target_link_libraries)
-    foreach (module ${all_modules})
-        target_link_libraries (${module} ${ARGN})
-    endforeach ()
-endmacro ()
-
-if (USE_EMBEDDED_COMPILER)
-    dbms_target_link_libraries (PRIVATE ${REQUIRED_LLVM_LIBRARIES})
-    dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
-endif ()
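# For illustration only: in split-shared-libraries mode, a single call such as
# add_object_library(clickhouse_core src/Core) expands roughly to the lines
# below (hand-expanded from the macro above, not generated output).
list (APPEND all_modules clickhouse_core)
add_headers_and_sources(clickhouse_core src/Core)
add_library(clickhouse_core SHARED ${clickhouse_core_sources} ${clickhouse_core_headers})
# Cross-module symbols resolve only at the final link of dbms, so each module
# ignores unresolved symbols at its own link step.
target_link_libraries (clickhouse_core PRIVATE -Wl,--unresolved-symbols=ignore-all)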
-
-if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
-    # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
-    set_source_files_properties(
-        src/Dictionaries/FlatDictionary.cpp
-        src/Dictionaries/HashedDictionary.cpp
-        src/Dictionaries/CacheDictionary.cpp
-        src/Dictionaries/TrieDictionary.cpp
-        src/Dictionaries/RangeHashedDictionary.cpp
-        src/Dictionaries/ComplexKeyHashedDictionary.cpp
-        src/Dictionaries/ComplexKeyCacheDictionary.cpp
-        src/Dictionaries/ComplexKeyCacheDictionary_generate1.cpp
-        src/Dictionaries/ComplexKeyCacheDictionary_generate2.cpp
-        src/Dictionaries/ComplexKeyCacheDictionary_generate3.cpp
-        src/Dictionaries/ODBCBlockInputStream.cpp
-        src/Dictionaries/HTTPDictionarySource.cpp
-        src/Dictionaries/LibraryDictionarySource.cpp
-        src/Dictionaries/ExecutableDictionarySource.cpp
-        src/Dictionaries/ClickHouseDictionarySource.cpp
-        PROPERTIES COMPILE_FLAGS -g0)
-endif ()
-
-# Otherwise it will slow down stack traces printing too much.
-set_source_files_properties(
-    src/Common/Elf.cpp
-    src/Common/Dwarf.cpp
-    src/Common/SymbolIndex.cpp
-    PROPERTIES COMPILE_FLAGS "-O3 ${WITHOUT_COVERAGE}")
-
-target_link_libraries (clickhouse_common_io
-    PUBLIC
-        common
-    PRIVATE
-        string_utils
-        widechar_width
-        ${LINK_LIBRARIES_ONLY_ON_X86_64}
-    PUBLIC
-        ${DOUBLE_CONVERSION_LIBRARIES}
-        ryu
-    PUBLIC
-        ${Poco_Net_LIBRARY}
-        ${Poco_Util_LIBRARY}
-        ${Poco_Foundation_LIBRARY}
-        ${Poco_XML_LIBRARY}
-)
-
-if(RE2_LIBRARY)
-    target_link_libraries(clickhouse_common_io PUBLIC ${RE2_LIBRARY})
-endif()
-if(RE2_ST_LIBRARY)
-    target_link_libraries(clickhouse_common_io PUBLIC ${RE2_ST_LIBRARY})
-endif()
-
-target_link_libraries(clickhouse_common_io
-    PUBLIC
-        ${CITYHASH_LIBRARIES}
-    PRIVATE
-        ${Poco_XML_LIBRARY}
-        ${ZLIB_LIBRARIES}
-        ${EXECINFO_LIBRARIES}
-    PUBLIC
-        ${Boost_SYSTEM_LIBRARY}
-        ${Boost_PROGRAM_OPTIONS_LIBRARY}
-    PUBLIC
-        roaring
-)
-
-if (USE_RDKAFKA)
-    dbms_target_link_libraries(PRIVATE ${CPPKAFKA_LIBRARY} ${RDKAFKA_LIBRARY})
-    if(NOT USE_INTERNAL_RDKAFKA_LIBRARY)
-        dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR})
-    endif()
-endif()
-
-
-if(RE2_INCLUDE_DIR)
-    target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})
-endif()
-
-if(CPUID_LIBRARY)
-    target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY})
-endif()
-
-if(CPUINFO_LIBRARY)
-    target_link_libraries(clickhouse_common_io PRIVATE ${CPUINFO_LIBRARY})
-endif()
-
-dbms_target_link_libraries (
-    PRIVATE
-        clickhouse_parsers
-        clickhouse_common_config
-        clickhouse_common_zookeeper
-        string_utils # FIXME: not sure if it's private
-    PUBLIC
-        clickhouse_common_io
-    PRIVATE
-        clickhouse_dictionaries_embedded
-        ${LZ4_LIBRARY}
-    PUBLIC
-        ${MYSQLXX_LIBRARY}
-    PRIVATE
-        ${BTRIE_LIBRARIES}
-        ${Boost_PROGRAM_OPTIONS_LIBRARY}
-        ${Boost_FILESYSTEM_LIBRARY}
-    PUBLIC
-        ${Boost_SYSTEM_LIBRARY}
-)
-
-target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include) # uses some includes from core
-dbms_target_include_directories(PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src/Core/include)
-
-target_include_directories(clickhouse_common_io SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
-dbms_target_include_directories(SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR})
-
-dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR})
-
-if (NOT USE_INTERNAL_LZ4_LIBRARY AND LZ4_INCLUDE_DIR)
-    dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR})
-endif ()
-
-if (ZSTD_LIBRARY)
-    dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY})
-    if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR)
-        dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${ZSTD_INCLUDE_DIR})
-    endif ()
-endif()
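# For illustration only: set_source_files_properties is the per-file escape
# hatch used twice above (dropping debug info from template-heavy dictionary
# sources, forcing -O3 onto the stack-trace machinery). The same mechanism on
# a hypothetical file:
set_source_files_properties(
    src/Common/HypotheticalHotPath.cpp # placeholder path, not a real file
    PROPERTIES COMPILE_FLAGS "-O3")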
-
-if (NOT USE_INTERNAL_BOOST_LIBRARY)
-    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
-endif ()
-
-if (Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY)
-    target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_SQL_INCLUDE_DIR})
-    dbms_target_include_directories (SYSTEM PRIVATE ${Poco_SQL_INCLUDE_DIR})
-endif()
-
-if (USE_POCO_SQLODBC)
-    target_link_libraries (clickhouse_common_io PRIVATE ${Poco_SQL_LIBRARY})
-    dbms_target_link_libraries (PRIVATE ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY})
-    if (NOT USE_INTERNAL_POCO_LIBRARY)
-        target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_SQL_INCLUDE_DIR})
-        dbms_target_include_directories (SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_SQLODBC_INCLUDE_DIR} SYSTEM PUBLIC ${Poco_SQL_INCLUDE_DIR})
-    endif()
-endif()
-
-if (Poco_Data_FOUND)
-    target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR})
-    dbms_target_include_directories (SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR})
-endif()
-
-if (USE_POCO_DATAODBC)
-    target_link_libraries (clickhouse_common_io PRIVATE ${Poco_Data_LIBRARY})
-    dbms_target_link_libraries (PRIVATE ${Poco_DataODBC_LIBRARY})
-    if (NOT USE_INTERNAL_POCO_LIBRARY)
-        dbms_target_include_directories (SYSTEM PRIVATE ${ODBC_INCLUDE_DIRS} ${Poco_DataODBC_INCLUDE_DIR})
-    endif()
-endif()
-
-if (USE_POCO_MONGODB)
-    dbms_target_link_libraries (PRIVATE ${Poco_MongoDB_LIBRARY})
-endif()
-
-if (USE_POCO_REDIS)
-    dbms_target_link_libraries (PRIVATE ${Poco_Redis_LIBRARY})
-endif()
-
-if (USE_POCO_NETSSL)
-    target_link_libraries (clickhouse_common_io PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY})
-    dbms_target_link_libraries (PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY})
-endif()
-
-if (USE_POCO_JSON)
-    dbms_target_link_libraries (PRIVATE ${Poco_JSON_LIBRARY})
-endif()
-
-dbms_target_link_libraries (PRIVATE ${Poco_Foundation_LIBRARY})
-
-if (USE_ICU)
-    dbms_target_link_libraries (PRIVATE ${ICU_LIBRARIES})
-    dbms_target_include_directories (SYSTEM PRIVATE ${ICU_INCLUDE_DIRS})
-endif ()
-
-if (USE_CAPNP)
-    dbms_target_link_libraries (PRIVATE ${CAPNP_LIBRARIES})
-endif ()
-
-if (USE_PARQUET)
-    dbms_target_link_libraries(PRIVATE ${PARQUET_LIBRARY})
-    if (NOT USE_INTERNAL_PARQUET_LIBRARY OR USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
-        dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR})
-    endif ()
-endif ()
-
-if (USE_AVRO)
-    dbms_target_link_libraries(PRIVATE ${AVROCPP_LIBRARY})
-    dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${AVROCPP_INCLUDE_DIR})
-endif ()
-
-if (OPENSSL_CRYPTO_LIBRARY)
-    dbms_target_link_libraries (PRIVATE ${OPENSSL_CRYPTO_LIBRARY})
-    target_link_libraries (clickhouse_common_io PRIVATE ${OPENSSL_CRYPTO_LIBRARY})
-endif ()
-
-dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR})
-dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
-
-if (USE_PROTOBUF)
-    dbms_target_link_libraries (PRIVATE ${Protobuf_LIBRARY})
-    dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR})
-endif ()
-
-if (USE_HDFS)
-    target_link_libraries (clickhouse_common_io PUBLIC ${HDFS3_LIBRARY})
-    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${HDFS3_INCLUDE_DIR})
-endif()
-
-if (USE_AWS_S3)
-    target_link_libraries (clickhouse_common_io PUBLIC ${AWS_S3_LIBRARY})
-    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_CORE_INCLUDE_DIR})
-    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR})
-endif()
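# For illustration only: every optional dependency above follows the same
# guard pattern. Link only when the library was found, and add its headers as
# SYSTEM so third-party warnings stay silent; BEFORE prefers the bundled copy
# over a system-wide installation. Generic shape with a hypothetical
# dependency FOO (USE_FOO, FOO_LIBRARY and FOO_INCLUDE_DIR are placeholders):
if (USE_FOO)
    dbms_target_link_libraries (PRIVATE ${FOO_LIBRARY})
    dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${FOO_INCLUDE_DIR})
endif ()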
-
-if (USE_BROTLI)
-    target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY})
-    target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR})
-endif()
-
-if (USE_JEMALLOC)
-    dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp
-    target_include_directories (clickhouse_new_delete SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR})
-
-    if(NOT MAKE_STATIC_LIBRARIES AND ${JEMALLOC_LIBRARIES} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$")
-        # mallctl in dbms/src/Interpreters/AsynchronousMetrics.cpp
-        # Actually we link JEMALLOC to almost all libraries.
-        # This is just a hotfix for some uninvestigated problem.
-        target_link_libraries(clickhouse_interpreters PRIVATE ${JEMALLOC_LIBRARIES})
-    endif()
-endif ()
-
-dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR})
-target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR})
-
-target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR})
-
-target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR})
-
-add_subdirectory (programs)
-add_subdirectory (tests)
-
-if (ENABLE_TESTS AND USE_GTEST)
-    macro (grep_gtest_sources BASE_DIR DST_VAR)
-        # Could match files that are not in tests/ directories
-        file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp")
-    endmacro()
-
-    # attach all dbms gtest sources
-    grep_gtest_sources(${ClickHouse_SOURCE_DIR}/dbms dbms_gtest_sources)
-    add_executable(unit_tests_dbms ${dbms_gtest_sources})
-
-    # gtest framework has substandard code
-    target_compile_options(unit_tests_dbms PRIVATE
-        -Wno-zero-as-null-pointer-constant
-        -Wno-undef
-        -Wno-sign-compare
-        -Wno-used-but-marked-unused
-        -Wno-missing-noreturn
-        -Wno-gnu-zero-variadic-macro-arguments
-    )
-
-    target_link_libraries(unit_tests_dbms PRIVATE ${GTEST_BOTH_LIBRARIES} clickhouse_functions clickhouse_parsers dbms clickhouse_common_zookeeper string_utils)
-    add_check(unit_tests_dbms)
-endif ()
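# For illustration only: grep_gtest_sources recursively globs gtest*.cpp, so
# it can also be pointed at a narrower subtree. A hypothetical use that would
# collect just the dictionary tests (target and variable names invented):
grep_gtest_sources(${ClickHouse_SOURCE_DIR}/dbms/src/Dictionaries dict_gtest_sources)
add_executable(unit_tests_dictionaries ${dict_gtest_sources})
target_link_libraries(unit_tests_dictionaries PRIVATE ${GTEST_BOTH_LIBRARIES} dbms)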
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_521_104/checksums.txt b/dbms/programs/server/data/system/metric_log/202004_1_521_104/checksums.txt
deleted file mode 100644
index a0fdbe718cb259905a03dfc4cce35070c34c8b7c..0000000000000000000000000000000000000000
Binary files a/dbms/programs/server/data/system/metric_log/202004_1_521_104/checksums.txt and /dev/null differ
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_521_104/columns.txt b/dbms/programs/server/data/system/metric_log/202004_1_521_104/columns.txt
deleted file mode 100644
index 6b901df244b..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_1_521_104/columns.txt
+++ /dev/null
@@ -1,213 +0,0 @@
-columns format version: 1
-211 columns:
-`event_date` Date
-`event_time` DateTime
-`milliseconds` UInt64
-`ProfileEvent_Query` UInt64
-`ProfileEvent_SelectQuery` UInt64
-`ProfileEvent_InsertQuery` UInt64
-`ProfileEvent_FileOpen` UInt64
-`ProfileEvent_Seek` UInt64
-`ProfileEvent_ReadBufferFromFileDescriptorRead` UInt64
-`ProfileEvent_ReadBufferFromFileDescriptorReadFailed` UInt64
-`ProfileEvent_ReadBufferFromFileDescriptorReadBytes` UInt64
-`ProfileEvent_WriteBufferFromFileDescriptorWrite` UInt64
-`ProfileEvent_WriteBufferFromFileDescriptorWriteFailed` UInt64
-`ProfileEvent_WriteBufferFromFileDescriptorWriteBytes` UInt64
-`ProfileEvent_ReadBufferAIORead` UInt64
-`ProfileEvent_ReadBufferAIOReadBytes` UInt64
-`ProfileEvent_WriteBufferAIOWrite` UInt64
-`ProfileEvent_WriteBufferAIOWriteBytes` UInt64
-`ProfileEvent_ReadCompressedBytes` UInt64
-`ProfileEvent_CompressedReadBufferBlocks` UInt64
-`ProfileEvent_CompressedReadBufferBytes` UInt64
-`ProfileEvent_UncompressedCacheHits` UInt64
-`ProfileEvent_UncompressedCacheMisses` UInt64
-`ProfileEvent_UncompressedCacheWeightLost` UInt64
-`ProfileEvent_IOBufferAllocs` UInt64
-`ProfileEvent_IOBufferAllocBytes` UInt64
-`ProfileEvent_ArenaAllocChunks` UInt64
-`ProfileEvent_ArenaAllocBytes` UInt64
-`ProfileEvent_FunctionExecute` UInt64
-`ProfileEvent_TableFunctionExecute` UInt64
-`ProfileEvent_MarkCacheHits` UInt64
-`ProfileEvent_MarkCacheMisses` UInt64
-`ProfileEvent_CreatedReadBufferOrdinary` UInt64
-`ProfileEvent_CreatedReadBufferAIO` UInt64
-`ProfileEvent_CreatedReadBufferAIOFailed` UInt64
-`ProfileEvent_CreatedReadBufferMMap` UInt64
-`ProfileEvent_CreatedReadBufferMMapFailed` UInt64
-`ProfileEvent_CreatedWriteBufferOrdinary` UInt64
-`ProfileEvent_CreatedWriteBufferAIO` UInt64
-`ProfileEvent_CreatedWriteBufferAIOFailed` UInt64
-`ProfileEvent_DiskReadElapsedMicroseconds` UInt64
-`ProfileEvent_DiskWriteElapsedMicroseconds` UInt64
-`ProfileEvent_NetworkReceiveElapsedMicroseconds` UInt64
-`ProfileEvent_NetworkSendElapsedMicroseconds` UInt64
-`ProfileEvent_ThrottlerSleepMicroseconds` UInt64
-`ProfileEvent_QueryMaskingRulesMatch` UInt64
-`ProfileEvent_ReplicatedPartFetches` UInt64
-`ProfileEvent_ReplicatedPartFailedFetches` UInt64
-`ProfileEvent_ObsoleteReplicatedParts` UInt64
-`ProfileEvent_ReplicatedPartMerges` UInt64
-`ProfileEvent_ReplicatedPartFetchesOfMerged` UInt64
-`ProfileEvent_ReplicatedPartMutations` UInt64
-`ProfileEvent_ReplicatedPartChecks` UInt64
-`ProfileEvent_ReplicatedPartChecksFailed` UInt64
-`ProfileEvent_ReplicatedDataLoss` UInt64
-`ProfileEvent_InsertedRows` UInt64
-`ProfileEvent_InsertedBytes` UInt64
-`ProfileEvent_DelayedInserts` UInt64
-`ProfileEvent_RejectedInserts` UInt64
-`ProfileEvent_DelayedInsertsMilliseconds` UInt64
-`ProfileEvent_DuplicatedInsertedBlocks` UInt64
-`ProfileEvent_ZooKeeperInit` UInt64
-`ProfileEvent_ZooKeeperTransactions` UInt64
-`ProfileEvent_ZooKeeperList` UInt64
-`ProfileEvent_ZooKeeperCreate` UInt64
-`ProfileEvent_ZooKeeperRemove` UInt64
-`ProfileEvent_ZooKeeperExists` UInt64
-`ProfileEvent_ZooKeeperGet` UInt64
-`ProfileEvent_ZooKeeperSet` UInt64
-`ProfileEvent_ZooKeeperMulti` UInt64
-`ProfileEvent_ZooKeeperCheck` UInt64
-`ProfileEvent_ZooKeeperClose` UInt64
-`ProfileEvent_ZooKeeperWatchResponse` UInt64
-`ProfileEvent_ZooKeeperUserExceptions` UInt64
-`ProfileEvent_ZooKeeperHardwareExceptions` UInt64
-`ProfileEvent_ZooKeeperOtherExceptions` UInt64
-`ProfileEvent_ZooKeeperWaitMicroseconds` UInt64
-`ProfileEvent_ZooKeeperBytesSent` UInt64
-`ProfileEvent_ZooKeeperBytesReceived` UInt64
-`ProfileEvent_DistributedConnectionFailTry` UInt64
-`ProfileEvent_DistributedConnectionMissingTable` UInt64
-`ProfileEvent_DistributedConnectionStaleReplica` UInt64
-`ProfileEvent_DistributedConnectionFailAtAll` UInt64
-`ProfileEvent_CompileAttempt` UInt64
-`ProfileEvent_CompileSuccess` UInt64
-`ProfileEvent_CompileFunction` UInt64
-`ProfileEvent_CompiledFunctionExecute` UInt64
-`ProfileEvent_CompileExpressionsMicroseconds` UInt64
-`ProfileEvent_CompileExpressionsBytes` UInt64
-`ProfileEvent_ExternalSortWritePart` UInt64
-`ProfileEvent_ExternalSortMerge` UInt64
-`ProfileEvent_ExternalAggregationWritePart` UInt64
-`ProfileEvent_ExternalAggregationMerge` UInt64
-`ProfileEvent_ExternalAggregationCompressedBytes` UInt64
-`ProfileEvent_ExternalAggregationUncompressedBytes` UInt64
-`ProfileEvent_SlowRead` UInt64
-`ProfileEvent_ReadBackoff` UInt64
-`ProfileEvent_ReplicaYieldLeadership` UInt64
-`ProfileEvent_ReplicaPartialShutdown` UInt64
-`ProfileEvent_SelectedParts` UInt64
-`ProfileEvent_SelectedRanges` UInt64
-`ProfileEvent_SelectedMarks` UInt64
-`ProfileEvent_Merge` UInt64
-`ProfileEvent_MergedRows` UInt64
-`ProfileEvent_MergedUncompressedBytes` UInt64
-`ProfileEvent_MergesTimeMilliseconds` UInt64
-`ProfileEvent_MergeTreeDataWriterRows` UInt64
-`ProfileEvent_MergeTreeDataWriterUncompressedBytes` UInt64
-`ProfileEvent_MergeTreeDataWriterCompressedBytes` UInt64
-`ProfileEvent_MergeTreeDataWriterBlocks` UInt64
-`ProfileEvent_MergeTreeDataWriterBlocksAlreadySorted` UInt64
-`ProfileEvent_CannotRemoveEphemeralNode` UInt64
-`ProfileEvent_LeaderElectionAcquiredLeadership` UInt64
-`ProfileEvent_RegexpCreated` UInt64
-`ProfileEvent_ContextLock` UInt64
-`ProfileEvent_StorageBufferFlush` UInt64
-`ProfileEvent_StorageBufferErrorOnFlush` UInt64
-`ProfileEvent_StorageBufferPassedAllMinThresholds` UInt64
-`ProfileEvent_StorageBufferPassedTimeMaxThreshold` UInt64
-`ProfileEvent_StorageBufferPassedRowsMaxThreshold` UInt64
-`ProfileEvent_StorageBufferPassedBytesMaxThreshold` UInt64
-`ProfileEvent_DictCacheKeysRequested` UInt64
-`ProfileEvent_DictCacheKeysRequestedMiss` UInt64
-`ProfileEvent_DictCacheKeysRequestedFound` UInt64
-`ProfileEvent_DictCacheKeysExpired` UInt64
-`ProfileEvent_DictCacheKeysNotFound` UInt64
-`ProfileEvent_DictCacheKeysHit` UInt64
-`ProfileEvent_DictCacheRequestTimeNs` UInt64
-`ProfileEvent_DictCacheRequests` UInt64
-`ProfileEvent_DictCacheLockWriteNs` UInt64
-`ProfileEvent_DictCacheLockReadNs` UInt64
-`ProfileEvent_DistributedSyncInsertionTimeoutExceeded` UInt64
-`ProfileEvent_DataAfterMergeDiffersFromReplica` UInt64
-`ProfileEvent_DataAfterMutationDiffersFromReplica` UInt64
-`ProfileEvent_PolygonsAddedToPool` UInt64
-`ProfileEvent_PolygonsInPoolAllocatedBytes` UInt64
-`ProfileEvent_RWLockAcquiredReadLocks` UInt64
-`ProfileEvent_RWLockAcquiredWriteLocks` UInt64
-`ProfileEvent_RWLockReadersWaitMilliseconds` UInt64
-`ProfileEvent_RWLockWritersWaitMilliseconds` UInt64
-`ProfileEvent_DNSError` UInt64
-`ProfileEvent_RealTimeMicroseconds` UInt64
-`ProfileEvent_UserTimeMicroseconds` UInt64
-`ProfileEvent_SystemTimeMicroseconds` UInt64
-`ProfileEvent_SoftPageFaults` UInt64
-`ProfileEvent_HardPageFaults` UInt64
-`ProfileEvent_VoluntaryContextSwitches` UInt64
-`ProfileEvent_InvoluntaryContextSwitches` UInt64
-`ProfileEvent_OSIOWaitMicroseconds` UInt64
-`ProfileEvent_OSCPUWaitMicroseconds` UInt64
-`ProfileEvent_OSCPUVirtualTimeMicroseconds` UInt64
-`ProfileEvent_OSReadBytes` UInt64
-`ProfileEvent_OSWriteBytes` UInt64
-`ProfileEvent_OSReadChars` UInt64
-`ProfileEvent_OSWriteChars` UInt64
-`ProfileEvent_CreatedHTTPConnections` UInt64
-`ProfileEvent_CannotWriteToWriteBufferDiscard` UInt64
-`ProfileEvent_QueryProfilerSignalOverruns` UInt64
-`CurrentMetric_Query` Int64
-`CurrentMetric_Merge` Int64
-`CurrentMetric_PartMutation` Int64
-`CurrentMetric_ReplicatedFetch` Int64
-`CurrentMetric_ReplicatedSend` Int64
-`CurrentMetric_ReplicatedChecks` Int64
-`CurrentMetric_BackgroundPoolTask` Int64
-`CurrentMetric_BackgroundMovePoolTask` Int64
-`CurrentMetric_BackgroundSchedulePoolTask` Int64
-`CurrentMetric_CacheDictionaryUpdateQueueBatches` Int64
-`CurrentMetric_CacheDictionaryUpdateQueueKeys` Int64
-`CurrentMetric_DiskSpaceReservedForMerge` Int64
-`CurrentMetric_DistributedSend` Int64
-`CurrentMetric_QueryPreempted` Int64
-`CurrentMetric_TCPConnection` Int64
-`CurrentMetric_MySQLConnection` Int64
-`CurrentMetric_HTTPConnection` Int64
-`CurrentMetric_InterserverConnection` Int64
-`CurrentMetric_OpenFileForRead` Int64
-`CurrentMetric_OpenFileForWrite` Int64
-`CurrentMetric_Read` Int64
-`CurrentMetric_Write` Int64
-`CurrentMetric_SendScalars` Int64
-`CurrentMetric_SendExternalTables` Int64
-`CurrentMetric_QueryThread` Int64
-`CurrentMetric_ReadonlyReplica` Int64
-`CurrentMetric_LeaderReplica` Int64
-`CurrentMetric_MemoryTracking` Int64
-`CurrentMetric_MemoryTrackingInBackgroundProcessingPool` Int64
-`CurrentMetric_MemoryTrackingInBackgroundMoveProcessingPool` Int64
-`CurrentMetric_MemoryTrackingInBackgroundSchedulePool` Int64
-`CurrentMetric_MemoryTrackingForMerges` Int64
-`CurrentMetric_LeaderElection` Int64
-`CurrentMetric_EphemeralNode` Int64
-`CurrentMetric_ZooKeeperSession` Int64
-`CurrentMetric_ZooKeeperWatch` Int64
-`CurrentMetric_ZooKeeperRequest` Int64
-`CurrentMetric_DelayedInserts` Int64
-`CurrentMetric_ContextLockWait` Int64
-`CurrentMetric_StorageBufferRows` Int64
-`CurrentMetric_StorageBufferBytes` Int64
-`CurrentMetric_DictCacheRequests` Int64
-`CurrentMetric_Revision` Int64
-`CurrentMetric_VersionInteger` Int64
-`CurrentMetric_RWLockWaitingReaders` Int64
-`CurrentMetric_RWLockWaitingWriters` Int64
-`CurrentMetric_RWLockActiveReaders` Int64
-`CurrentMetric_RWLockActiveWriters` Int64
-`CurrentMetric_GlobalThread` Int64
-`CurrentMetric_GlobalThreadActive` Int64
-`CurrentMetric_LocalThread` Int64
-`CurrentMetric_LocalThreadActive` Int64
-`CurrentMetric_DistributedFilesToInsert` Int64
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_521_104/count.txt b/dbms/programs/server/data/system/metric_log/202004_1_521_104/count.txt
deleted file mode 100644
index 7e72ea4040f..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_1_521_104/count.txt
+++ /dev/null
@@ -1 +0,0 @@
-3983
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_521_104/minmax_event_date.idx b/dbms/programs/server/data/system/metric_log/202004_1_521_104/minmax_event_date.idx
deleted file mode 100644
index 73ef9660d53..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_1_521_104/minmax_event_date.idx
+++ /dev/null
@@ -1 +0,0 @@
-GG
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_521_104/partition.dat b/dbms/programs/server/data/system/metric_log/202004_1_521_104/partition.dat
deleted file mode 100644
index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4
LcmWd;Wo7^X0GR+S

diff --git a/dbms/programs/server/data/system/metric_log/202004_1_521_104/primary.idx b/dbms/programs/server/data/system/metric_log/202004_1_521_104/primary.idx
deleted file mode 100644
index 8589c74d308..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_1_521_104/primary.idx
+++ /dev/null
@@ -1 +0,0 @@
-G^G^G^G^G^
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_526_105/checksums.txt b/dbms/programs/server/data/system/metric_log/202004_1_526_105/checksums.txt
deleted file mode 100644
index b3c0729db40f97eff00486477257162d0fb4c979..0000000000000000000000000000000000000000
Binary files a/dbms/programs/server/data/system/metric_log/202004_1_526_105/checksums.txt and /dev/null differ
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_526_105/columns.txt b/dbms/programs/server/data/system/metric_log/202004_1_526_105/columns.txt
deleted file mode 100644
index 6b901df244b..00000000000
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_526_105/count.txt b/dbms/programs/server/data/system/metric_log/202004_1_526_105/count.txt
deleted file mode 100644
index 4af051ca985..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_1_526_105/count.txt
+++ /dev/null
@@ -1 +0,0 @@
-4020
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_526_105/minmax_event_date.idx b/dbms/programs/server/data/system/metric_log/202004_1_526_105/minmax_event_date.idx
deleted file mode 100644
index 73ef9660d53..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_1_526_105/minmax_event_date.idx
+++ /dev/null
@@ -1 +0,0 @@
-GG
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_1_526_105/partition.dat b/dbms/programs/server/data/system/metric_log/202004_1_526_105/partition.dat
deleted file mode 100644
index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4
LcmWd;Wo7^X0GR+S

diff --git a/dbms/programs/server/data/system/metric_log/202004_1_526_105/primary.idx b/dbms/programs/server/data/system/metric_log/202004_1_526_105/primary.idx
deleted file mode 100644
index d1841709c4e..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_1_526_105/primary.idx
+++ /dev/null
@@ -1 +0,0 @@
-G^G^G^G^Gȯ^
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_522_522_0/checksums.txt b/dbms/programs/server/data/system/metric_log/202004_522_522_0/checksums.txt
deleted file mode 100644
index d96bfb26c4829bb370ed371e1edcccb2af8c67c0..0000000000000000000000000000000000000000
Binary files a/dbms/programs/server/data/system/metric_log/202004_522_522_0/checksums.txt and /dev/null differ
z&|jtW4-{9e!+w$WfGSaYsrrz}tLVQ`?jG)XHyZQK7x}KwF3rsd%&u7R{V)%xM12`- zup)oSrzL z{b+~kQ|sp;E9ZSa!URcfYt0_%pPIg#{@YEn)P6B+>xjD>@+Q?5qKNG+aW7wr3bL9$ z`PlH2wz|m#h%RG-XWXbM-=F?qdusRZH3JS`o!GS!)HQy&Pqkh4XnSSG{&s(KrsU#r zeZ?=Q=>KGwCO;Ll%S&QyAHKJFN0)PcDS(E9^5n$1+8V39##~={79J?9mU>Qo%T~9| z@croOKHa`5tNo}El(n=KZrv<-5Z%LKc5|H~Z|~O~+I(ix%%1&kFEc#>q1I`wp_N1r ZI$Ke~etkXd$w#NY``}ttnrzv>@c#_Hinss( diff --git a/dbms/programs/server/data/system/metric_log/202004_522_522_0/columns.txt b/dbms/programs/server/data/system/metric_log/202004_522_522_0/columns.txt deleted file mode 100644 index 6b901df244b..00000000000 --- a/dbms/programs/server/data/system/metric_log/202004_522_522_0/columns.txt +++ /dev/null @@ -1,213 +0,0 @@ -columns format version: 1 -211 columns: -`event_date` Date -`event_time` DateTime -`milliseconds` UInt64 -`ProfileEvent_Query` UInt64 -`ProfileEvent_SelectQuery` UInt64 -`ProfileEvent_InsertQuery` UInt64 -`ProfileEvent_FileOpen` UInt64 -`ProfileEvent_Seek` UInt64 -`ProfileEvent_ReadBufferFromFileDescriptorRead` UInt64 -`ProfileEvent_ReadBufferFromFileDescriptorReadFailed` UInt64 -`ProfileEvent_ReadBufferFromFileDescriptorReadBytes` UInt64 -`ProfileEvent_WriteBufferFromFileDescriptorWrite` UInt64 -`ProfileEvent_WriteBufferFromFileDescriptorWriteFailed` UInt64 -`ProfileEvent_WriteBufferFromFileDescriptorWriteBytes` UInt64 -`ProfileEvent_ReadBufferAIORead` UInt64 -`ProfileEvent_ReadBufferAIOReadBytes` UInt64 -`ProfileEvent_WriteBufferAIOWrite` UInt64 -`ProfileEvent_WriteBufferAIOWriteBytes` UInt64 -`ProfileEvent_ReadCompressedBytes` UInt64 -`ProfileEvent_CompressedReadBufferBlocks` UInt64 -`ProfileEvent_CompressedReadBufferBytes` UInt64 -`ProfileEvent_UncompressedCacheHits` UInt64 -`ProfileEvent_UncompressedCacheMisses` UInt64 -`ProfileEvent_UncompressedCacheWeightLost` UInt64 -`ProfileEvent_IOBufferAllocs` UInt64 -`ProfileEvent_IOBufferAllocBytes` UInt64 -`ProfileEvent_ArenaAllocChunks` UInt64 -`ProfileEvent_ArenaAllocBytes` UInt64 -`ProfileEvent_FunctionExecute` UInt64 -`ProfileEvent_TableFunctionExecute` UInt64 -`ProfileEvent_MarkCacheHits` UInt64 -`ProfileEvent_MarkCacheMisses` UInt64 -`ProfileEvent_CreatedReadBufferOrdinary` UInt64 -`ProfileEvent_CreatedReadBufferAIO` UInt64 -`ProfileEvent_CreatedReadBufferAIOFailed` UInt64 -`ProfileEvent_CreatedReadBufferMMap` UInt64 -`ProfileEvent_CreatedReadBufferMMapFailed` UInt64 -`ProfileEvent_CreatedWriteBufferOrdinary` UInt64 -`ProfileEvent_CreatedWriteBufferAIO` UInt64 -`ProfileEvent_CreatedWriteBufferAIOFailed` UInt64 -`ProfileEvent_DiskReadElapsedMicroseconds` UInt64 -`ProfileEvent_DiskWriteElapsedMicroseconds` UInt64 -`ProfileEvent_NetworkReceiveElapsedMicroseconds` UInt64 -`ProfileEvent_NetworkSendElapsedMicroseconds` UInt64 -`ProfileEvent_ThrottlerSleepMicroseconds` UInt64 -`ProfileEvent_QueryMaskingRulesMatch` UInt64 -`ProfileEvent_ReplicatedPartFetches` UInt64 -`ProfileEvent_ReplicatedPartFailedFetches` UInt64 -`ProfileEvent_ObsoleteReplicatedParts` UInt64 -`ProfileEvent_ReplicatedPartMerges` UInt64 -`ProfileEvent_ReplicatedPartFetchesOfMerged` UInt64 -`ProfileEvent_ReplicatedPartMutations` UInt64 -`ProfileEvent_ReplicatedPartChecks` UInt64 -`ProfileEvent_ReplicatedPartChecksFailed` UInt64 -`ProfileEvent_ReplicatedDataLoss` UInt64 -`ProfileEvent_InsertedRows` UInt64 -`ProfileEvent_InsertedBytes` UInt64 -`ProfileEvent_DelayedInserts` UInt64 -`ProfileEvent_RejectedInserts` UInt64 -`ProfileEvent_DelayedInsertsMilliseconds` UInt64 -`ProfileEvent_DuplicatedInsertedBlocks` UInt64 -`ProfileEvent_ZooKeeperInit` UInt64 
-`ProfileEvent_ZooKeeperTransactions` UInt64 -`ProfileEvent_ZooKeeperList` UInt64 -`ProfileEvent_ZooKeeperCreate` UInt64 -`ProfileEvent_ZooKeeperRemove` UInt64 -`ProfileEvent_ZooKeeperExists` UInt64 -`ProfileEvent_ZooKeeperGet` UInt64 -`ProfileEvent_ZooKeeperSet` UInt64 -`ProfileEvent_ZooKeeperMulti` UInt64 -`ProfileEvent_ZooKeeperCheck` UInt64 -`ProfileEvent_ZooKeeperClose` UInt64 -`ProfileEvent_ZooKeeperWatchResponse` UInt64 -`ProfileEvent_ZooKeeperUserExceptions` UInt64 -`ProfileEvent_ZooKeeperHardwareExceptions` UInt64 -`ProfileEvent_ZooKeeperOtherExceptions` UInt64 -`ProfileEvent_ZooKeeperWaitMicroseconds` UInt64 -`ProfileEvent_ZooKeeperBytesSent` UInt64 -`ProfileEvent_ZooKeeperBytesReceived` UInt64 -`ProfileEvent_DistributedConnectionFailTry` UInt64 -`ProfileEvent_DistributedConnectionMissingTable` UInt64 -`ProfileEvent_DistributedConnectionStaleReplica` UInt64 -`ProfileEvent_DistributedConnectionFailAtAll` UInt64 -`ProfileEvent_CompileAttempt` UInt64 -`ProfileEvent_CompileSuccess` UInt64 -`ProfileEvent_CompileFunction` UInt64 -`ProfileEvent_CompiledFunctionExecute` UInt64 -`ProfileEvent_CompileExpressionsMicroseconds` UInt64 -`ProfileEvent_CompileExpressionsBytes` UInt64 -`ProfileEvent_ExternalSortWritePart` UInt64 -`ProfileEvent_ExternalSortMerge` UInt64 -`ProfileEvent_ExternalAggregationWritePart` UInt64 -`ProfileEvent_ExternalAggregationMerge` UInt64 -`ProfileEvent_ExternalAggregationCompressedBytes` UInt64 -`ProfileEvent_ExternalAggregationUncompressedBytes` UInt64 -`ProfileEvent_SlowRead` UInt64 -`ProfileEvent_ReadBackoff` UInt64 -`ProfileEvent_ReplicaYieldLeadership` UInt64 -`ProfileEvent_ReplicaPartialShutdown` UInt64 -`ProfileEvent_SelectedParts` UInt64 -`ProfileEvent_SelectedRanges` UInt64 -`ProfileEvent_SelectedMarks` UInt64 -`ProfileEvent_Merge` UInt64 -`ProfileEvent_MergedRows` UInt64 -`ProfileEvent_MergedUncompressedBytes` UInt64 -`ProfileEvent_MergesTimeMilliseconds` UInt64 -`ProfileEvent_MergeTreeDataWriterRows` UInt64 -`ProfileEvent_MergeTreeDataWriterUncompressedBytes` UInt64 -`ProfileEvent_MergeTreeDataWriterCompressedBytes` UInt64 -`ProfileEvent_MergeTreeDataWriterBlocks` UInt64 -`ProfileEvent_MergeTreeDataWriterBlocksAlreadySorted` UInt64 -`ProfileEvent_CannotRemoveEphemeralNode` UInt64 -`ProfileEvent_LeaderElectionAcquiredLeadership` UInt64 -`ProfileEvent_RegexpCreated` UInt64 -`ProfileEvent_ContextLock` UInt64 -`ProfileEvent_StorageBufferFlush` UInt64 -`ProfileEvent_StorageBufferErrorOnFlush` UInt64 -`ProfileEvent_StorageBufferPassedAllMinThresholds` UInt64 -`ProfileEvent_StorageBufferPassedTimeMaxThreshold` UInt64 -`ProfileEvent_StorageBufferPassedRowsMaxThreshold` UInt64 -`ProfileEvent_StorageBufferPassedBytesMaxThreshold` UInt64 -`ProfileEvent_DictCacheKeysRequested` UInt64 -`ProfileEvent_DictCacheKeysRequestedMiss` UInt64 -`ProfileEvent_DictCacheKeysRequestedFound` UInt64 -`ProfileEvent_DictCacheKeysExpired` UInt64 -`ProfileEvent_DictCacheKeysNotFound` UInt64 -`ProfileEvent_DictCacheKeysHit` UInt64 -`ProfileEvent_DictCacheRequestTimeNs` UInt64 -`ProfileEvent_DictCacheRequests` UInt64 -`ProfileEvent_DictCacheLockWriteNs` UInt64 -`ProfileEvent_DictCacheLockReadNs` UInt64 -`ProfileEvent_DistributedSyncInsertionTimeoutExceeded` UInt64 -`ProfileEvent_DataAfterMergeDiffersFromReplica` UInt64 -`ProfileEvent_DataAfterMutationDiffersFromReplica` UInt64 -`ProfileEvent_PolygonsAddedToPool` UInt64 -`ProfileEvent_PolygonsInPoolAllocatedBytes` UInt64 -`ProfileEvent_RWLockAcquiredReadLocks` UInt64 -`ProfileEvent_RWLockAcquiredWriteLocks` UInt64 
-`ProfileEvent_RWLockReadersWaitMilliseconds` UInt64 -`ProfileEvent_RWLockWritersWaitMilliseconds` UInt64 -`ProfileEvent_DNSError` UInt64 -`ProfileEvent_RealTimeMicroseconds` UInt64 -`ProfileEvent_UserTimeMicroseconds` UInt64 -`ProfileEvent_SystemTimeMicroseconds` UInt64 -`ProfileEvent_SoftPageFaults` UInt64 -`ProfileEvent_HardPageFaults` UInt64 -`ProfileEvent_VoluntaryContextSwitches` UInt64 -`ProfileEvent_InvoluntaryContextSwitches` UInt64 -`ProfileEvent_OSIOWaitMicroseconds` UInt64 -`ProfileEvent_OSCPUWaitMicroseconds` UInt64 -`ProfileEvent_OSCPUVirtualTimeMicroseconds` UInt64 -`ProfileEvent_OSReadBytes` UInt64 -`ProfileEvent_OSWriteBytes` UInt64 -`ProfileEvent_OSReadChars` UInt64 -`ProfileEvent_OSWriteChars` UInt64 -`ProfileEvent_CreatedHTTPConnections` UInt64 -`ProfileEvent_CannotWriteToWriteBufferDiscard` UInt64 -`ProfileEvent_QueryProfilerSignalOverruns` UInt64 -`CurrentMetric_Query` Int64 -`CurrentMetric_Merge` Int64 -`CurrentMetric_PartMutation` Int64 -`CurrentMetric_ReplicatedFetch` Int64 -`CurrentMetric_ReplicatedSend` Int64 -`CurrentMetric_ReplicatedChecks` Int64 -`CurrentMetric_BackgroundPoolTask` Int64 -`CurrentMetric_BackgroundMovePoolTask` Int64 -`CurrentMetric_BackgroundSchedulePoolTask` Int64 -`CurrentMetric_CacheDictionaryUpdateQueueBatches` Int64 -`CurrentMetric_CacheDictionaryUpdateQueueKeys` Int64 -`CurrentMetric_DiskSpaceReservedForMerge` Int64 -`CurrentMetric_DistributedSend` Int64 -`CurrentMetric_QueryPreempted` Int64 -`CurrentMetric_TCPConnection` Int64 -`CurrentMetric_MySQLConnection` Int64 -`CurrentMetric_HTTPConnection` Int64 -`CurrentMetric_InterserverConnection` Int64 -`CurrentMetric_OpenFileForRead` Int64 -`CurrentMetric_OpenFileForWrite` Int64 -`CurrentMetric_Read` Int64 -`CurrentMetric_Write` Int64 -`CurrentMetric_SendScalars` Int64 -`CurrentMetric_SendExternalTables` Int64 -`CurrentMetric_QueryThread` Int64 -`CurrentMetric_ReadonlyReplica` Int64 -`CurrentMetric_LeaderReplica` Int64 -`CurrentMetric_MemoryTracking` Int64 -`CurrentMetric_MemoryTrackingInBackgroundProcessingPool` Int64 -`CurrentMetric_MemoryTrackingInBackgroundMoveProcessingPool` Int64 -`CurrentMetric_MemoryTrackingInBackgroundSchedulePool` Int64 -`CurrentMetric_MemoryTrackingForMerges` Int64 -`CurrentMetric_LeaderElection` Int64 -`CurrentMetric_EphemeralNode` Int64 -`CurrentMetric_ZooKeeperSession` Int64 -`CurrentMetric_ZooKeeperWatch` Int64 -`CurrentMetric_ZooKeeperRequest` Int64 -`CurrentMetric_DelayedInserts` Int64 -`CurrentMetric_ContextLockWait` Int64 -`CurrentMetric_StorageBufferRows` Int64 -`CurrentMetric_StorageBufferBytes` Int64 -`CurrentMetric_DictCacheRequests` Int64 -`CurrentMetric_Revision` Int64 -`CurrentMetric_VersionInteger` Int64 -`CurrentMetric_RWLockWaitingReaders` Int64 -`CurrentMetric_RWLockWaitingWriters` Int64 -`CurrentMetric_RWLockActiveReaders` Int64 -`CurrentMetric_RWLockActiveWriters` Int64 -`CurrentMetric_GlobalThread` Int64 -`CurrentMetric_GlobalThreadActive` Int64 -`CurrentMetric_LocalThread` Int64 -`CurrentMetric_LocalThreadActive` Int64 -`CurrentMetric_DistributedFilesToInsert` Int64 diff --git a/dbms/programs/server/data/system/metric_log/202004_522_522_0/count.txt b/dbms/programs/server/data/system/metric_log/202004_522_522_0/count.txt deleted file mode 100644 index 301160a9306..00000000000 --- a/dbms/programs/server/data/system/metric_log/202004_522_522_0/count.txt +++ /dev/null @@ -1 +0,0 @@ -8 \ No newline at end of file diff --git a/dbms/programs/server/data/system/metric_log/202004_522_522_0/minmax_event_date.idx 
deleted file mode 100644
index fc9f33a367f..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_522_522_0/minmax_event_date.idx
+++ /dev/null
@@ -1 +0,0 @@
-GG
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_522_522_0/partition.dat b/dbms/programs/server/data/system/metric_log/202004_522_522_0/partition.dat
deleted file mode 100644
index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4
LcmWd;Wo7^X0GR+S

diff --git a/dbms/programs/server/data/system/metric_log/202004_522_522_0/primary.idx b/dbms/programs/server/data/system/metric_log/202004_522_522_0/primary.idx
deleted file mode 100644
index 7124d1b7297..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_522_522_0/primary.idx
+++ /dev/null
@@ -1 +0,0 @@
-G^G^
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_523_523_0/checksums.txt b/dbms/programs/server/data/system/metric_log/202004_523_523_0/checksums.txt
deleted file mode 100644
index 03ec0a218cf195cd755046c0806ef98ab235f001..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6974
[binary patch data omitted]
diff --git a/dbms/programs/server/data/system/metric_log/202004_523_523_0/columns.txt b/dbms/programs/server/data/system/metric_log/202004_523_523_0/columns.txt
deleted file mode 100644
index 6b901df244b..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_523_523_0/columns.txt
+++ /dev/null
@@ -1,213 +0,0 @@
[213 deleted lines omitted; identical to the 202004_522_522_0/columns.txt listing above]
diff --git a/dbms/programs/server/data/system/metric_log/202004_523_523_0/count.txt b/dbms/programs/server/data/system/metric_log/202004_523_523_0/count.txt
deleted file mode 100644
index c7930257dfe..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_523_523_0/count.txt
+++ /dev/null
@@ -1 +0,0 @@
-7
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_523_523_0/minmax_event_date.idx b/dbms/programs/server/data/system/metric_log/202004_523_523_0/minmax_event_date.idx
deleted file mode 100644
index fc9f33a367f..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_523_523_0/minmax_event_date.idx
+++ /dev/null
@@ -1 +0,0 @@
-GG
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_523_523_0/partition.dat b/dbms/programs/server/data/system/metric_log/202004_523_523_0/partition.dat
deleted file mode 100644
index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4
LcmWd;Wo7^X0GR+S

diff --git a/dbms/programs/server/data/system/metric_log/202004_523_523_0/primary.idx b/dbms/programs/server/data/system/metric_log/202004_523_523_0/primary.idx
deleted file mode 100644
index 6a0268c1d74..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_523_523_0/primary.idx
+++ /dev/null
@@ -1 +0,0 @@
-G^G^
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_524_524_0/checksums.txt b/dbms/programs/server/data/system/metric_log/202004_524_524_0/checksums.txt
deleted file mode 100644
index d1ab9d1463ba8941700b21934bef7e7997bffdbe..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6745
[binary patch data omitted]
ze|lm;@zse7j!htGFRfcYome%)zFE|=JY`&qYs1$C_Knk*^UMZHwg&iwj5Tlr&^W% z*=Ki8^g7!Jy9;{hV~&mi){OT}Y=%8$ZtpC+yJn70wqJ(3hV*~$9k9A>^^Qw>fBfa{ zq)m^_C02iP+Y?!tZ>%0abW`bt9Z4U*1?thj0>DfAIn`j8RD>RBz8YHFJR!Gr_N_0* zY}#KY-K{&APORngZXQ3{Ix(eWlkJ_X|M**nC|Ef_$lFpH`D=MiP30J`$0KhS*+pWI z(z%GKz@(pYqquZg$#7DJ-csbu5vx+`$uJy<(IwmW5CiTfV(kF3D8+z?n?B{#WU z4ttOo*g@cg$UPGRBp)6AEyrqjy_C$Wih%iWr>{ljAW)p+(7SpmmDh#cDD@>*E96G6 zueHX9;CFfI>+#*rcE6icOz991Yzj6>X_R^?SOwzvF|_8W8-$L80b@NG(i1+fL-zY| zX*y*|2(iLhkk0lYEDNf@sCykcjTwda)lXgY;Ab_e}Z2q|nAF3O(>M%7Y@O~ajr&44~^2ZAkT0`UJ(v4?oQC3ZLvHXD(U$=h11 zCr$W7Oq8-!VHiYNZB4Q#OI?gWtZG-@jsE=n&AK1x!si>;pV)E9Jo^T*p6k27_)E*Y z#h%#@&);&;_`?E-Ac*CHvZ&VQ3LH}2HS~)s`JniG#?U7`VRHM#Sql zjvEf4x0l`#$pBbV>67KgrhvRk&EDqp(wZK-_fe7|pdge5(OXZEZYRE_PO7T?Ny{ZD z;UJeR`_dI1#?+S1*bkmRzrDSweZ)GUrR7CpUH6abDOVa#EThYR<`0@4I^1su&&HzJ zmO>(VnB$fL`vdeiQVC-LKX^6_IJm6KLm!HS{RNMlOgWKA;{Zv^A{+uXIw~3;)k4<{ zA8&e}2nU6`(HqX~zc)2)(9%8u>(S?DdDO%@Q+4mB{mVBm-uvvk%g;TtsLy&HatU*S zAenf`fMT!5-8zi2ix4f{5+bJ8<^s7w*J9+42@&~&NJJT;Hf&n$Z4e%Ux>6Zg?>)i5 zj(~y-hrINWpjZHG42xxl&{SdG>K!+M%V~D`V-=mo{xxSulcS~A|5!J#tv>p@DF=yl z@1EbMmi^rMMq*=S`|-@Djq#&EU~Y8B775nFrzI^Ig&t%G`z-grqD_)U?1|t5BIH9| zwZrbVzayBW17gG_nj6pw0k6+qFOLq+o-O-Ew+3!NKV5k^@D;o|9}26y^DB5GI}U0R z3A>@HteP=7dW#gpM+i#Ff$Yh$?=%NjSa!Yad!N5+7?8hj;I&Nq|1Cb2vC_ORalMsT zEfYI0|2a7SzxehtDkz=-u75i^W(qM+5{^o zSQAolS~kqZgoe@MG_y6lQF?&uP9K)k6+jePF1u)~trJ+9N^j|h5g!V9=>Xx{LA?}g zrFNIw5@nK}4831hHb$UoY!wyure2s@E)Y&}21q#Y+54b@0$|Z3&N6T#k(8AX(k#+; zUQG$`^7NXyw_7Trug_gpne{DbMO&9e&1Mfb zuXtj0iO=W#P|HdMy(MlWZaEit8b_BF+XMEZ*#Y^>6w z!}~$Yo z92>4r)b!!e&&rUWbygrp3HvJuca1|HX(+FH{6wm_;g?@k&V7DhOkL!+$Yg~GkYm%e?@9C=xJS10I8=|G~gtw22CiAwdq9Yn?cuurRHtP zgd<$23@wzVt633{Dxq`;yKp&n%CGs7J zqpMpzj{MAQl;Nv{E#6?Dq{Si2&f&;y-Qpw;sm2gooxp8{EZI3NDvkzqsFM`zjX^wZ zA;SLN_M-ZFpIl$w7X}!(ol$JL(M-RRSS^w+s&|U-S%$=LBGnvv46Q_kMJqLIBtY){ z9>>$sEQ6TMLnV$)MN)eSzx*JCDIkA}@b46sB^W{zlaPNyMp~s5icleYeStR-7ORxH zuv6%9m1-z3B}t%XRBRju$1?o!1qN%=;H!SHh_D%YJ&whZ&=ocg9m|Y?4nzN(=)$Ml zh4RFPij|`~D`ssV*5)VLXT<+?_ueHJx4r)1f{{y{T1}WQX(Icy-0pK$Vm2(b2i^My zo9IF44w@*g&sf`<7~0;u!!Yu-50AH6V}E%4X!-f5-3PXPPpq}al1BVIecqgN&$?cU z8vXhUd!;V3V68!Kf;taK9qlmWT)Xt}(Tz9blD%6R1|;_W@5jXYP1?4^_QC$#W0k)i zl|Oso&zn6524c8$1(}fsHjaInr>B7!l@&sqX-<4b;IcvaFX65s^FjM}bKcur;=7c0 zQ<)m~`!-@7xarrc7x^K1$K;d4^QvxaN++Z^R|wYu>~Dx8*|{>gi=;A^{I+O)%Q~*I zj&OMbsP=9A5G=O!2mB&_F#&MelA2jV>WegY^TC(i77y+IdO-_Rm2I_8Wf<<8p#%g z$%kU|9Z^p|;IP(Gh7wX@-hohJB1uJV#ZZ0gAtje1v1dVV$)66%Y&-6;gR)Dw-42R4 z$!Raz-?uIPpWJ%wqG{r!jXx3VCkq$9R(f*RnJ3qNUFv`FRuF&Qjrl$LkaNj0p(Dig zgLYs4>3ZwQr}lJaL?7OfvHMYC-FGKReeBnB*9yv(^1EC*#ZAg4VC{i9mTsqP1fIH` za(HTz=H2Ozi8l_SDOZ-!r%1E%jwInT>JBp*LQ1UexLdy{yY*kiSf~#_?^g( zEA&*UByA9~s0Slq0|ASZCmdUdA&CP#sw2+bdwTjD&yZ1&MKJF~@udJOGtgvdIOrxRTBAK`s~%H%|74dh5VX4w z5%yVhBF^>%@EHvP2F_<8pC{3ETu-dJO*UKgSleG9Me{L);=e1WD1{_j^&BG$GBce^ z!vK%T8Z)PEU<^W?aMmuwYqEhDRqWqjrhC-tEwrrlNCMISL!;U((7)m_j>$APN(dw@xGcLOHDjVWfCfOsZ$X0A!)2< zA&}2?a3zKPfjs2=A3<>vo2@fbPU27igrK-cAQ_NdVfQ~XVsRC=L;Mx?^@C&R*pQA9 zLYa>4(URf7kK?ZTs$KOc&~4b@@dZ6?oROX)m{`=I#6nL3*PbhMXyK9oE8@v2_|;f& zrugKMj;%}Ie&>uYkFRYyOswji+T-51%Zq0gq;LM>`d9Y;9|3n^VE6qHT57UN5FZ7W ztkToV8u}wvkzmM@+l(ZL(dusdgG6rkSf!LniB^4Q8Ww#{!}y~CR9(-5rP)EI#V=D^LWh? 
z;3BAdwU=TAz}QrH&A6gi_B(v8rohsXER)yMtrBvBu8SLXYgq|sxl6$ManUqW&!#~% z(iV?EiU(sN*Q{s72vz*p23mePMiQRbTLpMXy&l+8ppQ#k)l>Os^RXeMNu_h89vSnM z+EFFf%Pmc3GT0>)D;?3@L z&Ix${s|g!8G7bklK~ZDGl9uqp1U5{JrY_peudEQ%TpJKHT5OC7`HQXv8kS6~mZ;Io z@hre|{5@@?(v{+<`WwO*K(cLiLp0llrD+^RAJdSJ*;|~NJPs@$Ydbqp^O0iuSKOeI z2=OIsIT+*nA5f@f5kdDFe zc+AO3mG-D%Gf{|DxIDq~I{zH6+xcH~{2y^_3iJxeXJiE(8Kljvu(!MfY(KuoGg|f{ z66sV3DH%4J=)lt(XOdV4=(JO?B!Cn$(#(p81s@|bJPoMq)vf-3+=z1X{74J+hI(XS za>!JX69Y8CR5pVm+*Lg=Efr>BtIrzjpBX--k33kfz}XG08&_eQ%X>P11VFWoeiN;w+`&EC`W>h zH^|+<53{v+2Y4 zJez?Wa!(BVos34?7?z4fh$jDC({$@D4jTsXd@-|L?Vz|mDs8G zH+el@V@INnbQfvYnc9JWXaw#Hv;@e^C*LjgzxB<^kujsIpFLe01f6W21Y2DsivP{9 z`P9bCQUCfjdcxbA{K^@P&lCNLUkdrSnS=5=rX`z47Pa`$8pJc~Cb`$L?5UwQ_UI0{ z-j086)!Wl*U{Ju-_@?q#!|>~a_T6i&Tshr-q4AHmTbldT7q{<8|EkvzAX7kls$r$E(dB8hx72+B*EB9?i=e;cZhpmb`hohq z0mmjaz19ZWChW_)wkxis-iLpo2z`=&Xg3|%{`tJ6gGc@FOwDbOntZNCRO9#IYK_S$ UUtYW}zB%X5?>ElV-}(anAN~2X(f|Me diff --git a/dbms/programs/server/data/system/metric_log/202004_524_524_0/columns.txt b/dbms/programs/server/data/system/metric_log/202004_524_524_0/columns.txt deleted file mode 100644 index 6b901df244b..00000000000 --- a/dbms/programs/server/data/system/metric_log/202004_524_524_0/columns.txt +++ /dev/null @@ -1,213 +0,0 @@ -columns format version: 1 -211 columns: -`event_date` Date -`event_time` DateTime -`milliseconds` UInt64 -`ProfileEvent_Query` UInt64 -`ProfileEvent_SelectQuery` UInt64 -`ProfileEvent_InsertQuery` UInt64 -`ProfileEvent_FileOpen` UInt64 -`ProfileEvent_Seek` UInt64 -`ProfileEvent_ReadBufferFromFileDescriptorRead` UInt64 -`ProfileEvent_ReadBufferFromFileDescriptorReadFailed` UInt64 -`ProfileEvent_ReadBufferFromFileDescriptorReadBytes` UInt64 -`ProfileEvent_WriteBufferFromFileDescriptorWrite` UInt64 -`ProfileEvent_WriteBufferFromFileDescriptorWriteFailed` UInt64 -`ProfileEvent_WriteBufferFromFileDescriptorWriteBytes` UInt64 -`ProfileEvent_ReadBufferAIORead` UInt64 -`ProfileEvent_ReadBufferAIOReadBytes` UInt64 -`ProfileEvent_WriteBufferAIOWrite` UInt64 -`ProfileEvent_WriteBufferAIOWriteBytes` UInt64 -`ProfileEvent_ReadCompressedBytes` UInt64 -`ProfileEvent_CompressedReadBufferBlocks` UInt64 -`ProfileEvent_CompressedReadBufferBytes` UInt64 -`ProfileEvent_UncompressedCacheHits` UInt64 -`ProfileEvent_UncompressedCacheMisses` UInt64 -`ProfileEvent_UncompressedCacheWeightLost` UInt64 -`ProfileEvent_IOBufferAllocs` UInt64 -`ProfileEvent_IOBufferAllocBytes` UInt64 -`ProfileEvent_ArenaAllocChunks` UInt64 -`ProfileEvent_ArenaAllocBytes` UInt64 -`ProfileEvent_FunctionExecute` UInt64 -`ProfileEvent_TableFunctionExecute` UInt64 -`ProfileEvent_MarkCacheHits` UInt64 -`ProfileEvent_MarkCacheMisses` UInt64 -`ProfileEvent_CreatedReadBufferOrdinary` UInt64 -`ProfileEvent_CreatedReadBufferAIO` UInt64 -`ProfileEvent_CreatedReadBufferAIOFailed` UInt64 -`ProfileEvent_CreatedReadBufferMMap` UInt64 -`ProfileEvent_CreatedReadBufferMMapFailed` UInt64 -`ProfileEvent_CreatedWriteBufferOrdinary` UInt64 -`ProfileEvent_CreatedWriteBufferAIO` UInt64 -`ProfileEvent_CreatedWriteBufferAIOFailed` UInt64 -`ProfileEvent_DiskReadElapsedMicroseconds` UInt64 -`ProfileEvent_DiskWriteElapsedMicroseconds` UInt64 -`ProfileEvent_NetworkReceiveElapsedMicroseconds` UInt64 -`ProfileEvent_NetworkSendElapsedMicroseconds` UInt64 -`ProfileEvent_ThrottlerSleepMicroseconds` UInt64 -`ProfileEvent_QueryMaskingRulesMatch` UInt64 -`ProfileEvent_ReplicatedPartFetches` UInt64 -`ProfileEvent_ReplicatedPartFailedFetches` UInt64 -`ProfileEvent_ObsoleteReplicatedParts` UInt64 -`ProfileEvent_ReplicatedPartMerges` UInt64 
-`ProfileEvent_ReplicatedPartFetchesOfMerged` UInt64 -`ProfileEvent_ReplicatedPartMutations` UInt64 -`ProfileEvent_ReplicatedPartChecks` UInt64 -`ProfileEvent_ReplicatedPartChecksFailed` UInt64 -`ProfileEvent_ReplicatedDataLoss` UInt64 -`ProfileEvent_InsertedRows` UInt64 -`ProfileEvent_InsertedBytes` UInt64 -`ProfileEvent_DelayedInserts` UInt64 -`ProfileEvent_RejectedInserts` UInt64 -`ProfileEvent_DelayedInsertsMilliseconds` UInt64 -`ProfileEvent_DuplicatedInsertedBlocks` UInt64 -`ProfileEvent_ZooKeeperInit` UInt64 -`ProfileEvent_ZooKeeperTransactions` UInt64 -`ProfileEvent_ZooKeeperList` UInt64 -`ProfileEvent_ZooKeeperCreate` UInt64 -`ProfileEvent_ZooKeeperRemove` UInt64 -`ProfileEvent_ZooKeeperExists` UInt64 -`ProfileEvent_ZooKeeperGet` UInt64 -`ProfileEvent_ZooKeeperSet` UInt64 -`ProfileEvent_ZooKeeperMulti` UInt64 -`ProfileEvent_ZooKeeperCheck` UInt64 -`ProfileEvent_ZooKeeperClose` UInt64 -`ProfileEvent_ZooKeeperWatchResponse` UInt64 -`ProfileEvent_ZooKeeperUserExceptions` UInt64 -`ProfileEvent_ZooKeeperHardwareExceptions` UInt64 -`ProfileEvent_ZooKeeperOtherExceptions` UInt64 -`ProfileEvent_ZooKeeperWaitMicroseconds` UInt64 -`ProfileEvent_ZooKeeperBytesSent` UInt64 -`ProfileEvent_ZooKeeperBytesReceived` UInt64 -`ProfileEvent_DistributedConnectionFailTry` UInt64 -`ProfileEvent_DistributedConnectionMissingTable` UInt64 -`ProfileEvent_DistributedConnectionStaleReplica` UInt64 -`ProfileEvent_DistributedConnectionFailAtAll` UInt64 -`ProfileEvent_CompileAttempt` UInt64 -`ProfileEvent_CompileSuccess` UInt64 -`ProfileEvent_CompileFunction` UInt64 -`ProfileEvent_CompiledFunctionExecute` UInt64 -`ProfileEvent_CompileExpressionsMicroseconds` UInt64 -`ProfileEvent_CompileExpressionsBytes` UInt64 -`ProfileEvent_ExternalSortWritePart` UInt64 -`ProfileEvent_ExternalSortMerge` UInt64 -`ProfileEvent_ExternalAggregationWritePart` UInt64 -`ProfileEvent_ExternalAggregationMerge` UInt64 -`ProfileEvent_ExternalAggregationCompressedBytes` UInt64 -`ProfileEvent_ExternalAggregationUncompressedBytes` UInt64 -`ProfileEvent_SlowRead` UInt64 -`ProfileEvent_ReadBackoff` UInt64 -`ProfileEvent_ReplicaYieldLeadership` UInt64 -`ProfileEvent_ReplicaPartialShutdown` UInt64 -`ProfileEvent_SelectedParts` UInt64 -`ProfileEvent_SelectedRanges` UInt64 -`ProfileEvent_SelectedMarks` UInt64 -`ProfileEvent_Merge` UInt64 -`ProfileEvent_MergedRows` UInt64 -`ProfileEvent_MergedUncompressedBytes` UInt64 -`ProfileEvent_MergesTimeMilliseconds` UInt64 -`ProfileEvent_MergeTreeDataWriterRows` UInt64 -`ProfileEvent_MergeTreeDataWriterUncompressedBytes` UInt64 -`ProfileEvent_MergeTreeDataWriterCompressedBytes` UInt64 -`ProfileEvent_MergeTreeDataWriterBlocks` UInt64 -`ProfileEvent_MergeTreeDataWriterBlocksAlreadySorted` UInt64 -`ProfileEvent_CannotRemoveEphemeralNode` UInt64 -`ProfileEvent_LeaderElectionAcquiredLeadership` UInt64 -`ProfileEvent_RegexpCreated` UInt64 -`ProfileEvent_ContextLock` UInt64 -`ProfileEvent_StorageBufferFlush` UInt64 -`ProfileEvent_StorageBufferErrorOnFlush` UInt64 -`ProfileEvent_StorageBufferPassedAllMinThresholds` UInt64 -`ProfileEvent_StorageBufferPassedTimeMaxThreshold` UInt64 -`ProfileEvent_StorageBufferPassedRowsMaxThreshold` UInt64 -`ProfileEvent_StorageBufferPassedBytesMaxThreshold` UInt64 -`ProfileEvent_DictCacheKeysRequested` UInt64 -`ProfileEvent_DictCacheKeysRequestedMiss` UInt64 -`ProfileEvent_DictCacheKeysRequestedFound` UInt64 -`ProfileEvent_DictCacheKeysExpired` UInt64 -`ProfileEvent_DictCacheKeysNotFound` UInt64 -`ProfileEvent_DictCacheKeysHit` UInt64 -`ProfileEvent_DictCacheRequestTimeNs` UInt64 
-`ProfileEvent_DictCacheRequests` UInt64 -`ProfileEvent_DictCacheLockWriteNs` UInt64 -`ProfileEvent_DictCacheLockReadNs` UInt64 -`ProfileEvent_DistributedSyncInsertionTimeoutExceeded` UInt64 -`ProfileEvent_DataAfterMergeDiffersFromReplica` UInt64 -`ProfileEvent_DataAfterMutationDiffersFromReplica` UInt64 -`ProfileEvent_PolygonsAddedToPool` UInt64 -`ProfileEvent_PolygonsInPoolAllocatedBytes` UInt64 -`ProfileEvent_RWLockAcquiredReadLocks` UInt64 -`ProfileEvent_RWLockAcquiredWriteLocks` UInt64 -`ProfileEvent_RWLockReadersWaitMilliseconds` UInt64 -`ProfileEvent_RWLockWritersWaitMilliseconds` UInt64 -`ProfileEvent_DNSError` UInt64 -`ProfileEvent_RealTimeMicroseconds` UInt64 -`ProfileEvent_UserTimeMicroseconds` UInt64 -`ProfileEvent_SystemTimeMicroseconds` UInt64 -`ProfileEvent_SoftPageFaults` UInt64 -`ProfileEvent_HardPageFaults` UInt64 -`ProfileEvent_VoluntaryContextSwitches` UInt64 -`ProfileEvent_InvoluntaryContextSwitches` UInt64 -`ProfileEvent_OSIOWaitMicroseconds` UInt64 -`ProfileEvent_OSCPUWaitMicroseconds` UInt64 -`ProfileEvent_OSCPUVirtualTimeMicroseconds` UInt64 -`ProfileEvent_OSReadBytes` UInt64 -`ProfileEvent_OSWriteBytes` UInt64 -`ProfileEvent_OSReadChars` UInt64 -`ProfileEvent_OSWriteChars` UInt64 -`ProfileEvent_CreatedHTTPConnections` UInt64 -`ProfileEvent_CannotWriteToWriteBufferDiscard` UInt64 -`ProfileEvent_QueryProfilerSignalOverruns` UInt64 -`CurrentMetric_Query` Int64 -`CurrentMetric_Merge` Int64 -`CurrentMetric_PartMutation` Int64 -`CurrentMetric_ReplicatedFetch` Int64 -`CurrentMetric_ReplicatedSend` Int64 -`CurrentMetric_ReplicatedChecks` Int64 -`CurrentMetric_BackgroundPoolTask` Int64 -`CurrentMetric_BackgroundMovePoolTask` Int64 -`CurrentMetric_BackgroundSchedulePoolTask` Int64 -`CurrentMetric_CacheDictionaryUpdateQueueBatches` Int64 -`CurrentMetric_CacheDictionaryUpdateQueueKeys` Int64 -`CurrentMetric_DiskSpaceReservedForMerge` Int64 -`CurrentMetric_DistributedSend` Int64 -`CurrentMetric_QueryPreempted` Int64 -`CurrentMetric_TCPConnection` Int64 -`CurrentMetric_MySQLConnection` Int64 -`CurrentMetric_HTTPConnection` Int64 -`CurrentMetric_InterserverConnection` Int64 -`CurrentMetric_OpenFileForRead` Int64 -`CurrentMetric_OpenFileForWrite` Int64 -`CurrentMetric_Read` Int64 -`CurrentMetric_Write` Int64 -`CurrentMetric_SendScalars` Int64 -`CurrentMetric_SendExternalTables` Int64 -`CurrentMetric_QueryThread` Int64 -`CurrentMetric_ReadonlyReplica` Int64 -`CurrentMetric_LeaderReplica` Int64 -`CurrentMetric_MemoryTracking` Int64 -`CurrentMetric_MemoryTrackingInBackgroundProcessingPool` Int64 -`CurrentMetric_MemoryTrackingInBackgroundMoveProcessingPool` Int64 -`CurrentMetric_MemoryTrackingInBackgroundSchedulePool` Int64 -`CurrentMetric_MemoryTrackingForMerges` Int64 -`CurrentMetric_LeaderElection` Int64 -`CurrentMetric_EphemeralNode` Int64 -`CurrentMetric_ZooKeeperSession` Int64 -`CurrentMetric_ZooKeeperWatch` Int64 -`CurrentMetric_ZooKeeperRequest` Int64 -`CurrentMetric_DelayedInserts` Int64 -`CurrentMetric_ContextLockWait` Int64 -`CurrentMetric_StorageBufferRows` Int64 -`CurrentMetric_StorageBufferBytes` Int64 -`CurrentMetric_DictCacheRequests` Int64 -`CurrentMetric_Revision` Int64 -`CurrentMetric_VersionInteger` Int64 -`CurrentMetric_RWLockWaitingReaders` Int64 -`CurrentMetric_RWLockWaitingWriters` Int64 -`CurrentMetric_RWLockActiveReaders` Int64 -`CurrentMetric_RWLockActiveWriters` Int64 -`CurrentMetric_GlobalThread` Int64 -`CurrentMetric_GlobalThreadActive` Int64 -`CurrentMetric_LocalThread` Int64 -`CurrentMetric_LocalThreadActive` Int64 
-`CurrentMetric_DistributedFilesToInsert` Int64 diff --git a/dbms/programs/server/data/system/metric_log/202004_524_524_0/count.txt b/dbms/programs/server/data/system/metric_log/202004_524_524_0/count.txt deleted file mode 100644 index 301160a9306..00000000000 --- a/dbms/programs/server/data/system/metric_log/202004_524_524_0/count.txt +++ /dev/null @@ -1 +0,0 @@ -8 \ No newline at end of file diff --git a/dbms/programs/server/data/system/metric_log/202004_524_524_0/minmax_event_date.idx b/dbms/programs/server/data/system/metric_log/202004_524_524_0/minmax_event_date.idx deleted file mode 100644 index fc9f33a367f..00000000000 --- a/dbms/programs/server/data/system/metric_log/202004_524_524_0/minmax_event_date.idx +++ /dev/null @@ -1 +0,0 @@ -GG \ No newline at end of file diff --git a/dbms/programs/server/data/system/metric_log/202004_524_524_0/partition.dat b/dbms/programs/server/data/system/metric_log/202004_524_524_0/partition.dat deleted file mode 100644 index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4 LcmWd;Wo7^X0GR+S diff --git a/dbms/programs/server/data/system/metric_log/202004_524_524_0/primary.idx b/dbms/programs/server/data/system/metric_log/202004_524_524_0/primary.idx deleted file mode 100644 index e28d01e380f..00000000000 --- a/dbms/programs/server/data/system/metric_log/202004_524_524_0/primary.idx +++ /dev/null @@ -1 +0,0 @@ -G^G^ \ No newline at end of file diff --git a/dbms/programs/server/data/system/metric_log/202004_525_525_0/checksums.txt b/dbms/programs/server/data/system/metric_log/202004_525_525_0/checksums.txt deleted file mode 100644 index 2ed8405afcf801e2198f75c3be6f774997770908..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6553 zcmYLOX<$=T7C!eaS?)`6Lz|>cOPdy2+Crf%6lmF+(gg~%DNUe&U6|oa=hu7pzTEAc?|kRGC$2fN zt0@?64yMia2b!Isv{pF~^!R~mN9Tjg56-|KJ&n+DAC_%afd*PnXvE!P`mr|N^> zezx!`vA%wN+lwo|`|h<{o7{H}56V27GXdf{RLy~=f%#*;8~@XVdadHqnOAVXwRjzx1yK6MGc+q(N5bh%0mAr*e-ggr{%@ z+9$QRogsN*SPsjhogw@b+(ZnP!g@$yFW_YPW58H4UT%LLjFtiiB(Z0JPfjm~tA1Zd zZVOHDyPE2qo=^sk=~V#|TY$@?4TkgOve(%zyQ_RbIS}HAQQ8MmIj}Yy-MbA6aQR%$ zUnkEC%fSc0BrWD)E=R`zTLgU1ev6<(6>PG%I9+m`j1#oV?n-~4S`IYI3y48l&NCY* z*;?RJ`mTi=fCiow4&iS+UODLSuOn;?7<6ZV)d0^{1Ftzk6r;dqmwoPsz##pR(^Np` zMlA~Rc}2?{xmgZ4y)}OKD%|H$j+Oyy#bczl!uBy<|1781F()89FT))}_BT8Fty#5Z z%f%f({(NWRhCj|F)?n+ZNAq&uUNe5khRO?D(vH3h>e0YLz)QP0)j*h7iiU2z8X4O< zVQ~BGUyqk<*fmDF)37I-SjW!0dGJ7cO-98A+xvOn2ipr3Y%D;;iBcAMY^=jkTjuxq zudyo?wfn&Ks zSX?K!cs(v>m>Ad|;Dp#YQ$nN&{dk^ZDQdlx&Z|m+1#qUXMP&gf&T{B9y_Cu8qTZ2u zldIKovp>-82q30CzQ#t}+nKJtN#&Flf?!jyiAtl?L&2&Mo_|DZj=DkUNa$DP>nlAP z@Vn$-5Qk<{mWIeFnFZNwC!%dw9T;_QL5Hz#$$-YGi|+rzp-NwJdG6I^MZ`Ml(@EQB zA8x8%7YBbD;I2p+em`We*Ktr$4H#8RB{mIbDxMC#*cQZ4!USOdP$gIrhXY}=5d*or zt=&Fx!r#SIDPI)@K%CXqBKz{x`r=LB>iAXZ9tZrTSD?`HFFTXw6@!*y_BR7 z6oj%cy5b4a<)L@gX?69Nv|Nf3_HcP}AX{Nf>a&GsRe0v5kV+PE_$Uyn03UqB<#1V7 zf@T*<7!wKE0^ddL9N7<_1AB@z58_{XTDdF=gLDy!5LOvzB%+kSlZ@ z#%NiDPw&S#>nm!b*3#Yr;SbPIDI@uOBpBF!P;k-slnx1s1;EN!BHM?jkJ3laqzT-h zR!=Zd(P7MSoEc3HR9?UP zV-ftobSk}cL)C?y8A8i6zkj?exBQb4!N}$zvJ$zdb(8gWosKIM*k#ZoG({nX&=W*_ z8}#+@iR>2s`UHtCA#|u}1OC}aqZO@~x*AF`fjLXPUcYM}Z?+)o(+GjHLJ)0bbHYAR zX|nWc2Z4{e%iMP`d|u}C`TU_eB#2hI!I*C8R|AF!2V3$-0gzNa-QiaPH%|1Idx9=! 
z;F|RRw31{ghe!R*Ex3GXDAcZDYdC`?tqjx=HnwCB0y4UyttIdvZwWrhnJtl|$X4N( zyb+-Ju-fAa_=B>`?{m*m7^O>un@E(WL2U=N9=AM^aJI^@&-J4&k+$;uN)V%HVj7Oi zw})LB@%WY#W5h_O!TlOERJ6&i@M482(xF8{!PTS-e7e7REte*{f0nox_>$3H|86NE zCHFoKkSc9R-hj2^d{t>ul<46cjrEtxBT zD70LDX_c)5Se8m}>4V`OiFfH8!tDk1Qm|DzJzh(kNqRi8zs|ggK-Ji)tDP-9Fcn-N zoZ=LaXyCJtK?B2p#gimU|L2LMtc8elkT&xgJF(gVZqIM&ED=XtBN{P!EcO!Nl+#sg zJC_i7jdBVfd8IhfOC*og0vDNt)!`4VAlgYs0t%l2Dt^+3s}~e6-|sHEBs8)8#M*H9 z_r!sZUfX*1n~yeLi~s5LdC-d2NW`LMvyWR>USqEa1pI*%1V++GoNzAiG>I-PcZQs$ zvqSQU^t5u%PG%2Q2K>z%dY}~yNduPxn*>_+93rK#0>S!e0^10EEQ6;&DtigX3u@_T zZXBkb#CK^r^*#^C>5GQAVsSjeK#S;i9O!Nr>`8Gnd zJl#jt#b6m5*C&C-b3#fc90Z8Q2HOEDYmGlt>CRz!yn$AV>Q6w7nS2siwrWi5JoF&8PTW~CW^gx&*Es=}Kv!Pa%n5u?uEgPKUIqyaTQs^pHOGmQIUd(v zL5!B(ecWe-gm*CTj`)K++%Ex{2S8?H~)^y1OW%7~YBRv-rmdklo%j04#9mN%=^ z`*saYzx8~|pXt9*FMa!%tzhn+r1bS?#3RYaRu33tfX9GMM65rKCY;$rNplpL>7b_# zr6J^=deMNL%nq7JAZxRUFt0CN4;IIJk_lTlP#GC0-KS=yKwz~{fC;W4iS;9fo`XIl zx~D-s8Q2Es(_jxdy>c~8#)dxHhuYy=5J%hFeXgS1eAL6Mg%|zdP(_rmM!*xLm>Z6m_2p3c(7#(>;7wm0-Ma9iWpa-*4^lvq8| zF3M<%8y$nx@L8rg@){Y5nuykF*hqlElYFkH;#pr}HV=_FHWg{@75w;V1Xn;_7vY-> zmM0h@9TSm(BUW0i6iQLh`U9ce2#wWBUDPr3ph`6an3AN>Qz|wN17sQg_yU7J1T%9u2jeLmJ1aMxl!taOIGI|rKR9_R|3IIdTfZEXVG+w-_3iq(eKvoidSJ-+Pi+WtylRKkX%-ay^d_i>0%@SP4Fi^Jdx)L7 z@A$S43&uCS#Wp`ktg4y!M!b|cV|U5$-+!KU+*)w6d%;iwm#rYvv%to&6GQbhl%TRg zgf-2nPYYZ=D8CZ!81m*HKy`0ExO<%{XWl1!vK}JV(WlxRXRP_v9r1gQWX=iJWfM|9 zScocj_9r0evU_EECr$Myg*zr>y97DmurOzP{tY zF5pR2G}Ch3jw)ZP-y8OY@N#uLvBW;#^UP2)C7Nyu*&I(FDX^Bh=7s0>;7m{woa331 zV5H+IR|3L?0%R+Quoaz3N~@iLrYS@6#(I9>O;pFx=>BLXFAfBFg|hoIO!P}3<{7i; zHpjLq%$9^N zb@R0Whb~Urb?nJ{{COAVx9cO$CCdc%S0SF>q-+9S1u%uu&6JJ6DK}G&9mU;XGlu!p zjUI;(4>M(M2)Tp*>);m(@<1 z>t=qYw$+&ZM9e(rorQ zMiypfI+sS#nUGgDr=foW;*@aKPPl6Fff!Zn8!*%DYIP@=5|M3~Qi#XP7{|rn^*148 zn-%nX<@ZdbSXZ=A6ljZtE&_9Z>7{wm15H!BV$dc02sriV~@bN2N<5cT4IP zgd#|*@;w0LOC4OvV0V#3-2Z16PGYllX39w%=I;oO773&;l5Awnco(d}9?9QbkE)qvg8i2B-wWs*SHw~8~;BLp*pI+7XaVc_--7GBlDMFCbM zlhyExvHzMw$7antX2KL%?0=-*rBO zj+(3%#D{^UtMv4;hCU3+YUt$Cqa25PO36U1(S#LDo1RMNs#qPCC7y%?@;AW8Fl>tW zDBg_J^2c0*V-O&@)DY-ncs`BC52JT7ZEbiicw0kHjNlA~!rbLeL`7 z=89%%9>`Sd+won*aXTJsE^}p9XvO$cdQTC%SZJmQWPJ*(q#t^m zUi+MI$nBr+`xOk>j}GS0!HIQKJ+jxm*U0V~6S2){q6gMICzAaT0pcb@-UR>TOp~UR z*G6wZn*VabP=Gar^&gppjqZSO7+En8zF1(zVm$TGE@EUQpypNqL4zeGn2>nrI-pU# z!|I6|J)F!!JjdVD#xAK+5>=lj{4hwj&2EZkui~vTiK2gL#J}uaPE8&G=204%lc@RF zCHNPdpq2>96>K>elXJ>N)hwcjN_(?$1eDlid9lMt7wSkcuvuvL8#K)>@0>saTDA#M zaw8VOJf%uZFL(`bB(Yjz#AejUW>AP&{^`fXatZpbq|WKv$0gAHO5qalF^QR*Znb;; z^ZTgTVPc@U>9L$FT>u1RXlGh(S4tI``b{gjRj}CRtMrD0bFL#CUr=-rI6b3Oj{-c5 z8Q;2-X)qa+XQx)7~Bo$;~Jg&yBQD zf20}}<%L`o`K+HNoXMtBM7pXQqLso-EUNiKq1y(V7jLc;lhWv`-AJGZLA5!Bd3&YM z3#6M~KPOt`qd)XYGUssWE;^v&iFO36tCu~EbFdm|sxZ-OQa9ne&eyT0Z+`B!ayB_3T}gcS zx_f8w8J@U!hp8;$u9&m~%zFb9$2|uP6Yhp&~5xX6- zkJ8~0l>5?V8tFm48!@9aBFe9);@Fc2gH&x4e90wVf3T0rh!iN2C~nyxeC;q|J;6Yi z<|r-$*C`8=N-Eks2ZknKeL&!2LNQJ+z7Ba~WCtdJ-uh@xe4kXPxg1$gdRY zTfQH|uHYVjRCaj{D?mbCl}{dNVrjfUI^!rQZ-VEX%7lRx>)_cy5E`n(-jL@j&cIT5 zfs1a?oU=w0Um*Fm(41#E_7mwz-|u!a+Y17xt!4!8ue12BjYeKA_E~&?h$Y#1RLmky zEN}XP&eHL0Iu>^BDFk)wwsu(|z!-`BhorJ{PLQJ9!3J>xH#VIe&Q7JHbmBCkjopoF zh(La=Mb-_r_@YReRV#5!h0Kn%IVD;iT<;nOZcRj1w(I7OE-@!Z400^h_Co2x!1Pr zi6J+(>)!Fam;Bu7_og|ZAmnM@t^CC>eCehG2Nn&J=a;!(DY`N<>jFvp#|wp9K00^z zs`B$m&x}gCbj$$+r*Xg?Vfun&U%dWCQU7CxF$c~y9wWsKd5V8WB>pF diff --git a/dbms/programs/server/data/system/metric_log/202004_525_525_0/columns.txt b/dbms/programs/server/data/system/metric_log/202004_525_525_0/columns.txt deleted file mode 100644 index 6b901df244b..00000000000 --- a/dbms/programs/server/data/system/metric_log/202004_525_525_0/columns.txt +++ 
@@ -1,213 +0,0 @@
[213 deleted lines omitted; identical to the 202004_522_522_0/columns.txt listing above]
diff --git a/dbms/programs/server/data/system/metric_log/202004_525_525_0/count.txt b/dbms/programs/server/data/system/metric_log/202004_525_525_0/count.txt
deleted file mode 100644
index 301160a9306..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_525_525_0/count.txt
+++ /dev/null
@@ -1 +0,0 @@
-8
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_525_525_0/minmax_event_date.idx b/dbms/programs/server/data/system/metric_log/202004_525_525_0/minmax_event_date.idx
deleted file mode 100644
index fc9f33a367f..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_525_525_0/minmax_event_date.idx
+++ /dev/null
@@ -1 +0,0 @@
-GG
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_525_525_0/partition.dat b/dbms/programs/server/data/system/metric_log/202004_525_525_0/partition.dat
deleted file mode 100644
index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4
LcmWd;Wo7^X0GR+S

diff --git a/dbms/programs/server/data/system/metric_log/202004_525_525_0/primary.idx b/dbms/programs/server/data/system/metric_log/202004_525_525_0/primary.idx
deleted file mode 100644
index 52d0a189ade..00000000000
--- a/dbms/programs/server/data/system/metric_log/202004_525_525_0/primary.idx
+++ /dev/null
@@ -1 +0,0 @@
-G^G¯^
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/metric_log/202004_526_526_0/checksums.txt b/dbms/programs/server/data/system/metric_log/202004_526_526_0/checksums.txt
deleted file mode 100644
index e6411a67832e857be22e81af8decd88872d5aae5..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6663
[binary patch data omitted]
diff --git a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/columns.txt b/dbms/programs/server/data/system/text_log/202004_5998_5998_0/columns.txt
deleted file mode 100644
index 461d26792e1..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/columns.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-columns format version: 1
-12 columns:
-`event_date` Date
-`event_time` DateTime
-`microseconds` UInt32
-`thread_name` LowCardinality(String)
-`thread_id` UInt64
-`level` Enum8('Fatal' = 1, 'Critical' = 2, 'Error' = 3, 'Warning' = 4, 'Notice' = 5, 'Information' = 6, 'Debug' = 7, 'Trace' = 8)
-`query_id` String
-`logger_name` LowCardinality(String)
-`message` String
-`revision` UInt32
-`source_file` LowCardinality(String)
-`source_line` UInt64
diff --git a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/count.txt b/dbms/programs/server/data/system/text_log/202004_5998_5998_0/count.txt
deleted file mode 100644
index b3935607590..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/count.txt
+++ /dev/null
@@ -1 +0,0 @@
-23
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/minmax_event_date.idx b/dbms/programs/server/data/system/text_log/202004_5998_5998_0/minmax_event_date.idx
deleted file mode 100644
index fc9f33a367f..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/minmax_event_date.idx
+++ /dev/null
@@ -1 +0,0 @@
-GG
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/partition.dat b/dbms/programs/server/data/system/text_log/202004_5998_5998_0/partition.dat
deleted file mode 100644
index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4
LcmWd;Wo7^X0GR+S

diff --git a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/primary.idx b/dbms/programs/server/data/system/text_log/202004_5998_5998_0/primary.idx
deleted file mode 100644
index 0ba049044b7..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5998_5998_0/primary.idx
+++ /dev/null
@@ -1 +0,0 @@
-G^G^
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/checksums.txt b/dbms/programs/server/data/system/text_log/202004_5999_5999_0/checksums.txt
deleted file mode 100644
index 1e447bba6581ada7b5e7dc8a9e616e7f72ad6436..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1178
[binary patch data omitted]
diff --git a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/columns.txt b/dbms/programs/server/data/system/text_log/202004_5999_5999_0/columns.txt
deleted file mode 100644
index 461d26792e1..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/columns.txt
+++ /dev/null
@@ -1,14 +0,0 @@
[14 deleted lines omitted; identical to the 202004_5998_5998_0/columns.txt listing above]
diff --git a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/count.txt b/dbms/programs/server/data/system/text_log/202004_5999_5999_0/count.txt
deleted file mode 100644
index 3cacc0b93c9..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/count.txt
+++ /dev/null
@@ -1 +0,0 @@
-12
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/minmax_event_date.idx b/dbms/programs/server/data/system/text_log/202004_5999_5999_0/minmax_event_date.idx
deleted file mode 100644
index fc9f33a367f..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/minmax_event_date.idx
+++ /dev/null
@@ -1 +0,0 @@
-GG
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/partition.dat b/dbms/programs/server/data/system/text_log/202004_5999_5999_0/partition.dat
deleted file mode 100644
index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4
LcmWd;Wo7^X0GR+S

diff --git a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/primary.idx b/dbms/programs/server/data/system/text_log/202004_5999_5999_0/primary.idx
deleted file mode 100644
index 53fe4d75d28..00000000000
--- a/dbms/programs/server/data/system/text_log/202004_5999_5999_0/primary.idx
+++ /dev/null
@@ -1 +0,0 @@
-G^G^
\ No newline at end of file
diff --git a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/checksums.txt b/dbms/programs/server/data/system/text_log/202004_6000_6000_0/checksums.txt
deleted file mode 100644
index 7bf694ff17f63c39af23ece9f63c2d21d9acc363..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1180
zcmV;N1Y`SSXk}w-b9HTVAZBlJZDDjEc4cyNX>V>iAT$b~&c&7~?Lnw*lM#gvri;Xa za0CDViv<7x_dy~FV{dhCbS`vwbOIBBc0S#{%h&xA_>!`+G06Z9Wp-t5bYEm)bY(7L zX>KGNe}o_zJ+{#li^38gQ&=wn7Sg;0f;GOJWwmr@H1S*fR1Y!$^agEmYceokZGP1G zo_(FIYg-w&G<{iA05NoFZBzjB6f&ce;aKv4)#*+pA&tiqJpKVJZxuFL(z$TU$-|*x zpL3F@Q~&`o00C40@Cb%3Cla2Axa=t3Z!!?(ARGV*Y-M(3Y@-136(zu&DilScO>#8a zSjyeAy8ZzR!rh{2=|Km`L5QiE+CUHo3M~KuN&xZa(l`@g@dMI2U0UH9hWFAAjsAgf6nA|13cBB+D06=7E zV|0iB{THwSq~h(hJArZ9HhwZRmm6650mT7Y!QX~WEI~Ti&Vv5ZDDv{(*Xfl0`N^~WOxJ+*ML^%!DlZYP(_Ka>x>9NO>ehv*A@fW&-^e*aY2fngE_q(@u*A?^ClmD1!m$ z2L9ZEPfP1osJDX_LQ>JjqZK*;0hIvw0|Hw2KGK=K)3f?&4*`J--vAIO00AQa4Q2ua zxBxM1X>Pax^b|tRE?}C%iVIH}=;(>^kbF@ASQn<^mY8v_*m60iDG6O%=2id!GXMcr z0P_jbLYP3fC(5AKlw4RmO7WnpBm0rnL=ZWkWiX%CfQAcS$zQXu{q0a#3+ zw9igw)V!ZnkHb$=`i>4U01r|C0}~G+00qVa_!NX~!SqfXq5W_FHcgIBKXL&ZAFk~h z)w}~2dJGhWaeMBi6f*z>U;+IVMH4LpcGy8%5GVivBLE0q0Z>)Ga3ugO^WoM2 diff --git a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/columns.txt b/dbms/programs/server/data/system/text_log/202004_6000_6000_0/columns.txt deleted file mode 100644 index 461d26792e1..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/columns.txt +++ /dev/null @@ -1,14 +0,0 @@ -columns format version: 1 -12 columns: -`event_date` Date -`event_time` DateTime -`microseconds` UInt32 -`thread_name` LowCardinality(String) -`thread_id` UInt64 -`level` Enum8('Fatal' = 1, 'Critical' = 2, 'Error' = 3, 'Warning' = 4, 'Notice' = 5, 'Information' = 6, 'Debug' = 7, 'Trace' = 8) -`query_id` String -`logger_name` LowCardinality(String) -`message` String -`revision` UInt32 -`source_file` LowCardinality(String) -`source_line` UInt64 diff --git a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/count.txt b/dbms/programs/server/data/system/text_log/202004_6000_6000_0/count.txt deleted file mode 100644 index 9d607966b72..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/count.txt +++ /dev/null @@ -1 +0,0 @@ -11 \ No newline at end of file diff --git a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/minmax_event_date.idx b/dbms/programs/server/data/system/text_log/202004_6000_6000_0/minmax_event_date.idx deleted file mode 100644 index fc9f33a367f..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/minmax_event_date.idx +++ /dev/null @@ -1 +0,0 @@ -GG \ No newline at end of file diff --git a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/partition.dat b/dbms/programs/server/data/system/text_log/202004_6000_6000_0/partition.dat deleted file mode 100644 index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4 LcmWd;Wo7^X0GR+S diff --git a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/primary.idx b/dbms/programs/server/data/system/text_log/202004_6000_6000_0/primary.idx deleted file mode 100644 index 6ae118ad85b..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6000_6000_0/primary.idx +++ /dev/null @@ -1 +0,0 @@ -G^G¯^ \ No newline at end of file diff --git a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/checksums.txt b/dbms/programs/server/data/system/text_log/202004_6001_6001_0/checksums.txt deleted file mode 100644 index 54c0f22d4ba7cb899c44d427a2f7c738f54a95c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1179 zcmV;M1Z4YTXk}w-b9HTVAZBlJZDDjEc4cyNX>V>iAT$a~vY0*L3UfNd7&#kOD*MEO zZv+4UiUj}w_dy~FV{dhCbS`vwbOIBBc0S#{%h&xA_>!`+G06Z9Wp-t5bYEm)bY(7L 
zX>KGNe}o_zJ+{#li^38gQ&=wn7Sg;0f;GOJWwmr@H1S*fR1Y!$^agEmYceokZGP1G zo_(FIYg-w&G<{iA05NoFZBzjB6f$8}3Cy(yR0+pe3|4kv$kzcZAyVb0u2Ngu%1;Xn z$*%S9Q~&`o00C40@Cb%3Cla2Axa=t3Z!!?(ARGV*Y-M(3Y@-136(vM~X z0vSG21>YMBnQlAR%pT{eNCCM6a=E|^jD0nf=7Thei-n3=3^4!!Pyp}`6UL*l7;b-o z!^7dKEgver01$0yV{&hEWn*t{WOL8}`4vaIO9dQ{jD2>Ad`5Uvge92)ERL4`5md0% zuNF)6AQ9>=WDzs~0aySHsR4o&ZE0?8VR&EC0RdP7@J(rCcmxpSY<8<3(OqXZE5uQ7 zuZRE*aA9(EX>@6CZZ2eDbOa(H=e-uerqpqQGCt>*RQ3Q1aB^vFVRCscX=Hc|#9umF zMEbFzDLEhX%-^ax01R<;Wpa66X=J(p^%W%@eVHnPJ|#kJd`Do*e}@kIa>GXMl-0{s`z1dDV)0&}P*HT2`x=gWdT3jwnR zmn|1|+cX`6-Q80`Tcf?`6*>R`l>qny(E&B?hY-Rre+-+7-}Rfd01zku0V4nnW&#Ab z05NQ7ZnyyS6iTwR6)BVAYPkE&`_F=($=U%}576LYmnWk(xL~Y#BA$J_RsaDr00CA2 z^9j;Im_XP^E$to>QSd|$?vVfubZByAVPvlX_7ym`8QKAV=T1Xcg<}w|wy-<_Sfwzk z{?yl}ug}RFh46%tmJ|IXl&1HX(mH``SvP%dU z*7_Ix|C*YiCM6COGXMm|1N{|6|9reQ!J1E$>HKCjRVPtp0Wig49zpB?5RY%|LiZ-= tW`q?u00EN#_yhZKc1847)l({ut->AzM@s+@C;$N?00>?IP*uKgB>)G|*m?i} diff --git a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/columns.txt b/dbms/programs/server/data/system/text_log/202004_6001_6001_0/columns.txt deleted file mode 100644 index 461d26792e1..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/columns.txt +++ /dev/null @@ -1,14 +0,0 @@ -columns format version: 1 -12 columns: -`event_date` Date -`event_time` DateTime -`microseconds` UInt32 -`thread_name` LowCardinality(String) -`thread_id` UInt64 -`level` Enum8('Fatal' = 1, 'Critical' = 2, 'Error' = 3, 'Warning' = 4, 'Notice' = 5, 'Information' = 6, 'Debug' = 7, 'Trace' = 8) -`query_id` String -`logger_name` LowCardinality(String) -`message` String -`revision` UInt32 -`source_file` LowCardinality(String) -`source_line` UInt64 diff --git a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/count.txt b/dbms/programs/server/data/system/text_log/202004_6001_6001_0/count.txt deleted file mode 100644 index 9d607966b72..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/count.txt +++ /dev/null @@ -1 +0,0 @@ -11 \ No newline at end of file diff --git a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/minmax_event_date.idx b/dbms/programs/server/data/system/text_log/202004_6001_6001_0/minmax_event_date.idx deleted file mode 100644 index fc9f33a367f..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/minmax_event_date.idx +++ /dev/null @@ -1 +0,0 @@ -GG \ No newline at end of file diff --git a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/partition.dat b/dbms/programs/server/data/system/text_log/202004_6001_6001_0/partition.dat deleted file mode 100644 index 870b71ef44bae12efece0406d75f84029c34bbf6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4 LcmWd;Wo7^X0GR+S diff --git a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/primary.idx b/dbms/programs/server/data/system/text_log/202004_6001_6001_0/primary.idx deleted file mode 100644 index 72220583214..00000000000 --- a/dbms/programs/server/data/system/text_log/202004_6001_6001_0/primary.idx +++ /dev/null @@ -1 +0,0 @@ -Gů^Gȯ^ \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 154d7c911cf..17a3cf88ecd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -578,6 +578,8 @@ target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR}) +target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC 
${MSGPACK_INCLUDE_DIR}) + if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp similarity index 100% rename from dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp rename to src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h similarity index 100% rename from dbms/src/Processors/Formats/Impl/MsgPackRowInputFormat.h rename to src/Processors/Formats/Impl/MsgPackRowInputFormat.h diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp similarity index 100% rename from dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp rename to src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp diff --git a/dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h similarity index 100% rename from dbms/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h rename to src/Processors/Formats/Impl/MsgPackRowOutputFormat.h diff --git a/dbms/tests/queries/0_stateless/01098_msgpack_format.reference b/tests/queries/0_stateless/01098_msgpack_format.reference similarity index 100% rename from dbms/tests/queries/0_stateless/01098_msgpack_format.reference rename to tests/queries/0_stateless/01098_msgpack_format.reference diff --git a/dbms/tests/queries/0_stateless/01098_msgpack_format.sh b/tests/queries/0_stateless/01098_msgpack_format.sh similarity index 100% rename from dbms/tests/queries/0_stateless/01098_msgpack_format.sh rename to tests/queries/0_stateless/01098_msgpack_format.sh From 092479397a0d3d7cf82728d16edc2054433ba932 Mon Sep 17 00:00:00 2001 From: Avogar Date: Sat, 4 Apr 2020 17:07:11 +0300 Subject: [PATCH 063/752] Remove extra line. --- src/Formats/FormatFactory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 6e357412571..9199ed89890 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -175,7 +175,6 @@ void registerOutputFormatProcessorTemplate(FormatFactory & factory); void registerInputFormatProcessorMsgPack(FormatFactory & factory); void registerOutputFormatProcessorMsgPack(FormatFactory & factory); - /// File Segmentation Engines for parallel reading void registerFileSegmentationEngineTabSeparated(FormatFactory & factory); From 9fac9a7d38c080909288147623ddd77e2e88a621 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 4 Apr 2020 18:18:35 +0300 Subject: [PATCH 064/752] Try fix aggregating. --- .../Merges/AggregatingSortedTransform.cpp | 21 ++++++++++--------- .../Merges/AggregatingSortedTransform.h | 7 +++++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index b3a1be9f253..c490d6a7762 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -184,26 +184,24 @@ void AggregatingSortedTransform::merge() if (key_differs) { + /// Write the simple aggregation result for the previous group. 
+ if (merged_data.mergedRows() > 0) + insertSimpleAggregationResult(); + + merged_data.insertRow(); + /// if there are enough rows accumulated and the last one is calculated completely if (merged_data.hasEnoughRows()) - { - /// Write the simple aggregation result for the previous group. - insertSimpleAggregationResult(); return; - } /// We will write the data for the group. We copy the values of ordinary columns. - merged_data.insertRow(current->all_columns, current->pos, - columns_definition.column_numbers_not_to_aggregate); + merged_data.initializeRow(current->all_columns, current->pos, + columns_definition.column_numbers_not_to_aggregate); /// Add the empty aggregation state to the aggregate columns. The state will be updated in the `addRow` function. for (auto & column_to_aggregate : columns_definition.columns_to_aggregate) column_to_aggregate.column->insertDefault(); - /// Write the simple aggregation result for the previous group. - if (merged_data.mergedRows() > 0) - insertSimpleAggregationResult(); - /// Reset simple aggregation states for next row for (auto & desc : columns_definition.columns_to_simple_aggregate) desc.createState(); @@ -229,7 +227,10 @@ void AggregatingSortedTransform::merge() /// Write the simple aggregation result for the previous group. if (merged_data.mergedRows() > 0) + { insertSimpleAggregationResult(); + merged_data.insertRow(); + } last_chunk_sort_columns.clear(); is_finished = true; diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.h b/dbms/src/Processors/Merges/AggregatingSortedTransform.h index 5ebc41d6ccf..7f32ed4eade 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.h +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.h @@ -59,11 +59,14 @@ private: public: using MergedData::MergedData; - void insertRow(const ColumnRawPtrs & raw_columns, size_t row, const ColumnNumbers & column_numbers) + void initializeRow(const ColumnRawPtrs & raw_columns, size_t row, const ColumnNumbers & column_numbers) { - for (auto column_number :column_numbers) + for (auto column_number : column_numbers) columns[column_number]->insertFrom(*raw_columns[column_number], row); + } + void insertRow() + { ++total_merged_rows; ++merged_rows; /// TODO: sum_blocks_granularity += block_size; From 36604e93ce1cd71c16316dde1bf6b259aae088f0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 4 Apr 2020 18:28:11 +0300 Subject: [PATCH 065/752] Try fix aggregating. --- dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index c490d6a7762..82859559fb3 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -186,9 +186,10 @@ void AggregatingSortedTransform::merge() { /// Write the simple aggregation result for the previous group. if (merged_data.mergedRows() > 0) + { insertSimpleAggregationResult(); - - merged_data.insertRow(); + merged_data.insertRow(); + } /// if there are enough rows accumulated and the last one is calculated completely if (merged_data.hasEnoughRows()) From bcebad1d60898978c43715633b53a0e90a0bc57c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 4 Apr 2020 18:37:31 +0300 Subject: [PATCH 066/752] Try fix aggregating. 
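
What the change below relies on: merged_data.mergedRows() counts rows that
were already committed by insertRow(), so it is still zero while the very
first group is open, and the old `mergedRows() > 0` test cannot distinguish
"a group is currently open" from "nothing has been merged yet". The new flag,
set by initializeRow() and cleared by insertRow(), tracks exactly that. A
minimal standalone sketch (simplified, hypothetical types, not the real
MergedData) of the intended group lifecycle:

    #include <iostream>
    #include <optional>
    #include <vector>

    struct MiniMergedData
    {
        bool is_group_started = false;  // set by initializeRow(), cleared by insertRow()
        size_t merged_rows = 0;         // counts only committed groups/rows

        void initializeRow() { is_group_started = true; }
        void insertRow() { is_group_started = false; ++merged_rows; }
    };

    int main()
    {
        const std::vector<int> keys = {1, 1, 2};  // two groups
        MiniMergedData merged;
        std::optional<int> last_key;

        for (int key : keys)
        {
            if (!last_key || *last_key != key)  // key_differs
            {
                // The old test `merged.merged_rows > 0` is false while the
                // first group is still open, so that group would never be
                // committed before the next one is initialized.
                if (merged.is_group_started)
                    merged.insertRow();         // commit the previous group
                merged.initializeRow();         // open a group for the new key
                last_key = key;
            }
            // ... aggregate the row into the open group ...
        }
        if (merged.is_group_started)
            merged.insertRow();                 // commit the trailing group

        std::cout << "groups committed: " << merged.merged_rows << "\n";  // prints 2
    }
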
--- dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 4 ++-- dbms/src/Processors/Merges/AggregatingSortedTransform.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 82859559fb3..67b1289c4cc 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -185,7 +185,7 @@ void AggregatingSortedTransform::merge() if (key_differs) { /// Write the simple aggregation result for the previous group. - if (merged_data.mergedRows() > 0) + if (merged_data.isGroupStarted()) { insertSimpleAggregationResult(); merged_data.insertRow(); @@ -227,7 +227,7 @@ void AggregatingSortedTransform::merge() } /// Write the simple aggregation result for the previous group. - if (merged_data.mergedRows() > 0) + if (merged_data.isGroupStarted()) { insertSimpleAggregationResult(); merged_data.insertRow(); diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.h b/dbms/src/Processors/Merges/AggregatingSortedTransform.h index 7f32ed4eade..bb950a68257 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.h +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.h @@ -63,10 +63,15 @@ private: { for (auto column_number : column_numbers) columns[column_number]->insertFrom(*raw_columns[column_number], row); + + is_group_started = true; } + bool isGroupStarted() const { return is_group_started; } + void insertRow() { + is_group_started = false; ++total_merged_rows; ++merged_rows; /// TODO: sum_blocks_granularity += block_size; @@ -81,6 +86,8 @@ private: for (auto & desc : def.columns_to_aggregate) desc.column = typeid_cast(columns[desc.column_number].get()); } + private: + bool is_group_started = false; }; ColumnsDefinition columns_definition; From 12a12eb3b85ae5f0485276631948ee20cad27616 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 4 Apr 2020 18:43:55 +0300 Subject: [PATCH 067/752] Try fix aggregating. --- dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp index 67b1289c4cc..16140ff744f 100644 --- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -189,6 +189,7 @@ void AggregatingSortedTransform::merge() { insertSimpleAggregationResult(); merged_data.insertRow(); + last_key.reset(); } /// if there are enough rows accumulated and the last one is calculated completely From 5bbfa40910509374af5a99d050da6d48263092cd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 4 Apr 2020 18:46:44 +0300 Subject: [PATCH 068/752] Try fix aggregating. 
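
Context for moving the reset: in the merge() loop, last_key is updated to the
current row before the key_differs branch runs, and the early return below
leaves that row unconsumed. Without resetting last_key, the next merge() call
compares the same row against itself, key_differs stays false, and the row is
aggregated into a group that initializeRow() never reopened. A self-contained
sketch of that failure mode (hypothetical simplified types, not the real
transform):

    #include <iostream>
    #include <optional>
    #include <vector>

    struct MiniMerge
    {
        std::optional<int> last_key;
        bool group_open = false;
        int total_groups = 0;
        bool rows_lost = false;

        // One merge() call; returns the position where it stopped.
        size_t merge(const std::vector<int> & keys, size_t pos, bool reset_last_key)
        {
            int flushed_this_call = 0;
            while (pos < keys.size())
            {
                const int current = keys[pos];
                const bool key_differs = !last_key || *last_key != current;
                last_key = current;              // updated before the branch
                if (key_differs)
                {
                    if (group_open)              // commit the finished group
                    {
                        group_open = false;
                        ++total_groups;
                        ++flushed_this_call;
                    }
                    if (flushed_this_call == 1)  // "enough rows": early return
                    {
                        if (reset_last_key)
                            last_key.reset();    // the fix from this patch
                        return pos;              // current row is NOT consumed
                    }
                    group_open = true;           // initializeRow for the new key
                }
                if (!group_open)
                    rows_lost = true;            // row lands in no open group
                ++pos;
            }
            if (group_open) { group_open = false; ++total_groups; }
            return pos;
        }
    };

    int main()
    {
        const std::vector<int> keys = {1, 1, 2, 2};  // two groups
        for (bool fix : {false, true})
        {
            MiniMerge m;
            for (size_t pos = 0; pos < keys.size();)
                pos = m.merge(keys, pos, fix);
            std::cout << (fix ? "with reset:    " : "without reset: ")
                      << m.total_groups << " group(s), rows lost: "
                      << (m.rows_lost ? "yes" : "no") << "\n";
        }
    }
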
---
 dbms/src/Processors/Merges/AggregatingSortedTransform.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp
index 16140ff744f..68f60aed8a2 100644
--- a/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp
+++ b/dbms/src/Processors/Merges/AggregatingSortedTransform.cpp
@@ -189,12 +189,14 @@ void AggregatingSortedTransform::merge()
         {
             insertSimpleAggregationResult();
             merged_data.insertRow();
-            last_key.reset();
         }
 
         /// if there are enough rows accumulated and the last one is calculated completely
         if (merged_data.hasEnoughRows())
+        {
+            last_key.reset();
             return;
+        }
 
         /// We will write the data for the group. We copy the values of ordinary columns.
         merged_data.initializeRow(current->all_columns, current->pos,

From e635b0e9eb39dbb38c52e6d54d99115f527eed10 Mon Sep 17 00:00:00 2001
From: Avogar
Date: Sat, 4 Apr 2020 20:04:41 +0300
Subject: [PATCH 069/752] Fix build error

---
 src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
index 32f89c816c5..ee32aeb6bfe 100644
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
@@ -31,7 +31,7 @@ bool MsgPackRowInputFormat::readObject()
     if (buf.eof())
         return false;
     PeekableReadBufferCheckpoint checkpoint{buf};
-    size_t offset;
+    size_t offset = 0;
     bool need_more_data = true;
     while (need_more_data)
     {

From 21532f6a6d46107622a5f68754505cb977086c21 Mon Sep 17 00:00:00 2001
From: Vxider
Date: Sun, 5 Apr 2020 01:33:51 +0800
Subject: [PATCH 070/752] parallel insert for materialized view

---
 src/Storages/StorageMaterializedView.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index 6284f791f4f..357d3858d0d 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -31,6 +31,7 @@ public:
     bool supportsPrewhere() const override { return getTargetTable()->supportsPrewhere(); }
     bool supportsFinal() const override { return getTargetTable()->supportsFinal(); }
     bool supportsIndexForIn() const override { return getTargetTable()->supportsIndexForIn(); }
+    bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); }
     bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override
     {
         return getTargetTable()->mayBenefitFromIndexForIn(left_in_operand, query_context);

From 1d451082187fd21a52dfba4c79cd5c847554557f Mon Sep 17 00:00:00 2001
From: Vasily Nemkov
Date: Mon, 6 Apr 2020 13:27:31 +0300
Subject: [PATCH 071/752] Fixed builds, implementation and tests

* Builds shouldn't fail on platforms that do not support SSE2 and SSE4.2
  and do not have the corresponding headers.
* Updated tests to include malicious padding.
* Fixed reporting of tokens that cross or lie outside of the data
  boundaries (see the sketch below).
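
The boundary rule in the last point can be shown in isolation: the SIMD path
classifies bytes in fixed-width chunks, so the scan can hop past the logical
end of the data into padding, and the last token found has to be clamped back
to the data boundary (the new `*token_len = std::min(len - *token_start,
*token_len)` line). A minimal scalar model of that behaviour (simplified, not
the SSE implementation; the tokenize() helper is hypothetical):

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <string>
    #include <vector>

    // Emulates the chunked scan: token bytes are matched over the whole padded
    // buffer (possibly past `len`), then every token is clamped to the boundary.
    std::vector<std::string> tokenize(const std::string & padded, size_t len)
    {
        std::vector<std::string> tokens;
        const char * data = padded.data();
        size_t pos = 0;
        while (pos < len)
        {
            // Skip separators inside the logical data only.
            while (pos < len && !std::isalnum(static_cast<unsigned char>(data[pos])))
                ++pos;
            if (pos >= len)
                break;
            const size_t token_start = pos;
            // The scan may run over the end of the data into the padding...
            while (pos < padded.size() && std::isalnum(static_cast<unsigned char>(data[pos])))
                ++pos;
            // ...so clamp, mirroring *token_len = std::min(len - *token_start, *token_len).
            const size_t token_len = std::min(len - token_start, pos - token_start);
            tokens.emplace_back(data + token_start, token_len);
        }
        return tokens;
    }

    int main()
    {
        // Logical data is "hi abc" (len = 6); "defpadding" plays the role of
        // the malicious padding the updated test appends after the data.
        const std::string padded = "hi abcdefpadding";
        for (const auto & token : tokenize(padded, 6))
            std::cout << token << '\n';  // prints "hi" then "abc", never "abcdef..."
    }
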
--- .../MergeTree/MergeTreeIndexFullText.cpp | 27 +++++++++++-------- .../tests/gtest_SplitTokenExtractor.cpp | 8 +++++- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/dbms/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/Storages/MergeTree/MergeTreeIndexFullText.cpp index af979010dc0..93553e0619e 100644 --- a/dbms/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -19,9 +19,14 @@ #include +#if defined(__SSE2__) #include + +#if defined(__SSE4_2__) #include -#include +#endif + +#endif namespace DB @@ -620,19 +625,19 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size #if defined(__SSE4_2__) // With the help of https://www.strchr.com/strcmp_and_strlen_using_sse_4.2 - static const auto alnum_chars_ranges = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + const auto alnum_chars_ranges = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, '\xFF', '\x80', 'z', 'a', 'Z', 'A', '9', '0'); // Every bit represents if `haystack` character is in the ranges (1) or not(0) const int result_bitmask = _mm_cvtsi128_si32(_mm_cmpestrm(alnum_chars_ranges, 8, haystack, haystack_length, _SIDD_CMP_RANGES)); #else // NOTE: -1 and +1 required since SSE2 has no `>=` and `<=` instructions on packed 8-bit integers (epi8). - static const auto number_begin = _mm_set1_epi8('0' - 1); - static const auto number_end = _mm_set1_epi8('9' + 1); - static const auto alpha_lower_begin = _mm_set1_epi8('a' - 1); - static const auto alpha_lower_end = _mm_set1_epi8('z' + 1); - static const auto alpha_upper_begin = _mm_set1_epi8('A' - 1); - static const auto alpha_upper_end = _mm_set1_epi8('Z' + 1); - static const auto zero = _mm_set1_epi8(0); + const auto number_begin = _mm_set1_epi8('0' - 1); + const auto number_end = _mm_set1_epi8('9' + 1); + const auto alpha_lower_begin = _mm_set1_epi8('a' - 1); + const auto alpha_lower_end = _mm_set1_epi8('z' + 1); + const auto alpha_upper_begin = _mm_set1_epi8('A' - 1); + const auto alpha_upper_end = _mm_set1_epi8('Z' + 1); + const auto zero = _mm_set1_epi8(0); // every bit represents if `haystack` character `c` statisfies condition: // (c < 0) || (c > '0' - 1 && c < '9' + 1) || (c > 'a' - 1 && c < 'z' + 1) || (c > 'A' - 1 && c < 'Z' + 1) @@ -669,7 +674,7 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size // check if there are leftovers in next `haystack` continue; - return true; + break; #else if (isASCII(data[*pos]) && !isAlphaNumericASCII(data[*pos])) { @@ -691,7 +696,7 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size // Could happen only if string is not padded with zeroes, and we accidentally hopped over end of data. 
if (*token_start > len) return false; - *token_len = len - *token_start; + *token_len = std::min(len - *token_start, *token_len); #endif return *token_len > 0; diff --git a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp index b8686f962bc..e2229792020 100644 --- a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp +++ b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp @@ -17,7 +17,7 @@ using namespace DB; struct SplitTokenExtractorTestCase { - const char * description; + const std::string_view description; const std::string source; const std::vector tokens; }; @@ -35,6 +35,12 @@ public: const auto & param = GetParam(); const auto & source = param.source; data = std::make_unique>(source.data(), source.data() + source.size()); + + // add predefined padding that forms tokens to ensure no reads past end of buffer. + const char extra_padding[] = "this is the end \xd1\x8d\xd1\x82\xd0\xbe\xd0\xba\xd0\xbe \xd0\xbd\xd0\xb5\xd1\x86"; + data->insert(data->end(), std::begin(extra_padding), std::end(extra_padding)); + + data->resize(data->size() - sizeof(extra_padding)); } std::unique_ptr> data; From 79024d73a230473203ad0560f5908b59cdac8e95 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 2 Apr 2020 20:27:07 +0300 Subject: [PATCH 072/752] improve performance of index analysis with monotonic functions --- src/Interpreters/Set.cpp | 11 ++ src/Interpreters/Set.h | 2 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 +- src/Storages/MergeTree/KeyCondition.cpp | 121 +++++++----------- src/Storages/MergeTree/KeyCondition.h | 109 ++++++++-------- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 28 ++-- .../MergeTree/MergeTreeIndexMinMax.cpp | 4 +- tests/performance/set_index.xml | 9 +- 8 files changed, 144 insertions(+), 144 deletions(-) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 3c79ea5174d..2ad9f588cf6 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -588,6 +588,14 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, }; } +bool MergeTreeSetIndex::hasMonotonicFunctionsChain() const +{ + for (const auto & mapping : indexes_mapping) + if (!mapping.functions.empty()) + return true; + return false; +} + void ValueWithInfinity::update(const Field & x) { /// Keep at most one element in column. 
@@ -599,8 +607,11 @@ void ValueWithInfinity::update(const Field & x) const IColumn & ValueWithInfinity::getColumnIfFinite() const { +#ifndef NDEBUG if (type != NORMAL) throw Exception("Trying to get column of infinite type", ErrorCodes::LOGICAL_ERROR); +#endif + return *column; } diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index c9605d4e11e..3a16d9ed094 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -227,6 +227,8 @@ public: size_t size() const { return ordered_set.at(0)->size(); } + bool hasMonotonicFunctionsChain() const; + BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types); private: diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 3e0caa67518..5d799d257bc 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -98,8 +98,8 @@ void IMergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names & for (size_t i = 0; i < column_names.size(); ++i) { - Field min_value; - Field max_value; + FieldRef min_value; + FieldRef max_value; const ColumnWithTypeAndName & column = block.getByName(column_names[i]); column.column->getExtremes(min_value, max_value); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index e994d254958..e755c4942a1 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -338,44 +338,6 @@ inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVi inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); } -FieldWithInfinity::FieldWithInfinity(const Field & field_) - : field(field_), - type(Type::NORMAL) -{ -} - -FieldWithInfinity::FieldWithInfinity(Field && field_) - : field(std::move(field_)), - type(Type::NORMAL) -{ -} - -FieldWithInfinity::FieldWithInfinity(const Type type_) - : type(type_) -{ -} - -FieldWithInfinity FieldWithInfinity::getMinusInfinity() -{ - return FieldWithInfinity(Type::MINUS_INFINITY); -} - -FieldWithInfinity FieldWithInfinity::getPlusInfinity() -{ - return FieldWithInfinity(Type::PLUS_INFINITY); -} - -bool FieldWithInfinity::operator<(const FieldWithInfinity & other) const -{ - return type < other.type || (type == other.type && type == Type::NORMAL && field < other.field); -} - -bool FieldWithInfinity::operator==(const FieldWithInfinity & other) const -{ - return type == other.type && (type != Type::NORMAL || field == other.field); -} - - /** Calculate expressions, that depend only on constants. * For index to work when something like "WHERE Date = toDate(now())" is written. 
*/ @@ -480,24 +442,41 @@ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants } -static void applyFunction( +static Field applyFunctionForField( const FunctionBasePtr & func, - const DataTypePtr & arg_type, const Field & arg_value, - DataTypePtr & res_type, Field & res_value) + const DataTypePtr & arg_type, + const Field & arg_value) { - res_type = func->getReturnType(); - Block block { { arg_type->createColumnConst(1, arg_value), arg_type, "x" }, - { nullptr, res_type, "y" } + { nullptr, func->getReturnType(), "y" } }; func->execute(block, {0}, 1, 1); - - block.safeGetByPosition(1).column->get(0, res_value); + return (*block.safeGetByPosition(1).column)[0]; } +static FieldRef applyFunction(FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) +{ + /// Fallback for fields without block reference. + if (field.isExplicit()) + return applyFunctionForField(func, current_type, field); + + String result_name = "_" + func->getName() + "_" + toString(field.column_idx); + size_t result_idx; + const auto & block = field.block; + if (!block->has(result_name)) + { + result_idx = block->columns(); + field.block->insert({nullptr, func->getReturnType(), result_name}); + func->execute(*block, {field.column_idx}, result_idx, block->rows()); + } + else + result_idx = block->getPositionByName(result_name); + + return {field.block, field.row_idx, result_idx}; +} void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants) { @@ -569,12 +548,8 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return false; // Apply the next transformation step - DataTypePtr new_type; - applyFunction(a.function_base, out_type, out_value, new_type, out_value); - if (!new_type) - return false; - - out_type.swap(new_type); + out_value = applyFunctionForField(a.function_base, out_type, out_value); + out_type = a.function_base->getReturnType(); expr_name = a.result_name; // Transformation results in a key expression, accept @@ -957,8 +932,8 @@ String KeyCondition::toString() const template static BoolMask forAnyHyperrectangle( size_t key_size, - const Field * key_left, - const Field * key_right, + const FieldRef * key_left, + const FieldRef * key_right, bool left_bounded, bool right_bounded, std::vector & hyperrectangle, @@ -1049,8 +1024,8 @@ static BoolMask forAnyHyperrectangle( BoolMask KeyCondition::checkInRange( size_t used_key_size, - const Field * left_key, - const Field * right_key, + const FieldRef * left_key, + const FieldRef * right_key, const DataTypes & data_types, bool right_bounded, BoolMask initial_mask) const @@ -1102,19 +1077,12 @@ std::optional KeyCondition::applyMonotonicFunctionsChainToRange( return {}; } - /// Apply the function. 
-    DataTypePtr new_type;
     if (!key_range.left.isNull())
-        applyFunction(func, current_type, key_range.left, new_type, key_range.left);
+        key_range.left = applyFunction(func, current_type, key_range.left);
     if (!key_range.right.isNull())
-        applyFunction(func, current_type, key_range.right, new_type, key_range.right);
+        key_range.right = applyFunction(func, current_type, key_range.right);
 
-    if (!new_type)
-    {
-        return {};
-    }
-
-    current_type.swap(new_type);
+    current_type = func->getReturnType();
 
     if (!monotonicity.is_positive)
         key_range.swapLeftAndRight();
@@ -1220,8 +1188,8 @@ BoolMask KeyCondition::checkInHyperrectangle(
 
 BoolMask KeyCondition::checkInRange(
     size_t used_key_size,
-    const Field * left_key,
-    const Field * right_key,
+    const FieldRef * left_key,
+    const FieldRef * right_key,
     const DataTypes & data_types,
     BoolMask initial_mask) const
 {
@@ -1231,8 +1199,8 @@
 
 bool KeyCondition::mayBeTrueInRange(
     size_t used_key_size,
-    const Field * left_key,
-    const Field * right_key,
+    const FieldRef * left_key,
+    const FieldRef * right_key,
     const DataTypes & data_types) const
 {
     return checkInRange(used_key_size, left_key, right_key, data_types, true, BoolMask::consider_only_can_be_true).can_be_true;
@@ -1241,7 +1209,7 @@
 
 BoolMask KeyCondition::checkAfter(
     size_t used_key_size,
-    const Field * left_key,
+    const FieldRef * left_key,
     const DataTypes & data_types,
     BoolMask initial_mask) const
 {
@@ -1251,7 +1219,7 @@
 
 bool KeyCondition::mayBeTrueAfter(
     size_t used_key_size,
-    const Field * left_key,
+    const FieldRef * left_key,
     const DataTypes & data_types) const
 {
     return checkInRange(used_key_size, left_key, nullptr, data_types, false, BoolMask::consider_only_can_be_true).can_be_true;
@@ -1382,4 +1350,13 @@ size_t KeyCondition::getMaxKeyColumn() const
     return res;
 }
 
+bool KeyCondition::hasMonotonicFunctionsChain() const
+{
+    for (const auto & element : rpn)
+        if (!element.monotonic_functions_chain.empty()
+            || (element.set_index && element.set_index->hasMonotonicFunctionsChain()))
+            return true;
+    return false;
+}
+
 }
diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h
index 8667e0aea27..a7cdd1f1e0a 100644
--- a/src/Storages/MergeTree/KeyCondition.h
+++ b/src/Storages/MergeTree/KeyCondition.h
@@ -15,10 +15,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int BAD_TYPE_OF_FIELD;
-}
 
 class IFunction;
 using FunctionBasePtr = std::shared_ptr;
@@ -26,6 +22,35 @@ using FunctionBasePtr = std::shared_ptr;
 class ExpressionActions;
 using ExpressionActionsPtr = std::shared_ptr;
 
+/** A field that can be stored in two representations:
+  * - A standalone field.
+  * - A field with a reference to its position in a block.
+  * It's needed for executing functions on ranges during
+  * index analysis. If a function was executed once for a field,
+  * its result is cached for the whole block that the field's reference points to.
+  */
+struct FieldRef : public Field
+{
+    using SharedBlock = std::shared_ptr;
+
+    FieldRef() = default;
+
+    /// Create as explicit field without block.
+    template
+    FieldRef(const T & value) : Field(value) {}
+
+    /// Create as reference to field in block.
+ FieldRef(const SharedBlock & block_, size_t row_idx_, size_t column_idx_) + : Field((*block_->getByPosition(column_idx_).column)[row_idx_]), + block(block_), row_idx(row_idx_), column_idx(column_idx_) {} + + bool isExplicit() const { return block == nullptr; } + + SharedBlock block; + size_t row_idx; + size_t column_idx; +}; + /** Range with open or closed ends; possibly unbounded. */ struct Range @@ -35,8 +60,8 @@ private: static bool less(const Field & lhs, const Field & rhs); public: - Field left; /// the left border, if any - Field right; /// the right border, if any + FieldRef left; /// the left border, if any + FieldRef right; /// the right border, if any bool left_bounded = false; /// bounded at the left bool right_bounded = false; /// bounded at the right bool left_included = false; /// includes the left border, if any @@ -46,11 +71,11 @@ public: Range() {} /// One point. - Range(const Field & point) + Range(const FieldRef & point) : left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {} /// A bounded two-sided range. - Range(const Field & left_, bool left_included_, const Field & right_, bool right_included_) + Range(const FieldRef & left_, bool left_included_, const FieldRef & right_, bool right_included_) : left(left_), right(right_), left_bounded(true), right_bounded(true), left_included(left_included_), right_included(right_included_) @@ -58,7 +83,7 @@ public: shrinkToIncludedIfPossible(); } - static Range createRightBounded(const Field & right_point, bool right_included) + static Range createRightBounded(const FieldRef & right_point, bool right_included) { Range r; r.right = right_point; @@ -68,7 +93,7 @@ public: return r; } - static Range createLeftBounded(const Field & left_point, bool left_included) + static Range createLeftBounded(const FieldRef & left_point, bool left_included) { Range r; r.left = left_point; @@ -84,7 +109,7 @@ public: */ void shrinkToIncludedIfPossible() { - if (left_bounded && !left_included) + if (left.isExplicit() && left_bounded && !left_included) { if (left.getType() == Field::Types::UInt64 && left.get() != std::numeric_limits::max()) { @@ -97,7 +122,7 @@ public: left_included = true; } } - if (right_bounded && !right_included) + if (right.isExplicit() && right_bounded && !right_included) { if (right.getType() == Field::Types::UInt64 && right.get() != std::numeric_limits::min()) { @@ -120,13 +145,13 @@ public: } /// x contained in the range - bool contains(const Field & x) const + bool contains(const FieldRef & x) const { return !leftThan(x) && !rightThan(x); } /// x is to the left - bool rightThan(const Field & x) const + bool rightThan(const FieldRef & x) const { return (left_bounded ? !(less(left, x) || (left_included && equals(x, left))) @@ -134,7 +159,7 @@ public: } /// x is to the right - bool leftThan(const Field & x) const + bool leftThan(const FieldRef & x) const { return (right_bounded ? 
!(less(x, right) || (right_included && equals(x, right))) @@ -195,42 +220,6 @@ public: String toString() const; }; - -/// Class that extends arbitrary objects with infinities, like +-inf for floats -class FieldWithInfinity -{ -public: - enum Type - { - MINUS_INFINITY = -1, - NORMAL = 0, - PLUS_INFINITY = 1 - }; - - explicit FieldWithInfinity(const Field & field_); - FieldWithInfinity(Field && field_); - - static FieldWithInfinity getMinusInfinity(); - static FieldWithInfinity getPlusInfinity(); - - bool operator<(const FieldWithInfinity & other) const; - bool operator==(const FieldWithInfinity & other) const; - - Field getFieldIfFinite() const - { - if (type != NORMAL) - throw Exception("Trying to get field of infinite type", ErrorCodes::BAD_TYPE_OF_FIELD); - return field; - } - -private: - Field field; - Type type; - - FieldWithInfinity(const Type type_); -}; - - /** Condition on the index. * * Consists of the conditions for the key belonging to all possible ranges or sets, @@ -261,8 +250,8 @@ public: /// one of the resulting mask components (see BoolMask::consider_only_can_be_XXX). BoolMask checkInRange( size_t used_key_size, - const Field * left_key, - const Field * right_key, + const FieldRef * left_key, + const FieldRef* right_key, const DataTypes & data_types, BoolMask initial_mask = BoolMask(false, false)) const; @@ -270,7 +259,7 @@ public: /// left_key must contain all the fields in the sort_descr in the appropriate order. BoolMask checkAfter( size_t used_key_size, - const Field * left_key, + const FieldRef * left_key, const DataTypes & data_types, BoolMask initial_mask = BoolMask(false, false)) const; @@ -278,15 +267,15 @@ public: /// This is more efficient than checkInRange(...).can_be_true. bool mayBeTrueInRange( size_t used_key_size, - const Field * left_key, - const Field * right_key, + const FieldRef * left_key, + const FieldRef * right_key, const DataTypes & data_types) const; /// Same as checkAfter, but calculate only may_be_true component of a result. /// This is more efficient than checkAfter(...).can_be_true. bool mayBeTrueAfter( size_t used_key_size, - const Field * left_key, + const FieldRef * left_key, const DataTypes & data_types) const; /// Checks that the index can not be used. @@ -295,6 +284,8 @@ public: /// Get the maximum number of the key element used in the condition. size_t getMaxKeyColumn() const; + bool hasMonotonicFunctionsChain() const; + /// Impose an additional condition: the value in the column `column` must be in the range `range`. /// Returns whether there is such a column in the key. bool addCondition(const String & column, const Range & range); @@ -374,8 +365,8 @@ public: private: BoolMask checkInRange( size_t used_key_size, - const Field * left_key, - const Field * right_key, + const FieldRef * left_key, + const FieldRef * right_key, const DataTypes & data_types, bool right_bounded, BoolMask initial_mask) const; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 370286117ae..39de45e07e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1201,11 +1201,23 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( * If fits, split it into smaller ones and put them on the stack. If not, discard it. * If the segment is already of one mark length, add it to response and discard it. 
*/ - std::vector ranges_stack{ {0, marks_count} }; + std::vector ranges_stack = { {0, marks_count} }; + + auto index_block = std::make_shared(); + for (size_t i = 0; i < used_key_size; ++i) + index_block->insert({index[i], data.primary_key_data_types[i], data.primary_key_columns[i]}); + + std::function create_field_ref; + /// If there is no monotonic functions, there is no need to save block reference. + /// Passing explicit field to FieldRef allows to optimize ranges and shows better performance while reading the field. + if (key_condition.hasMonotonicFunctionsChain()) + create_field_ref = [&index_block](size_t row, size_t column) -> FieldRef { return {index_block, row, column}; }; + else + create_field_ref = [&index](size_t row, size_t column) -> FieldRef { return (*index[column])[row]; }; /// NOTE Creating temporary Field objects to pass to KeyCondition. - Row index_left(used_key_size); - Row index_right(used_key_size); + std::vector index_left(used_key_size); + std::vector index_right(used_key_size); while (!ranges_stack.empty()) { @@ -1216,7 +1228,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( if (range.end == marks_count && !has_final_mark) { for (size_t i = 0; i < used_key_size; ++i) - index[i]->get(range.begin, index_left[i]); + index_left[i] = create_field_ref(range.begin, i); may_be_true = key_condition.mayBeTrueAfter( used_key_size, index_left.data(), data.primary_key_data_types); @@ -1228,8 +1240,8 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( for (size_t i = 0; i < used_key_size; ++i) { - index[i]->get(range.begin, index_left[i]); - index[i]->get(range.end, index_right[i]); + index_left[i] = create_field_ref(range.begin, i); + index_right[i] = create_field_ref(range.end, i); } may_be_true = key_condition.mayBeTrueInRange( @@ -1254,9 +1266,9 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( size_t end; for (end = range.end; end > range.begin + step; end -= step) - ranges_stack.push_back(MarkRange(end - step, end)); + ranges_stack.emplace_back(end - step, end); - ranges_stack.push_back(MarkRange(range.begin, end)); + ranges_stack.emplace_back(range.begin, end); } } } diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 122f038fee6..220fc70c549 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -100,8 +100,8 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s size_t rows_read = std::min(limit, block.rows() - *pos); - Field field_min; - Field field_max; + FieldRef field_min; + FieldRef field_max; for (size_t i = 0; i < index.columns.size(); ++i) { const auto & column = block.getByName(index.columns[i]).column; diff --git a/tests/performance/set_index.xml b/tests/performance/set_index.xml index 090d8ac8c08..f158c481d93 100644 --- a/tests/performance/set_index.xml +++ b/tests/performance/set_index.xml @@ -14,7 +14,14 @@ - SELECT count() FROM test_in WHERE a IN (SELECT rand(1) FROM zeros(100000)) SETTINGS max_rows_to_read = 1, read_overflow_mode = 'break' + SELECT count() FROM test_in WHERE a IN (SELECT rand(1) FROM numbers(100000)) SETTINGS max_rows_to_read = 1, read_overflow_mode = 'break' + + SELECT count() FROM test_in WHERE toInt64(a) IN (SELECT toInt64(rand(1)) FROM numbers(100000)) settings max_rows_to_read=1, read_overflow_mode='break' + + + SELECT count() FROM test_in WHERE -toInt64(a) IN (SELECT toInt64(rand(1)) FROM numbers(100000)) settings 
max_rows_to_read=1, read_overflow_mode='break' + + SELECT count() FROM test_in WHERE -toInt64(a) NOT IN (SELECT toInt64(rand(1)) FROM numbers(100000)) settings max_rows_to_read=1, read_overflow_mode='break' SELECT count() FROM numbers(1000) WHERE toString(number) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', '34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', '30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', '48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', '66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012', '46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', '60155', '90608', 
'82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', '23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', '9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', '56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', '90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', '89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', '60061', '54962', 
'99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', '12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', '89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', '21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', '89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', From 5ada959853275249ad7ef2aec5031a4b6651109b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 6 Apr 2020 13:36:56 +0300 Subject: [PATCH 073/752] improve performance of index analysis with monotonic functions --- src/Storages/MergeTree/KeyCondition.h | 8 ++--- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 32 ++++++++++++------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.h 
b/src/Storages/MergeTree/KeyCondition.h index a7cdd1f1e0a..ffc0d46a2ec 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -31,22 +31,20 @@ using ExpressionActionsPtr = std::shared_ptr; */ struct FieldRef : public Field { - using SharedBlock = std::shared_ptr; - FieldRef() = default; /// Create as explicit field without block. template - FieldRef(const T & value) : Field(value) {} + FieldRef(T && value) : Field(std::forward(value)) {} /// Create as reference to field in block. - FieldRef(const SharedBlock & block_, size_t row_idx_, size_t column_idx_) + FieldRef(Block * block_, size_t row_idx_, size_t column_idx_) : Field((*block_->getByPosition(column_idx_).column)[row_idx_]), block(block_), row_idx(row_idx_), column_idx(column_idx_) {} bool isExplicit() const { return block == nullptr; } - SharedBlock block; + Block * block; size_t row_idx; size_t column_idx; }; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 39de45e07e0..13e852765b7 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1203,17 +1203,27 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( */ std::vector ranges_stack = { {0, marks_count} }; - auto index_block = std::make_shared(); - for (size_t i = 0; i < used_key_size; ++i) - index_block->insert({index[i], data.primary_key_data_types[i], data.primary_key_columns[i]}); - - std::function create_field_ref; + std::function create_field_ref; /// If there is no monotonic functions, there is no need to save block reference. - /// Passing explicit field to FieldRef allows to optimize ranges and shows better performance while reading the field. + /// Passing explicit field to FieldRef allows to optimize ranges and shows better performance. if (key_condition.hasMonotonicFunctionsChain()) - create_field_ref = [&index_block](size_t row, size_t column) -> FieldRef { return {index_block, row, column}; }; + { + auto index_block = std::make_shared(); + for (size_t i = 0; i < used_key_size; ++i) + index_block->insert({index[i], data.primary_key_data_types[i], data.primary_key_columns[i]}); + + create_field_ref = [index_block](size_t row, size_t column, FieldRef & field) + { + field = {index_block.get(), row, column}; + }; + } else - create_field_ref = [&index](size_t row, size_t column) -> FieldRef { return (*index[column])[row]; }; + { + create_field_ref = [&index](size_t row, size_t column, FieldRef & field) + { + index[column]->get(row, field); + }; + } /// NOTE Creating temporary Field objects to pass to KeyCondition. 
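 /// (Illustrative note, not part of the patch: with the out-parameter form above,
 /// the non-monotonic case fills the FieldRef in place via index[column]->get()
 /// instead of returning a temporary Field, and the monotonic case shares a single
 /// heap-allocated Block across every mark visited — which is where the speedup
 /// claimed in this commit comes from.)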
std::vector index_left(used_key_size); @@ -1228,7 +1238,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( if (range.end == marks_count && !has_final_mark) { for (size_t i = 0; i < used_key_size; ++i) - index_left[i] = create_field_ref(range.begin, i); + create_field_ref(range.begin, i, index_left[i]); may_be_true = key_condition.mayBeTrueAfter( used_key_size, index_left.data(), data.primary_key_data_types); @@ -1240,8 +1250,8 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( for (size_t i = 0; i < used_key_size; ++i) { - index_left[i] = create_field_ref(range.begin, i); - index_right[i] = create_field_ref(range.end, i); + create_field_ref(range.begin, i, index_left[i]); + create_field_ref(range.end, i, index_right[i]); } may_be_true = key_condition.mayBeTrueInRange( From 9c5cea3035a2f877426e32f05b8c0571c98f3fd6 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 6 Apr 2020 13:43:00 +0300 Subject: [PATCH 074/752] More tests and better token checks. --- .../tests/gtest_SplitTokenExtractor.cpp | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp index e2229792020..de78347ebbd 100644 --- a/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp +++ b/dbms/src/Storages/tests/gtest_SplitTokenExtractor.cpp @@ -62,16 +62,39 @@ TEST_P(SplitTokenExtractorTest, next) { SCOPED_TRACE(++i); ASSERT_TRUE(token_extractor.next(data->data(), data->size(), &pos, &token_start, &token_len)); - EXPECT_EQ(expected_token, param.source.substr(token_start, token_len)) + + EXPECT_EQ(expected_token, std::string_view(data->data() + token_start, token_len)) << " token_start:" << token_start << " token_len: " << token_len; } - - ASSERT_FALSE(token_extractor.next(data->data(), data->size(), &pos, &token_start, &token_len)); + ASSERT_FALSE(token_extractor.next(data->data(), data->size(), &pos, &token_start, &token_len)) + << "\n\t=> \"" << param.source.substr(token_start, token_len) << "\"" + << "\n\t" << token_start << ", " << token_len << ", " << pos << ", " << data->size(); } // Helper to allow strings with embedded '\0' chars. #define BINARY_STRING(str) std::string{str, sizeof(str) - 1} +INSTANTIATE_TEST_SUITE_P(NoTokens, + SplitTokenExtractorTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Empty input sequence produces no tokens.", + "", + {} + }, + { + "Whitespace only", + " ", + {} + }, + { + "Whitespace only large string", + " \t\v\n\r \t\v\n\r \t\v\n\r \t\v\n\r \t\v\n\r \t\v\n\r \t\v\n\r \t\v\n\r \t\v\n\r \t\v\n\r", + {} + } + }) +); + INSTANTIATE_TEST_SUITE_P(ShortSingleToken, SplitTokenExtractorTest, ::testing::ValuesIn(std::initializer_list{ From 2dc1eddfab04348c816ecc5ef3794f8376e491dc Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 6 Apr 2020 16:35:11 +0300 Subject: [PATCH 075/752] fix FieldRef --- src/Storages/MergeTree/KeyCondition.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index ffc0d46a2ec..7c8b63eb800 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -44,9 +44,9 @@ struct FieldRef : public Field bool isExplicit() const { return block == nullptr; } - Block * block; - size_t row_idx; - size_t column_idx; + Block * block = nullptr; + size_t row_idx = 0; + size_t column_idx = 0; }; /** Range with open or closed ends; possibly unbounded. 
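Taken together, patches 073 and 075 leave FieldRef holding a raw Block pointer with defaulted members, so an explicit field and a block-backed field can be told apart via isExplicit(). Below is a minimal self-contained sketch of that design, using simplified stand-in types (the real Field, Block and FieldRef live elsewhere in the ClickHouse sources; everything here is an assumption for illustration only):

#include <cassert>
#include <cstddef>
#include <vector>

// Simplified stand-ins, not the real ClickHouse types.
struct Field { int value = 0; };

struct Block
{
    std::vector<std::vector<int>> columns;   // columns[col][row]
    int at(size_t row, size_t col) const { return columns[col][row]; }
};

struct FieldRef : Field
{
    FieldRef() = default;
    FieldRef(int v) : Field{v} {}                      // explicit field, no block
    FieldRef(Block * block_, size_t row, size_t col)   // reference to a field in a block
        : Field{block_->at(row, col)}, block(block_), row_idx(row), column_idx(col) {}

    bool isExplicit() const { return block == nullptr; }

    Block * block = nullptr;   // raw pointer is safe here: in patch 073 the lambda
    size_t row_idx = 0;        // captures the owning shared_ptr<Block>, keeping it
    size_t column_idx = 0;     // alive for as long as any FieldRef into it is used
};

int main()
{
    Block index{{{10, 20, 30}}};
    FieldRef constant(42);              // e.g. a literal from the WHERE clause
    FieldRef from_index(&index, 1, 0);  // row 1 of column 0
    assert(constant.isExplicit());
    assert(!from_index.isExplicit() && from_index.value == 20);
}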
From 738e8a7ef8af5066eb99ac714708c72414d4ce69 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 6 Apr 2020 20:16:36 +0300 Subject: [PATCH 076/752] Minor test refactoring * using string literal for binary strings * sorted includes --- .../tests/gtest_SplitTokenExtractor.cpp | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/src/Storages/tests/gtest_SplitTokenExtractor.cpp index de78347ebbd..9255e5ca817 100644 --- a/src/Storages/tests/gtest_SplitTokenExtractor.cpp +++ b/src/Storages/tests/gtest_SplitTokenExtractor.cpp @@ -5,14 +5,15 @@ #include -#include -#include -#include #include +#include +#include +#include namespace { using namespace DB; +using namespace std::literals::string_literals; } struct SplitTokenExtractorTestCase @@ -71,9 +72,6 @@ TEST_P(SplitTokenExtractorTest, next) << "\n\t" << token_start << ", " << token_len << ", " << pos << ", " << data->size(); } -// Helper to allow strings with embedded '\0' chars. -#define BINARY_STRING(str) std::string{str, sizeof(str) - 1} - INSTANTIATE_TEST_SUITE_P(NoTokens, SplitTokenExtractorTest, ::testing::ValuesIn(std::initializer_list{ @@ -98,11 +96,6 @@ INSTANTIATE_TEST_SUITE_P(NoTokens, INSTANTIATE_TEST_SUITE_P(ShortSingleToken, SplitTokenExtractorTest, ::testing::ValuesIn(std::initializer_list{ - { - "Empty input sequence produces no tokens.", - "", - {} - }, { "Short single token", "foo", @@ -144,7 +137,7 @@ INSTANTIATE_TEST_SUITE_P(MultipleTokens, }, { "Multiple tokens separated by non-printable chars", - BINARY_STRING("\0abc\1" "123\2XYZ\4"), + "\0abc\1" "123\2XYZ\4"s, { "abc", "123", "XYZ" } @@ -152,7 +145,7 @@ INSTANTIATE_TEST_SUITE_P(MultipleTokens, { "ASCII table is split into numeric, upper case and lower case letters", - BINARY_STRING("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16" "\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNO" "PQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87" "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c" @@ -160,7 +153,7 @@ INSTANTIATE_TEST_SUITE_P(MultipleTokens, "\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6" "\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb" "\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0" - "\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"), + "\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"s, { "0123456789", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz", "\x80\x81\x82\x83\x84\x85\x86\x87" From 51ea2cf61098b02eaa4ac778fdb265581734bc0e Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Apr 2020 23:14:56 +0300 Subject: [PATCH 077/752] some tests --- dbms/src/Interpreters/InterserverIOHandler.h | 10 +- .../test_quorum_inserts/__init__.py | 0 .../configs/conf.d/clusters.xml | 37 ++++ .../configs/conf.d/ddl.xml | 5 + .../test_quorum_inserts/configs/users.xml | 27 +++ .../integration/test_quorum_inserts/test.py | 173 ++++++++++++++++++ 6 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/integration/test_quorum_inserts/__init__.py create mode 100644 dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml create mode 100644 
dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml create mode 100644 dbms/tests/integration/test_quorum_inserts/configs/users.xml create mode 100644 dbms/tests/integration/test_quorum_inserts/test.py diff --git a/dbms/src/Interpreters/InterserverIOHandler.h b/dbms/src/Interpreters/InterserverIOHandler.h index 4651c8cb978..ca3a92df2e6 100644 --- a/dbms/src/Interpreters/InterserverIOHandler.h +++ b/dbms/src/Interpreters/InterserverIOHandler.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include namespace Poco { namespace Net { class HTTPServerResponse; } } @@ -51,14 +53,18 @@ public: void addEndpoint(const String & name, InterserverIOEndpointPtr endpoint) { std::lock_guard lock(mutex); + LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), "anime addEndpoint() " << name); + LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), StackTrace().toString()); bool inserted = endpoint_map.try_emplace(name, std::move(endpoint)).second; if (!inserted) throw Exception("Duplicate interserver IO endpoint: " + name, ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT); } bool removeEndpointIfExists(const String & name) { std::lock_guard lock(mutex); + LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), "anime removeEndpointIfExists() " << name); + LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), StackTrace().toString()); return endpoint_map.erase(name); } @@ -66,6 +72,8 @@ public: try { std::lock_guard lock(mutex); + LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), "anime getEndpoint() " << name); + LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), StackTrace().toString()); return endpoint_map.at(name); } catch (...) diff --git a/dbms/tests/integration/test_quorum_inserts/__init__.py b/dbms/tests/integration/test_quorum_inserts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml b/dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml new file mode 100644 index 00000000000..adf6ad80247 --- /dev/null +++ b/dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml @@ -0,0 +1,37 @@ + + + + + + true + + s0_0_0 + 9000 + + + s0_0_1 + 9000 + + + + + + + true + + s0_0_0 + 9000 + + + s0_0_1 + 9000 + + + s0_0_2 + 9000 + + + + + + diff --git a/dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml b/dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml new file mode 100644 index 00000000000..abad0dee450 --- /dev/null +++ b/dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml @@ -0,0 +1,5 @@ + + + /clickhouse/task_queue/ddl + + \ No newline at end of file diff --git a/dbms/tests/integration/test_quorum_inserts/configs/users.xml b/dbms/tests/integration/test_quorum_inserts/configs/users.xml new file mode 100644 index 00000000000..c5114c10cde --- /dev/null +++ b/dbms/tests/integration/test_quorum_inserts/configs/users.xml @@ -0,0 +1,27 @@ + + + + + 1 + 2 + 1 + 5000 + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/dbms/tests/integration/test_quorum_inserts/test.py b/dbms/tests/integration/test_quorum_inserts/test.py new file mode 100644 index 00000000000..1f6c1b9b852 --- /dev/null +++ b/dbms/tests/integration/test_quorum_inserts/test.py @@ -0,0 +1,173 @@ +import os +import sys +import time + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +@pytest.fixture(scope="module") +def
started_cluster(): + global cluster + try: + clusters_schema = { + "0" : {"0" : ["0", "1", "2"]} + } + + for cluster_name, shards in clusters_schema.iteritems(): + for shard_name, replicas in shards.iteritems(): + for replica_name in replicas: + name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name) + cluster.add_instance(name, + config_dir="configs", + macros={"cluster": cluster_name, "shard": shard_name, "replica": replica_name}, + with_zookeeper=True) + + cluster.start() + yield cluster + + finally: + cluster.shutdown() + +def test_drop_replica_and_achieve_quorum(started_cluster): + zero = cluster.instances['s0_0_0'] + first = cluster.instances['s0_0_1'] + second = cluster.instances['s0_0_2'] + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") + + create_query = "CREATE TABLE bug.test_drop_replica_and_achieve_quorum " \ + "(a Int8, d Date) " \ + "Engine = ReplicatedMergeTree('/clickhouse/tables/test_drop_replica_and_achieve_quorum', '{}') " \ + "PARTITION BY d ORDER BY a" + + print("Create Replicated table with two replicas") + zero.query(create_query.format(0)) + first.query(create_query.format(1)) + + print("Stop fetches on one replica. Since that, it will be isolated.") + first.query("SYSTEM STOP FETCHES bug.test_drop_replica_and_achieve_quorum") + + print("Insert to other replica. This query will fail.") + quorum_timeout = zero.query_and_get_error("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (1, '2011-01-01')") + assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail." + + assert "1\t2011-01-01\n" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum", + settings={'select_sequential_consistency' : 0}) + + print("Add third replica") + second.query(create_query.format(2)) + + zero.query("SYSTEM RESTART REPLICA bug.test_drop_replica_and_achieve_quorum") + + print("START FETCHES first replica") + first.query("SYSTEM START FETCHES bug.test_drop_replica_and_achieve_quorum") + + time.sleep(5) + + print(zero.query("SELECT * from system.replicas format Vertical")) + + + print("---------") + print(zero.query("SELECT * from system.replication_queue format Vertical")) + print("---------") + + + print(first.query("SELECT * from system.replicas format Vertical")) + print("---------") + print(first.query("SELECT * from system.replication_queue format Vertical")) + print("---------") + print(second.query("SELECT * from system.replicas format Vertical")) + print("---------") + print(first.query("SELECT * from system.replication_queue format Vertical")) + + + print("SYNC first replica") + first.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum") + + print("SYNC second replica") + second.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum") + + print("Quorum for previous insert achieved.") + assert "1\t2011-01-01\n" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum", + settings={'select_sequential_consistency' : 1}) + + print("Now we can insert some other data.") + zero.query("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (2, '2012-02-02')") + + assert "1\t2011-01-01\n2 2012-02-02" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum") + assert "1\t2011-01-01\n2 2012-02-02" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum") + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") 
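# (Illustrative aside, not a line of the original test: assuming the stripped
# users.xml profile above sets insert_quorum=2 and insert_quorum_timeout=5000,
# the failure exercised here is expected — with fetches stopped on `first`, an
# INSERT on `zero` is acknowledged by only one replica, so the quorum of two is
# never reached and the query fails with "Timeout while waiting for quorum";
# reading with select_sequential_consistency=0 still sees the part written
# locally on `zero`, which is what the assert above relies on.)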
+ + +def test_insert_quorum_with_drop_partition(started_cluster): + zero = cluster.instances['s0_0_0'] + first = cluster.instances['s0_0_1'] + second = cluster.instances['s0_0_2'] + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") + + zero.query("CREATE TABLE bug.quorum_insert_with_drop_partition ON CLUSTER one_shard_three_replicas " + "(a Int8, d Date) " + "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " + "PARTITION BY d ORDER BY a ") + + print("Stop fetches for bug.quorum_insert_with_drop_partition at first replica.") + first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_drop_partition") + + print("Insert with quorum. (zero and second)") + zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')") + + print("Drop partition.") + zero.query_and_get_error("ALTER TABLE bug.quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'") + + print("Insert to deleted partition") + zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')") + + print("Sync other replica from quorum.") + second.query("SYSTEM SYNC REPLICA bug.quorum_insert_with_drop_partition") + + print("Select from updated partition.") + assert "2 2011-01-01\n" == zero.query("SELECT * FROM bug.quorum_insert_with_drop_partition") + assert "2 2011-01-01\n" == second.query("SELECT * FROM bug.quorum_insert_with_drop_partition") + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + + +def test_insert_quorum_with_ttl(started_cluster): + zero = cluster.instances['s0_0_0'] + first = cluster.instances['s0_0_1'] + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_two_replicas") + zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_two_replicas") + + zero.query("CREATE TABLE bug.quorum_insert_with_ttl ON CLUSTER one_shard_two_replicas " + "(a Int8, d Date) " + "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " + "PARTITION BY d ORDER BY a " + "TTL d + INTERVAL 5 second " + "SETTINGS merge_with_ttl_timeout=2 ") + + print("Stop fetches for bug.quorum_insert_with_ttl at first replica.") + first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_ttl") + + print("Insert should fail since it can not reach the quorum.") + quorum_timeout = zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_ttl(a,d) VALUES(6, now())") + assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail." 
+ + print("Wait 10 seconds and the data should be dropped by TTL.") + time.sleep(10) + count = zero.query("SELECT count() FROM bug.quorum_insert_with_ttl WHERE a=6") + assert count == "0\n", "Data has to be dropped by TTL" + + print("Resume fetches for bug.quorum_insert_with_ttl at first replica.") + first.query("SYSTEM START FETCHES bug.quorum_insert_with_ttl") + time.sleep(5) + + print("Inserts should resume.") + zero.query("INSERT INTO bug.quorum_insert_with_ttl(a) VALUES(6)") From 0c0eff36d132ce73cea771ffb4bfba8a0ca43c56 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Apr 2020 23:34:04 +0300 Subject: [PATCH 078/752] move --- .../test_quorum_inserts/__init__.py | 0 .../configs/conf.d/clusters.xml | 37 ++++ .../configs/conf.d/ddl.xml | 5 + .../test_quorum_inserts/configs/users.xml | 27 +++ tests/integration/test_quorum_inserts/test.py | 173 ++++++++++++++++++ 5 files changed, 242 insertions(+) create mode 100644 tests/integration/test_quorum_inserts/__init__.py create mode 100644 tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml create mode 100644 tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml create mode 100644 tests/integration/test_quorum_inserts/configs/users.xml create mode 100644 tests/integration/test_quorum_inserts/test.py diff --git a/tests/integration/test_quorum_inserts/__init__.py b/tests/integration/test_quorum_inserts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml b/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml new file mode 100644 index 00000000000..adf6ad80247 --- /dev/null +++ b/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml @@ -0,0 +1,37 @@ + + + + + + true + + s0_0_0 + 9000 + + + s0_0_1 + 9000 + + + + + + + true + + s0_0_0 + 9000 + + + s0_0_1 + 9000 + + + s0_0_2 + 9000 + + + + + + diff --git a/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml b/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml new file mode 100644 index 00000000000..abad0dee450 --- /dev/null +++ b/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml @@ -0,0 +1,5 @@ + + + /clickhouse/task_queue/ddl + + \ No newline at end of file diff --git a/tests/integration/test_quorum_inserts/configs/users.xml b/tests/integration/test_quorum_inserts/configs/users.xml new file mode 100644 index 00000000000..c5114c10cde --- /dev/null +++ b/tests/integration/test_quorum_inserts/configs/users.xml @@ -0,0 +1,27 @@ + + + + + 1 + 2 + 1 + 5000 + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py new file mode 100644 index 00000000000..1f6c1b9b852 --- /dev/null +++ b/tests/integration/test_quorum_inserts/test.py @@ -0,0 +1,173 @@ +import os +import sys +import time + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + clusters_schema = { + "0" : {"0" : ["0", "1", "2"]} + } + + for cluster_name, shards in clusters_schema.iteritems(): + for shard_name, replicas in shards.iteritems(): + for replica_name in replicas: + name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name) + cluster.add_instance(name, + config_dir="configs", + macros={"cluster": cluster_name, "shard": shard_name, "replica": replica_name}, + with_zookeeper=True) + + cluster.start() + yield cluster + +
finally: + cluster.shutdown() + +def test_drop_replica_and_achieve_quorum(started_cluster): + zero = cluster.instances['s0_0_0'] + first = cluster.instances['s0_0_1'] + second = cluster.instances['s0_0_2'] + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") + + create_query = "CREATE TABLE bug.test_drop_replica_and_achieve_quorum " \ + "(a Int8, d Date) " \ + "Engine = ReplicatedMergeTree('/clickhouse/tables/test_drop_replica_and_achieve_quorum', '{}') " \ + "PARTITION BY d ORDER BY a" + + print("Create Replicated table with two replicas") + zero.query(create_query.format(0)) + first.query(create_query.format(1)) + + print("Stop fetches on one replica. Since that, it will be isolated.") + first.query("SYSTEM STOP FETCHES bug.test_drop_replica_and_achieve_quorum") + + print("Insert to other replica. This query will fail.") + quorum_timeout = zero.query_and_get_error("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (1, '2011-01-01')") + assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail." + + assert "1\t2011-01-01\n" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum", + settings={'select_sequential_consistency' : 0}) + + print("Add third replica") + second.query(create_query.format(2)) + + zero.query("SYSTEM RESTART REPLICA bug.test_drop_replica_and_achieve_quorum") + + print("START FETCHES first replica") + first.query("SYSTEM START FETCHES bug.test_drop_replica_and_achieve_quorum") + + time.sleep(5) + + print(zero.query("SELECT * from system.replicas format Vertical")) + + + print("---------") + print(zero.query("SELECT * from system.replication_queue format Vertical")) + print("---------") + + + print(first.query("SELECT * from system.replicas format Vertical")) + print("---------") + print(first.query("SELECT * from system.replication_queue format Vertical")) + print("---------") + print(second.query("SELECT * from system.replicas format Vertical")) + print("---------") + print(first.query("SELECT * from system.replication_queue format Vertical")) + + + print("SYNC first replica") + first.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum") + + print("SYNC second replica") + second.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum") + + print("Quorum for previous insert achieved.") + assert "1\t2011-01-01\n" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum", + settings={'select_sequential_consistency' : 1}) + + print("Now we can insert some other data.") + zero.query("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (2, '2012-02-02')") + + assert "1\t2011-01-01\n2 2012-02-02" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum") + assert "1\t2011-01-01\n2 2012-02-02" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum") + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + + +def test_insert_quorum_with_drop_partition(started_cluster): + zero = cluster.instances['s0_0_0'] + first = cluster.instances['s0_0_1'] + second = cluster.instances['s0_0_2'] + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") + + zero.query("CREATE TABLE bug.quorum_insert_with_drop_partition ON CLUSTER one_shard_three_replicas " + "(a Int8, d Date) " + "Engine = 
ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " + "PARTITION BY d ORDER BY a ") + + print("Stop fetches for bug.quorum_insert_with_drop_partition at first replica.") + first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_drop_partition") + + print("Insert with quorum. (zero and second)") + zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')") + + print("Drop partition.") + zero.query_and_get_error("ALTER TABLE bug.quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'") + + print("Insert to deleted partition") + zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')") + + print("Sync other replica from quorum.") + second.query("SYSTEM SYNC REPLICA bug.quorum_insert_with_drop_partition") + + print("Select from updated partition.") + assert "2 2011-01-01\n" == zero.query("SELECT * FROM bug.quorum_insert_with_drop_partition") + assert "2 2011-01-01\n" == second.query("SELECT * FROM bug.quorum_insert_with_drop_partition") + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + + +def test_insert_quorum_with_ttl(started_cluster): + zero = cluster.instances['s0_0_0'] + first = cluster.instances['s0_0_1'] + + zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_two_replicas") + zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_two_replicas") + + zero.query("CREATE TABLE bug.quorum_insert_with_ttl ON CLUSTER one_shard_two_replicas " + "(a Int8, d Date) " + "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " + "PARTITION BY d ORDER BY a " + "TTL d + INTERVAL 5 second " + "SETTINGS merge_with_ttl_timeout=2 ") + + print("Stop fetches for bug.quorum_insert_with_ttl at first replica.") + first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_ttl") + + print("Insert should fail since it can not reach the quorum.") + quorum_timeout = zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_ttl(a,d) VALUES(6, now())") + assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail." 
+ + print("Wait 10 seconds and the data should be dropped by TTL.") + time.sleep(10) + count = zero.query("SELECT count() FROM bug.quorum_insert_with_ttl WHERE a=6") + assert count == "0\n", "Data has to be dropped by TTL" + + print("Resume fetches for bug.quorum_insert_with_ttl at first replica.") + first.query("SYSTEM START FETCHES bug.quorum_insert_with_ttl") + time.sleep(5) + + print("Inserts should resume.") + zero.query("INSERT INTO bug.quorum_insert_with_ttl(a) VALUES(6)") From fef7140ea924fbf75e46e1a745ca612aec527113 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Apr 2020 23:35:20 +0300 Subject: [PATCH 079/752] delete old --- .../test_quorum_inserts/__init__.py | 0 .../configs/conf.d/clusters.xml | 37 ---- .../configs/conf.d/ddl.xml | 5 - .../test_quorum_inserts/configs/users.xml | 27 --- .../integration/test_quorum_inserts/test.py | 173 ------------------ 5 files changed, 242 deletions(-) delete mode 100644 dbms/tests/integration/test_quorum_inserts/__init__.py delete mode 100644 dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml delete mode 100644 dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml delete mode 100644 dbms/tests/integration/test_quorum_inserts/configs/users.xml delete mode 100644 dbms/tests/integration/test_quorum_inserts/test.py diff --git a/dbms/tests/integration/test_quorum_inserts/__init__.py b/dbms/tests/integration/test_quorum_inserts/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml b/dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml deleted file mode 100644 index adf6ad80247..00000000000 --- a/dbms/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml +++ /dev/null @@ -1,37 +0,0 @@ - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - s0_0_2 - 9000 - - - - - - diff --git a/dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml b/dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml deleted file mode 100644 index abad0dee450..00000000000 --- a/dbms/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/dbms/tests/integration/test_quorum_inserts/configs/users.xml b/dbms/tests/integration/test_quorum_inserts/configs/users.xml deleted file mode 100644 index c5114c10cde..00000000000 --- a/dbms/tests/integration/test_quorum_inserts/configs/users.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - 1 - 2 - 1 - 5000 - - - - - - - - ::/0 - - default - default - - - - - - - - diff --git a/dbms/tests/integration/test_quorum_inserts/test.py b/dbms/tests/integration/test_quorum_inserts/test.py deleted file mode 100644 index 1f6c1b9b852..00000000000 --- a/dbms/tests/integration/test_quorum_inserts/test.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import sys -import time - -import pytest - -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - clusters_schema = { - "0" : {"0" : ["0", "1", "2"]} - } - - for cluster_name, shards in clusters_schema.iteritems(): - for shard_name, replicas in shards.iteritems(): - for replica_name in replicas: - name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name) - cluster.add_instance(name, - config_dir="configs", - macros={"cluster":
cluster_name, "shard": shard_name, "replica": replica_name}, - with_zookeeper=True) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - -def test_drop_replica_and_achieve_quorum(started_cluster): - zero = cluster.instances['s0_0_0'] - first = cluster.instances['s0_0_1'] - second = cluster.instances['s0_0_2'] - - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") - zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") - - create_query = "CREATE TABLE bug.test_drop_replica_and_achieve_quorum " \ - "(a Int8, d Date) " \ - "Engine = ReplicatedMergeTree('/clickhouse/tables/test_drop_replica_and_achieve_quorum', '{}') " \ - "PARTITION BY d ORDER BY a" - - print("Create Replicated table with two replicas") - zero.query(create_query.format(0)) - first.query(create_query.format(1)) - - print("Stop fetches on one replica. Since that, it will be isolated.") - first.query("SYSTEM STOP FETCHES bug.test_drop_replica_and_achieve_quorum") - - print("Insert to other replica. This query will fail.") - quorum_timeout = zero.query_and_get_error("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (1, '2011-01-01')") - assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail." - - assert "1\t2011-01-01\n" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum", - settings={'select_sequential_consistency' : 0}) - - print("Add third replica") - second.query(create_query.format(2)) - - zero.query("SYSTEM RESTART REPLICA bug.test_drop_replica_and_achieve_quorum") - - print("START FETCHES first replica") - first.query("SYSTEM START FETCHES bug.test_drop_replica_and_achieve_quorum") - - time.sleep(5) - - print(zero.query("SELECT * from system.replicas format Vertical")) - - - print("---------") - print(zero.query("SELECT * from system.replication_queue format Vertical")) - print("---------") - - - print(first.query("SELECT * from system.replicas format Vertical")) - print("---------") - print(first.query("SELECT * from system.replication_queue format Vertical")) - print("---------") - print(second.query("SELECT * from system.replicas format Vertical")) - print("---------") - print(first.query("SELECT * from system.replication_queue format Vertical")) - - - print("SYNC first replica") - first.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum") - - print("SYNC second replica") - second.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum") - - print("Quorum for previous insert achieved.") - assert "1\t2011-01-01\n" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum", - settings={'select_sequential_consistency' : 1}) - - print("Now we can insert some other data.") - zero.query("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (2, '2012-02-02')") - - assert "1\t2011-01-01\n2 2012-02-02" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum") - assert "1\t2011-01-01\n2 2012-02-02" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum") - - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") - - -def test_insert_quorum_with_drop_partition(started_cluster): - zero = cluster.instances['s0_0_0'] - first = cluster.instances['s0_0_1'] - second = cluster.instances['s0_0_2'] - - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") - zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") - - zero.query("CREATE TABLE 
bug.quorum_insert_with_drop_partition ON CLUSTER one_shard_three_replicas " - "(a Int8, d Date) " - "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " - "PARTITION BY d ORDER BY a ") - - print("Stop fetches for bug.quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_drop_partition") - - print("Insert with quorum. (zero and second)") - zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')") - - print("Drop partition.") - zero.query_and_get_error("ALTER TABLE bug.quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'") - - print("Insert to deleted partition") - zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')") - - print("Sync other replica from quorum.") - second.query("SYSTEM SYNC REPLICA bug.quorum_insert_with_drop_partition") - - print("Select from updated partition.") - assert "2 2011-01-01\n" == zero.query("SELECT * FROM bug.quorum_insert_with_drop_partition") - assert "2 2011-01-01\n" == second.query("SELECT * FROM bug.quorum_insert_with_drop_partition") - - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") - - -def test_insert_quorum_with_ttl(started_cluster): - zero = cluster.instances['s0_0_0'] - first = cluster.instances['s0_0_1'] - - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_two_replicas") - zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_two_replicas") - - zero.query("CREATE TABLE bug.quorum_insert_with_ttl ON CLUSTER one_shard_two_replicas " - "(a Int8, d Date) " - "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " - "PARTITION BY d ORDER BY a " - "TTL d + INTERVAL 5 second " - "SETTINGS merge_with_ttl_timeout=2 ") - - print("Stop fetches for bug.quorum_insert_with_ttl at first replica.") - first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_ttl") - - print("Insert should fail since it can not reach the quorum.") - quorum_timeout = zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_ttl(a,d) VALUES(6, now())") - assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail." 
- - print("Wait 10 seconds and the data should be dropped by TTL.") - time.sleep(10) - count = zero.query("SELECT count() FROM bug.quorum_insert_with_ttl WHERE a=6") - assert count == "0\n", "Data has to be dropped by TTL" - - print("Resume fetches for bug.quorum_insert_with_ttl at first replica.") - first.query("SYSTEM START FETCHES bug.quorum_insert_with_ttl") - time.sleep(5) - - print("Inserts should resume.") - zero.query("INSERT INTO bug.quorum_insert_with_ttl(a) VALUES(6)") From bcc77fc96aae29d47df04ecac67bfea649234b88 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 6 Apr 2020 23:42:20 +0300 Subject: [PATCH 080/752] Update perf-comparison-tweaks-config.xml --- .../config/config.d/perf-comparison-tweaks-config.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml index a6d59fc3b4c..090d8ebe581 100644 --- a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml @@ -1,4 +1,7 @@ + + + :: From 2951ed4f1dec9510a23118a78a9a677f3cb3e867 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 7 Apr 2020 00:35:20 +0300 Subject: [PATCH 081/752] Corrected Common.RWLockDeadlock test --- src/Common/tests/gtest_rw_lock.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index dec4c732fd5..facd7a33c1c 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -150,9 +150,16 @@ TEST(Common, RWLockDeadlock) usleep(100000); usleep(100000); usleep(100000); + usleep(100000); try { auto holder2 = lock2->getLock(RWLockImpl::Read, "q1"); + if (!holder2) + { + throw Exception( + "Locking attempt timed out! Possible deadlock avoided. Client should retry.", + ErrorCodes::DEADLOCK_AVOIDED); + } } catch (const Exception & e) { @@ -174,9 +181,16 @@ TEST(Common, RWLockDeadlock) auto holder2 = lock2->getLock(RWLockImpl::Read, "q3"); usleep(100000); usleep(100000); + usleep(100000); try { auto holder1 = lock1->getLock(RWLockImpl::Read, "q3"); + if (!holder1) + { + throw Exception( + "Locking attempt timed out! Possible deadlock avoided. Client should retry.", + ErrorCodes::DEADLOCK_AVOIDED); + } } catch (const Exception & e) { From b98bc9afefb61645d30992b3f37dc489613363cd Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 7 Apr 2020 02:44:45 +0300 Subject: [PATCH 082/752] Reworked RWLockImpl::getLock() + phase-fairness Timeout param added to getLock() method --- src/Common/RWLock.cpp | 295 ++++++++++++++++++++++-------------------- src/Common/RWLock.h | 45 ++++--- 2 files changed, 185 insertions(+), 155 deletions(-) diff --git a/src/Common/RWLock.cpp b/src/Common/RWLock.cpp index 5dfc1b55c63..a282c1c6a91 100644 --- a/src/Common/RWLock.cpp +++ b/src/Common/RWLock.cpp @@ -29,19 +29,17 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int DEADLOCK_AVOIDED; } -/** A single-use object that represents lock's ownership +/** A one-time-use-object that represents lock ownership * For the purpose of exception safety guarantees LockHolder is to be used in two steps: - * 1. Create an instance (allocating all the memory needed) + * 1. Create an instance (allocating all the needed memory) * 2.
Associate the instance with the lock (attach to the lock and locking request group) */ class RWLockImpl::LockHolderImpl { bool bound{false}; - Type lock_type; String query_id; CurrentMetrics::Increment active_client_increment; RWLock parent; @@ -53,24 +51,30 @@ public: /// Implicit memory allocation for query_id is done here LockHolderImpl(const String & query_id_, Type type) - : lock_type{type}, query_id{query_id_}, - active_client_increment{ + : query_id{query_id_} + , active_client_increment{ type == Type::Read ? CurrentMetrics::RWLockActiveReaders : CurrentMetrics::RWLockActiveWriters} { } - ~LockHolderImpl(); + ~LockHolderImpl() + { + if (bound && parent != nullptr) + parent->unlock(it_group, query_id); + else + active_client_increment.destroy(); + } private: /// A separate method which binds the lock holder to the owned lock /// N.B. It is very important that this method produces no allocations bool bindWith(RWLock && parent_, GroupsContainer::iterator it_group_) noexcept { - if (bound) + if (bound || parent_ == nullptr) return false; it_group = it_group_; parent = std::move(parent_); - ++it_group->refererrs; + ++it_group->requests; bound = true; return true; } @@ -79,56 +83,27 @@ private: }; -namespace -{ - /// Global information about all read locks that query has. It is needed to avoid some type of deadlocks. - - class QueryLockInfo - { - private: - mutable std::mutex mutex; - std::map queries; - - public: - void add(const String & query_id) - { - std::lock_guard lock(mutex); - - const auto res = queries.emplace(query_id, 1); // may throw - if (!res.second) - ++res.first->second; - } - - void remove(const String & query_id) noexcept - { - std::lock_guard lock(mutex); - - const auto query_it = queries.find(query_id); - if (query_it != queries.cend() && --query_it->second == 0) - queries.erase(query_it); - } - - void check(const String & query_id) const - { - std::lock_guard lock(mutex); - - if (queries.find(query_id) != queries.cend()) - throw Exception("Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); - } - }; - - QueryLockInfo all_read_locks; -} - - -/** To guarantee that we do not get any piece of our data corrupted: +/** General algorithm: + * Step 1. Try the FastPath (for both Reads/Writes) + * Step 2. Find ourselves request group: attach to existing or create a new one + * Step 3. Wait/timed wait for ownership signal + * Step 3a. Check if we must handle timeout and exit + * Step 4. Persist lock ownership + * + * To guarantee that we do not get any piece of our data corrupted: * 1. Perform all actions that include allocations before changing lock's internal state * 2. Roll back any changes that make the state inconsistent * * Note: "SM" in the commentaries below stands for STATE MODIFICATION */ -RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id) +RWLockImpl::LockHolder +RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & lock_timeout_ms) { + const auto lock_deadline_tp = + (lock_timeout_ms == std::chrono::milliseconds(0)) + ? std::chrono::time_point::max() + : std::chrono::steady_clock::now() + lock_timeout_ms; + const bool request_has_query_id = query_id != NO_QUERY; Stopwatch watch(CLOCK_MONOTONIC_COARSE); @@ -145,100 +120,111 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & /// This object is placed above unique_lock, because it may lock in destructor. 
auto lock_holder = std::make_shared(query_id, type); - std::unique_lock lock(mutex); + std::unique_lock state_lock(internal_state_mtx); /// The FastPath: /// Check if the same query_id already holds the required lock in which case we can proceed without waiting if (request_has_query_id) { - const auto it_query = owner_queries.find(query_id); - if (it_query != owner_queries.end()) + const auto owner_query_it = owner_queries.find(query_id); + if (owner_query_it != owner_queries.end()) { - const auto current_owner_group = queue.begin(); + if (wrlock_owner != writers_queue.end()) + throw Exception( + "RWLockImpl::getLock(): RWLock is already locked in exclusive mode", + ErrorCodes::LOGICAL_ERROR); - /// XXX: it means we can't upgrade lock from read to write! + /// Lock upgrading is not supported if (type == Write) throw Exception( "RWLockImpl::getLock(): Cannot acquire exclusive lock while RWLock is already locked", ErrorCodes::LOGICAL_ERROR); - if (current_owner_group->type == Write) - throw Exception( - "RWLockImpl::getLock(): RWLock is already locked in exclusive mode", - ErrorCodes::LOGICAL_ERROR); - /// N.B. Type is Read here, query_id is not empty and it_query is a valid iterator - all_read_locks.add(query_id); /// SM1: may throw on insertion (nothing to roll back) - ++it_query->second; /// SM2: nothrow - lock_holder->bindWith(shared_from_this(), current_owner_group); /// SM3: nothrow + ++owner_query_it->second; /// SM1: nothrow + lock_holder->bindWith(shared_from_this(), rdlock_owner); /// SM2: nothrow finalize_metrics(); return lock_holder; } } - /** If the query already has any active read lock and tries to acquire another read lock - * but it is not in front of the queue and has to wait, deadlock is possible: - * - * Example (four queries, two RWLocks - 'a' and 'b'): - * - * --> time --> - * - * q1: ra rb - * q2: wa - * q3: rb ra - * q4: wb - * - * We will throw an exception instead. - */ - - if (type == Type::Write || queue.empty() || queue.back().type == Type::Write) + if (type == Type::Write) { - if (type == Type::Read && request_has_query_id && !queue.empty()) - all_read_locks.check(query_id); - - /// Create a new group of locking requests - queue.emplace_back(type); /// SM1: may throw (nothing to roll back) + writers_queue.emplace_back(type); /// SM1: may throw (nothing to roll back) } - else if (request_has_query_id && queue.size() > 1) - all_read_locks.check(query_id); + else if (readers_queue.empty() || + (rdlock_owner == readers_queue.begin() && !writers_queue.empty())) + { + readers_queue.emplace_back(type); /// SM1: may throw (nothing to roll back) + } + GroupsContainer::iterator it_group = + (type == Type::Write) ? std::prev(writers_queue.end()) : std::prev(readers_queue.end()); - GroupsContainer::iterator it_group = std::prev(queue.end()); + if (rdlock_owner == readers_queue.end() && wrlock_owner == writers_queue.end()) + { + if (type == Type::Read) + { + rdlock_owner = it_group; /// SM2: nothrow + } + else + { + wrlock_owner = it_group; /// SM2: nothrow + } + } + else + { + /// Wait until our group becomes the lock owner + const auto predicate = [&] () { return it_group == (type == Read ? 
rdlock_owner : wrlock_owner); }; - /// We need to reference the associated group before waiting to guarantee - /// that this group does not get deleted prematurely - ++it_group->refererrs; + if (lock_deadline_tp == std::chrono::time_point::max()) + { + ++it_group->requests; + it_group->cv.wait(state_lock, predicate); + --it_group->requests; + } + else + { + ++it_group->requests; + const auto wait_result = it_group->cv.wait_until(state_lock, lock_deadline_tp, predicate); + --it_group->requests; - /// Wait a notification until we will be the only in the group. - it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); }); + /// Step 3a. Check if we must handle timeout and exit + if (!wait_result) /// Wait timed out! + { + if (it_group->requests == 0) + { + /// Roll back SM1 + if (type == Read) + { + readers_queue.erase(it_group); /// Rollback(SM1): nothrow + } + else + { + writers_queue.erase(it_group); /// Rollback(SM1): nothrow + } + } - --it_group->refererrs; + return nullptr; + } + } + } if (request_has_query_id) { try { - if (type == Type::Read) - all_read_locks.add(query_id); /// SM2: may throw on insertion - /// and is safe to roll back unconditionally const auto emplace_res = - owner_queries.emplace(query_id, 1); /// SM3: may throw on insertion + owner_queries.emplace(query_id, 1); /// SM2: may throw on insertion if (!emplace_res.second) - ++emplace_res.first->second; /// SM4: nothrow + ++emplace_res.first->second; /// SM3: nothrow } catch (...) { /// Methods std::list<>::emplace_back() and std::unordered_map<>::emplace() provide strong exception safety - /// We only need to roll back the changes to these objects: all_read_locks and the locking queue - if (type == Type::Read) - all_read_locks.remove(query_id); /// Rollback(SM2): nothrow - - if (it_group->refererrs == 0) - { - const auto next = queue.erase(it_group); /// Rollback(SM1): nothrow - if (next != queue.end()) - next->cv.notify_all(); - } + /// We only need to roll back the changes to these objects: owner_queries and the readers/writers queue + if (it_group->requests == 0) + erase_group(it_group); /// Rollback(SM1): nothrow throw; } @@ -251,10 +237,9 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & } -/** The sequence points of acquiring lock's ownership by an instance of LockHolderImpl: - * 1. all_read_locks is updated - * 2. owner_queries is updated - * 3. request group is updated by LockHolderImpl which in turn becomes "bound" +/** The sequence points of acquiring lock ownership by an instance of LockHolderImpl: + * 1. owner_queries is updated + * 2. request group is updated by LockHolderImpl which in turn becomes "bound" * * If by the time when destructor of LockHolderImpl is called the instance has been "bound", * it is guaranteed that all three steps have been executed successfully and the resulting state is consistent. @@ -262,38 +247,74 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & * * We do not employ try-catch: if something bad happens, there is nothing we can do =( */ -RWLockImpl::LockHolderImpl::~LockHolderImpl() +void RWLockImpl::unlock(GroupsContainer::iterator owner_group, const String & query_id) noexcept { - if (!bound || parent == nullptr) + std::lock_guard state_lock(internal_state_mtx); + + /// All of theses are Undefined behavior and nothing we can do! 
+ if (rdlock_owner == readers_queue.end() && wrlock_owner == writers_queue.end()) return; - - std::lock_guard lock(parent->mutex); - - /// The associated group must exist (and be the beginning of the queue?) - if (parent->queue.empty() || it_group != parent->queue.begin()) + if (rdlock_owner != readers_queue.end() && owner_group != rdlock_owner) + return; + if (wrlock_owner != writers_queue.end() && owner_group != wrlock_owner) return; /// If query_id is not empty it must be listed in parent->owner_queries - if (query_id != RWLockImpl::NO_QUERY) + if (query_id != NO_QUERY) { - const auto owner_it = parent->owner_queries.find(query_id); - if (owner_it != parent->owner_queries.end()) + const auto owner_query_it = owner_queries.find(query_id); + if (owner_query_it != owner_queries.end()) { - if (--owner_it->second == 0) /// SM: nothrow - parent->owner_queries.erase(owner_it); /// SM: nothrow - - if (lock_type == RWLockImpl::Read) - all_read_locks.remove(query_id); /// SM: nothrow + if (--owner_query_it->second == 0) /// SM: nothrow + owner_queries.erase(owner_query_it); /// SM: nothrow } } - /// If we are the last remaining referrer, remove the group and notify the next group - if (--it_group->refererrs == 0) /// SM: nothrow - { - const auto next = parent->queue.erase(it_group); /// SM: nothrow - if (next != parent->queue.end()) - next->cv.notify_all(); - } + /// If we are the last remaining referrer, remove this QNode and notify the next one + if (--owner_group->requests == 0) /// SM: nothrow + erase_group(owner_group); } + +void RWLockImpl::erase_group(GroupsContainer::iterator group_it) noexcept +{ + rdlock_owner = readers_queue.end(); + wrlock_owner = writers_queue.end(); + + if (group_it->type == Read) + { + readers_queue.erase(group_it); + /// Prepare next phase + if (!writers_queue.empty()) + { + wrlock_owner = writers_queue.begin(); + } + else + { + rdlock_owner = readers_queue.begin(); + } + } + else + { + writers_queue.erase(group_it); + /// Prepare next phase + if (!readers_queue.empty()) + { + rdlock_owner = readers_queue.begin(); + } + else + { + wrlock_owner = writers_queue.begin(); + } + } + + if (rdlock_owner != readers_queue.end()) + { + rdlock_owner->cv.notify_all(); + } + else if (wrlock_owner != writers_queue.end()) + { + wrlock_owner->cv.notify_one(); + } +} } diff --git a/src/Common/RWLock.h b/src/Common/RWLock.h index a7084720d6c..81b8551060a 100644 --- a/src/Common/RWLock.h +++ b/src/Common/RWLock.h @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -19,7 +20,8 @@ using RWLock = std::shared_ptr; /// Implements shared lock with FIFO service -/// Can be acquired recursively (several calls for the same query) in Read mode +/// (Phase Fair RWLock as suggested in https://www.cs.unc.edu/~anderson/papers/rtsj10-for-web.pdf) +/// Can be acquired recursively (for the same query) in Read mode /// /// NOTE: it is important to allow acquiring the same lock in Read mode without waiting if it is already /// acquired by another thread of the same query. Otherwise the following deadlock is possible: @@ -42,37 +44,44 @@ public: friend class LockHolderImpl; using LockHolder = std::shared_ptr; - /// Waits in the queue and returns appropriate lock - /// Empty query_id means the lock is acquired out of the query context (e.g. in a background thread). - LockHolder getLock(Type type, const String & query_id); + /// Empty query_id means the lock is acquired from outside of query context (e.g. in a background thread). 
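 /// (Illustrative note, not part of the patch: lock_timeout_ms == 0 is treated as
 /// "wait indefinitely"; any other value makes getLock() a timed wait that returns
 /// nullptr once the deadline passes, and callers such as tryLockTimed() in patch
 /// 083 translate that nullptr into a DEADLOCK_AVOIDED exception.)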
+ LockHolder getLock(Type type, const String & query_id, + const std::chrono::milliseconds & lock_timeout_ms = std::chrono::milliseconds(0)); /// Use as query_id to acquire a lock outside the query context. inline static const String NO_QUERY = String(); + inline static const auto default_locking_timeout = std::chrono::milliseconds(120000); private: - RWLockImpl() = default; - - struct Group; - using GroupsContainer = std::list; - using OwnerQueryIds = std::unordered_map; - - /// Group of locking requests that should be granted concurrently - /// i.e. a group can contain several readers, but only one writer + /// Group of locking requests that should be granted simultaneously + /// i.e. one or several readers or a single writer struct Group { const Type type; - size_t refererrs; + size_t requests; std::condition_variable cv; /// all locking requests of the group wait on this condvar - explicit Group(Type type_) : type{type_}, refererrs{0} {} + explicit Group(Type type_) : type{type_}, requests{0} {} }; - GroupsContainer queue; + using GroupsContainer = std::list; + using OwnerQueryIds = std::unordered_map; + +private: + mutable std::mutex internal_state_mtx; + + GroupsContainer readers_queue; + GroupsContainer writers_queue; + GroupsContainer::iterator rdlock_owner{readers_queue.end()}; /// equals to readers_queue.begin() in read phase + /// or readers_queue.end() otherwise + GroupsContainer::iterator wrlock_owner{writers_queue.end()}; /// equals to writers_queue.begin() in write phase + /// or writers_queue.end() otherwise OwnerQueryIds owner_queries; - mutable std::mutex mutex; +private: + RWLockImpl() = default; + void unlock(GroupsContainer::iterator group_it, const String & query_id) noexcept; + void erase_group(GroupsContainer::iterator group_it) noexcept; }; - - } From 09ce548376e8631cc19f5be00caea3c048f27b94 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 7 Apr 2020 02:45:51 +0300 Subject: [PATCH 083/752] All locks in IStorage have timeouts now --- src/Storages/IStorage.cpp | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 4d916ca1b46..ee751be6f5a 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int TABLE_IS_DROPPED; extern const int NOT_IMPLEMENTED; + extern const int DEADLOCK_AVOIDED; } IStorage::IStorage(StorageID storage_id_, ColumnsDescription virtuals_) : storage_id(std::move(storage_id_)), virtuals(std::move(virtuals_)) @@ -314,12 +315,22 @@ bool IStorage::isVirtualColumn(const String & column_name) const return getColumns().get(column_name).is_virtual; } +RWLockImpl::LockHolder tryLockTimed(const RWLock & rwlock, RWLockImpl::Type type, const String & query_id) +{ + auto lock_holder = rwlock->getLock(type, query_id, RWLockImpl::default_locking_timeout); + if (!lock_holder) + throw Exception( + "Locking attempt timed out! Possible deadlock avoided. 
Client should retry.", + ErrorCodes::DEADLOCK_AVOIDED); + return std::move(lock_holder); +} + TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) { TableStructureReadLockHolder result; if (will_add_new_data) - result.new_data_structure_lock = new_data_structure_lock->getLock(RWLockImpl::Read, query_id); - result.structure_lock = structure_lock->getLock(RWLockImpl::Read, query_id); + result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Read, query_id); + result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Read, query_id); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); @@ -329,7 +340,7 @@ TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_d TableStructureWriteLockHolder IStorage::lockAlterIntention(const String & query_id) { TableStructureWriteLockHolder result; - result.alter_intention_lock = alter_intention_lock->getLock(RWLockImpl::Write, query_id); + result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); @@ -342,20 +353,20 @@ void IStorage::lockStructureExclusively(TableStructureWriteLockHolder & lock_hol throw Exception("Alter intention lock for table " + getStorageID().getNameForLogs() + " was not taken. This is a bug.", ErrorCodes::LOGICAL_ERROR); if (!lock_holder.new_data_structure_lock) - lock_holder.new_data_structure_lock = new_data_structure_lock->getLock(RWLockImpl::Write, query_id); - lock_holder.structure_lock = structure_lock->getLock(RWLockImpl::Write, query_id); + lock_holder.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id); + lock_holder.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id); } TableStructureWriteLockHolder IStorage::lockExclusively(const String & query_id) { TableStructureWriteLockHolder result; - result.alter_intention_lock = alter_intention_lock->getLock(RWLockImpl::Write, query_id); + result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); - result.new_data_structure_lock = new_data_structure_lock->getLock(RWLockImpl::Write, query_id); - result.structure_lock = structure_lock->getLock(RWLockImpl::Write, query_id); + result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id); + result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id); return result; } From 081187dd1fec6a231b5e8334ff24321226865d79 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 7 Apr 2020 03:06:48 +0300 Subject: [PATCH 084/752] Unit test for RWLock is set to use timeout for avoiding deadlocks --- src/Common/tests/gtest_rw_lock.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index facd7a33c1c..73987a25508 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -153,7 +153,7 @@ TEST(Common, RWLockDeadlock) usleep(100000); try { - auto holder2 = lock2->getLock(RWLockImpl::Read, "q1"); + auto holder2 = lock2->getLock(RWLockImpl::Read, "q1", std::chrono::milliseconds(100)); if (!holder2) { throw Exception( @@ -184,7 +184,7 @@ TEST(Common, RWLockDeadlock) usleep(100000); try { - auto holder1 = 
lock1->getLock(RWLockImpl::Read, "q3"); + auto holder1 = lock1->getLock(RWLockImpl::Read, "q3", std::chrono::milliseconds(100)); if (!holder1) { throw Exception( From 2e76e4d1ed0e25282ea224794f1262be07b63709 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 7 Apr 2020 10:15:59 +0300 Subject: [PATCH 085/752] Made clang happy - fixed build --- src/Storages/IStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index ee751be6f5a..345ac6d5aac 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -322,7 +322,7 @@ RWLockImpl::LockHolder tryLockTimed(const RWLock & rwlock, RWLockImpl::Type type throw Exception( "Locking attempt timed out! Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); - return std::move(lock_holder); + return lock_holder; } TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) From 78f97a7a59fdf0053e2d2e9f235f7f8e5556f414 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 7 Apr 2020 12:23:08 +0300 Subject: [PATCH 086/752] Build fixes --- src/Common/RWLock.cpp | 14 +++++++------- src/Common/RWLock.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Common/RWLock.cpp b/src/Common/RWLock.cpp index a282c1c6a91..d6b8cbd244f 100644 --- a/src/Common/RWLock.cpp +++ b/src/Common/RWLock.cpp @@ -224,7 +224,7 @@ RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::c /// Methods std::list<>::emplace_back() and std::unordered_map<>::emplace() provide strong exception safety /// We only need to roll back the changes to these objects: owner_queries and the readers/writers queue if (it_group->requests == 0) - erase_group(it_group); /// Rollback(SM1): nothrow + eraseGroup(it_group); /// Rollback(SM1): nothrow throw; } @@ -247,16 +247,16 @@ RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::c * * We do not employ try-catch: if something bad happens, there is nothing we can do =( */ -void RWLockImpl::unlock(GroupsContainer::iterator owner_group, const String & query_id) noexcept +void RWLockImpl::unlock(GroupsContainer::iterator group_it, const String & query_id) noexcept { std::lock_guard state_lock(internal_state_mtx); /// All of theses are Undefined behavior and nothing we can do! 
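    /// A sketch of the invariant being checked below (inferred from this
    /// function, not stated elsewhere in the patch): unlock() may only be
    /// called for the group that currently owns the lock. A double unlock, or
    /// an unlock of a group still waiting in its queue, means the LockHolder
    /// bookkeeping is broken; since this function is noexcept, all it can do
    /// is return silently.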
if (rdlock_owner == readers_queue.end() && wrlock_owner == writers_queue.end()) return; - if (rdlock_owner != readers_queue.end() && owner_group != rdlock_owner) + if (rdlock_owner != readers_queue.end() && group_it != rdlock_owner) return; - if (wrlock_owner != writers_queue.end() && owner_group != wrlock_owner) + if (wrlock_owner != writers_queue.end() && group_it != wrlock_owner) return; /// If query_id is not empty it must be listed in parent->owner_queries @@ -271,12 +271,12 @@ void RWLockImpl::unlock(GroupsContainer::iterator owner_group, const String & qu } /// If we are the last remaining referrer, remove this QNode and notify the next one - if (--owner_group->requests == 0) /// SM: nothrow - erase_group(owner_group); + if (--group_it->requests == 0) /// SM: nothrow + eraseGroup(group_it); } -void RWLockImpl::erase_group(GroupsContainer::iterator group_it) noexcept +void RWLockImpl::eraseGroup(GroupsContainer::iterator group_it) noexcept { rdlock_owner = readers_queue.end(); wrlock_owner = writers_queue.end(); diff --git a/src/Common/RWLock.h b/src/Common/RWLock.h index 81b8551060a..edc22cffaa0 100644 --- a/src/Common/RWLock.h +++ b/src/Common/RWLock.h @@ -82,6 +82,6 @@ private: private: RWLockImpl() = default; void unlock(GroupsContainer::iterator group_it, const String & query_id) noexcept; - void erase_group(GroupsContainer::iterator group_it) noexcept; + void eraseGroup(GroupsContainer::iterator group_it) noexcept; }; } From 53b5dade5ed488f8be0f0b8e7a624be3e5bb206c Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Mon, 6 Apr 2020 22:30:16 +0900 Subject: [PATCH 087/752] Implement transform_null_in --- src/Core/Settings.h | 1 + src/Functions/in.cpp | 52 ++++++++--- src/Interpreters/ActionsVisitor.cpp | 16 ++-- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/NullableUtils.cpp | 9 +- src/Interpreters/NullableUtils.h | 2 +- src/Interpreters/Set.cpp | 19 ++-- src/Interpreters/Set.h | 10 +- src/Interpreters/SyntaxAnalyzer.cpp | 42 +++++++-- src/Interpreters/misc.h | 4 +- src/Storages/StorageSet.cpp | 6 +- .../01231_operator_null_in.reference | 54 +++++++++++ .../0_stateless/01231_operator_null_in.sql | 93 +++++++++++++++++++ 13 files changed, 267 insertions(+), 43 deletions(-) create mode 100644 tests/queries/0_stateless/01231_operator_null_in.reference create mode 100644 tests/queries/0_stateless/01231_operator_null_in.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 753231603b2..29bfa82d89b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -406,6 +406,7 @@ struct Settings : public SettingsCollection M(SettingBool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(SettingUInt64, max_parser_depth, 1000, "Maximum parser depth.", 0) \ M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ + M(SettingBool, transform_null_in, false, "Enable null verification of the 'IN' operator.", 0) \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index 0b25ca201bb..a89535c675a 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -21,38 +21,62 @@ namespace ErrorCodes * notIn(x, set) - and NOT IN. 
*/ -template +template struct FunctionInName; template <> -struct FunctionInName +struct FunctionInName { static constexpr auto name = "in"; }; template <> -struct FunctionInName +struct FunctionInName { static constexpr auto name = "globalIn"; }; template <> -struct FunctionInName +struct FunctionInName { static constexpr auto name = "notIn"; }; template <> -struct FunctionInName +struct FunctionInName { static constexpr auto name = "globalNotIn"; }; -template +template <> +struct FunctionInName +{ + static constexpr auto name = "nullIn"; +}; + +template <> +struct FunctionInName +{ + static constexpr auto name = "globalNullIn"; +}; + +template <> +struct FunctionInName +{ + static constexpr auto name = "notNullIn"; +}; + +template <> +struct FunctionInName +{ + static constexpr auto name = "globalNotNullIn"; +}; + +template class FunctionIn : public IFunction { public: - static constexpr auto name = FunctionInName::name; + static constexpr auto name = FunctionInName::name; static FunctionPtr create(const Context &) { return std::make_shared(); @@ -75,6 +99,8 @@ public: bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return null_is_skipped; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override { /// NOTE: after updating this code, check that FunctionIgnoreExceptNull returns the same type of column. @@ -123,10 +149,14 @@ public: void registerFunctionsIn(FunctionFactory & factory) { - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 4e008a81973..f7d64d54f27 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -64,7 +64,7 @@ static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypes } template -static Block createBlockFromCollection(const Collection & collection, const DataTypes & types) +static Block createBlockFromCollection(const Collection & collection, const DataTypes & types, const Context & context) { size_t columns_num = types.size(); MutableColumns columns(columns_num); @@ -77,7 +77,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (columns_num == 1) { auto field = convertFieldToType(value, *types[0]); - if (!field.isNull()) + if (!field.isNull() || context.getSettingsRef().transform_null_in) columns[0]->insert(std::move(field)); } else @@ -100,7 +100,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data for (; i < tuple_size; ++i) { tuple_values[i] = convertFieldToType(tuple[i], *types[i]); - if (tuple_values[i].isNull()) + if (tuple_values[i].isNull() && !context.getSettingsRef().transform_null_in) break; } @@ -170,23 +170,23 @@ SetPtr makeExplicitSet( if (left_type_depth == right_type_depth) { Array array{right_arg_value}; - block = createBlockFromCollection(array, set_element_types); + block = createBlockFromCollection(array, set_element_types, context); } /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. 
else if (left_type_depth + 1 == right_type_depth) { auto type_index = right_arg_type->getTypeId(); if (type_index == TypeIndex::Tuple) - block = createBlockFromCollection(DB::get(right_arg_value), set_element_types); + block = createBlockFromCollection(DB::get(right_arg_value), set_element_types, context); else if (type_index == TypeIndex::Array) - block = createBlockFromCollection(DB::get(right_arg_value), set_element_types); + block = createBlockFromCollection(DB::get(right_arg_value), set_element_types, context); else throw_unsupported_type(right_arg_type); } else throw_unsupported_type(right_arg_type); - SetPtr set = std::make_shared(size_limits, create_ordered_set); + SetPtr set = std::make_shared(size_limits, create_ordered_set, context); set->setHeader(block); set->insertFromBlock(block); @@ -654,7 +654,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su return subquery_for_set.set; } - SetPtr set = std::make_shared(data.set_size_limit, false); + SetPtr set = std::make_shared(data.set_size_limit, false, data.context); /** The following happens for GLOBAL INs: * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d1be66df217..d0b44b91af7 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -291,7 +291,7 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, {}, query_options); BlockIO res = interpreter_subquery->execute(); - SetPtr set = std::make_shared(settings.size_limits_for_set, true); + SetPtr set = std::make_shared(settings.size_limits_for_set, true, context); set->setHeader(res.in->getHeader()); res.in->readPrefix(); diff --git a/src/Interpreters/NullableUtils.cpp b/src/Interpreters/NullableUtils.cpp index fe2801f5d11..5c0202d1de3 100644 --- a/src/Interpreters/NullableUtils.cpp +++ b/src/Interpreters/NullableUtils.cpp @@ -5,7 +5,7 @@ namespace DB { -ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map) +ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map, bool exact_null) { ColumnPtr null_map_holder; @@ -38,7 +38,12 @@ ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullM PaddedPODArray & mutable_null_map = assert_cast(*mutable_null_map_holder).getData(); const PaddedPODArray & other_null_map = column_nullable->getNullMapData(); for (size_t i = 0, size = mutable_null_map.size(); i < size; ++i) - mutable_null_map[i] |= other_null_map[i]; + { + if (exact_null) + mutable_null_map[i] &= other_null_map[i]; + else + mutable_null_map[i] |= other_null_map[i]; + } null_map_holder = std::move(mutable_null_map_holder); } diff --git a/src/Interpreters/NullableUtils.h b/src/Interpreters/NullableUtils.h index ee3193919cd..054835f8bef 100644 --- a/src/Interpreters/NullableUtils.h +++ b/src/Interpreters/NullableUtils.h @@ -8,6 +8,6 @@ namespace DB * In 'null_map' return a map of positions where at least one column was NULL. * @returns ownership column of null_map. 
*/ -ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map); +ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map, bool exact_null = false); } diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 3c79ea5174d..e63eff37047 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -87,6 +87,8 @@ void NO_INLINE Set::insertFromBlockImplCase( { if ((*null_map)[i]) { + has_null = true; + if constexpr (build_filter) { (*out_filter)[i] = false; @@ -138,7 +140,7 @@ void Set::setHeader(const Block & header) /// We will insert to the Set only keys, where all components are not NULL. ConstNullMapPtr null_map{}; - ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map, context.getSettingsRef().transform_null_in); if (fill_set_elements) { @@ -228,7 +230,7 @@ static Field extractValueFromNode(const ASTPtr & node, const IDataType & type, c throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); } -void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & context) +void Set::createFromAST(const DataTypes & types, ASTPtr node) { /// Will form a block with values from the set. @@ -249,7 +251,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co { Field value = extractValueFromNode(elem, *types[0], context); - if (!value.isNull()) + if (!value.isNull() || context.getSettingsRef().transform_null_in) columns[0]->insert(value); } else if (const auto * func = elem->as()) @@ -284,7 +286,7 @@ void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & co : extractValueFromNode(func->arguments->children[i], *types[i], context); /// If at least one of the elements of the tuple has an impossible (outside the range of the type) value, then the entire tuple too. - if (value.isNull()) + if (value.isNull() && !context.getSettings().transform_null_in) break; tuple_values[i] = value; @@ -348,7 +350,7 @@ ColumnPtr Set::execute(const Block & block, bool negative) const /// We will check existence in Set only for keys, where all components are not NULL. ConstNullMapPtr null_map{}; - ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map, context.getSettingsRef().transform_null_in); executeOrdinary(key_columns, vec_res, negative, null_map); @@ -390,7 +392,12 @@ void NO_INLINE Set::executeImplCase( for (size_t i = 0; i < rows; ++i) { if (has_null_map && (*null_map)[i]) - vec_res[i] = negative; + { + if (has_null) + vec_res[i] = !negative; + else + vec_res[i] = negative; + } else { auto find_result = state.findKey(method.data, i, pool); diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index c9605d4e11e..da20ffc41b6 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -30,9 +30,9 @@ public: /// (that is useful only for checking that some value is in the set and may not store the original values), /// store all set elements in explicit form. /// This is needed for subsequent use for index. 
- Set(const SizeLimits & limits_, bool fill_set_elements_) + Set(const SizeLimits & limits_, bool fill_set_elements_, const Context & context_) : log(&Logger::get("Set")), - limits(limits_), fill_set_elements(fill_set_elements_) + limits(limits_), fill_set_elements(fill_set_elements_), context(context_) { } @@ -45,7 +45,7 @@ public: * 'types' - types of what are on the left hand side of IN. * 'node' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). */ - void createFromAST(const DataTypes & types, ASTPtr node, const Context & context); + void createFromAST(const DataTypes & types, ASTPtr node); /** Create a Set from stream. * Call setHeader, then call insertFromBlock for each block. @@ -113,6 +113,10 @@ private: /// Do we need to additionally store all elements of the set in explicit form for subsequent use for index. bool fill_set_elements; + const Context & context; + + bool has_null = false; + /// Check if set contains all the data. bool is_created = false; diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index f93d11fa1da..a1560e26367 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -60,25 +60,40 @@ namespace using LogAST = DebugASTLog; /// set to true to enable logs -/// Select implementation of countDistinct based on settings. +/// Select implementation of a function based on settings. /// Important that it is done as query rewrite. It means rewritten query /// will be sent to remote servers during distributed query execution, /// and on all remote servers, function implementation will be same. +template struct CustomizeFunctionsData { using TypeToVisit = ASTFunction; - const String & count_distinct; + const String & customized_func_name; void visit(ASTFunction & func, ASTPtr &) { - if (Poco::toLower(func.name) == "countdistinct") - func.name = count_distinct; + if (Poco::toLower(func.name) == func_name) + { + func.name = customized_func_name; + } } }; -using CustomizeFunctionsMatcher = OneTypeMatcher; -using CustomizeFunctionsVisitor = InDepthNodeVisitor; +char countdistinct[] = "countdistinct"; +using CustomizeFunctionsVisitor = InDepthNodeVisitor>, true>; + +char in[] = "in"; +using CustomizeInVisitor = InDepthNodeVisitor>, true>; + +char notIn[] = "notin"; +using CustomizeNotInVisitor = InDepthNodeVisitor>, true>; + +char globalIn[] = "globalin"; +using CustomizeGlobalInVisitor = InDepthNodeVisitor>, true>; + +char globalNotIn[] = "globalnotin"; +using CustomizeGlobalNotInVisitor = InDepthNodeVisitor>, true>; /// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. 
@@ -889,6 +904,21 @@ void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings CustomizeFunctionsVisitor::Data data{settings.count_distinct_implementation}; CustomizeFunctionsVisitor(data).visit(query); + if (settings.transform_null_in) + { + CustomizeInVisitor::Data data_null_in{"nullIn"}; + CustomizeInVisitor(data_null_in).visit(query); + + CustomizeNotInVisitor::Data data_not_null_in{"notNullIn"}; + CustomizeNotInVisitor(data_not_null_in).visit(query); + + CustomizeGlobalInVisitor::Data data_global_null_in{"globalNullIn"}; + CustomizeGlobalInVisitor(data_global_null_in).visit(query); + + CustomizeGlobalNotInVisitor::Data data_global_not_null_in{"globalNotNullIn"}; + CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query); + } + /// Creates a dictionary `aliases`: alias -> ASTPtr QueryAliasesVisitor(aliases).visit(query); diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index e2f34375dc0..0fd0e12a4bb 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -5,12 +5,12 @@ namespace DB inline bool functionIsInOperator(const std::string & name) { - return name == "in" || name == "notIn"; + return name == "in" || name == "notIn" || name == "nullIn" || name == "notNullIn"; } inline bool functionIsInOrGlobalInOperator(const std::string & name) { - return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn"; + return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn" || name == "globalNullIn" || name == "globalNotNullIn"; } inline bool functionIsLikeOperator(const std::string & name) diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 72ae46787c8..45e1f81b487 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -112,7 +112,7 @@ StorageSet::StorageSet( const ConstraintsDescription & constraints_, const Context & context_) : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_}, - set(std::make_shared(SizeLimits(), false)) + set(std::make_shared(SizeLimits(), false, context_)) { Block header = getSampleBlock(); header = header.sortColumns(); @@ -127,7 +127,7 @@ void StorageSet::finishInsert() { set->finishInsert(); } size_t StorageSet::getSize() const { return set->getTotalRowCount(); } -void StorageSet::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageSet::truncate(const ASTPtr &, const Context & context, TableStructureWriteLockHolder &) { Poco::File(path).remove(true); Poco::File(path).createDirectories(); @@ -137,7 +137,7 @@ void StorageSet::truncate(const ASTPtr &, const Context &, TableStructureWriteLo header = header.sortColumns(); increment = 0; - set = std::make_shared(SizeLimits(), false); + set = std::make_shared(SizeLimits(), false, context); set->setHeader(header); } diff --git a/tests/queries/0_stateless/01231_operator_null_in.reference b/tests/queries/0_stateless/01231_operator_null_in.reference new file mode 100644 index 00000000000..7432b657191 --- /dev/null +++ b/tests/queries/0_stateless/01231_operator_null_in.reference @@ -0,0 +1,54 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01231_operator_null_in.sql b/tests/queries/0_stateless/01231_operator_null_in.sql new file mode 100644 index 00000000000..12361373001 --- /dev/null +++ b/tests/queries/0_stateless/01231_operator_null_in.sql @@ -0,0 +1,93 @@ +DROP 
TABLE IF EXISTS null_in; +CREATE TABLE null_in (dt DateTime, idx int, i Nullable(int), s Nullable(String)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; + +INSERT INTO null_in VALUES (1, 1, 1, '1') (2, 2, NULL, NULL) (3, 3, 3, '3') (4, 4, NULL, NULL) (5, 5, 5, '5'); + +SELECT count() == 2 FROM null_in WHERE i in (1, 3, NULL); +SELECT count() == 2 FROM null_in WHERE i in range(4); +SELECT count() == 2 FROM null_in WHERE s in ('1', '3', NULL); +SELECT count() == 2 FROM null_in WHERE i global in (1, 3, NULL); +SELECT count() == 2 FROM null_in WHERE i global in range(4); +SELECT count() == 2 FROM null_in WHERE s global in ('1', '3', NULL); + +SELECT count() == 1 FROM null_in WHERE i not in (1, 3, NULL); +SELECT count() == 1 FROM null_in WHERE i not in range(4); +SELECT count() == 1 FROM null_in WHERE s not in ('1', '3', NULL); +SELECT count() == 1 FROM null_in WHERE i global not in (1, 3, NULL); +SELECT count() == 1 FROM null_in WHERE i global not in range(4); +SELECT count() == 1 FROM null_in WHERE s global not in ('1', '3', NULL); + +SET transform_null_in = 1; + +SELECT count() == 4 FROM null_in WHERE i in (1, 3, NULL); +SELECT count() == 2 FROM null_in WHERE i in range(4); +SELECT count() == 4 FROM null_in WHERE s in ('1', '3', NULL); +SELECT count() == 4 FROM null_in WHERE i global in (1, 3, NULL); +SELECT count() == 2 FROM null_in WHERE i global in range(4); +SELECT count() == 4 FROM null_in WHERE s global in ('1', '3', NULL); + +SELECT count() == 1 FROM null_in WHERE i not in (1, 3, NULL); +SELECT count() == 3 FROM null_in WHERE i not in range(4); +SELECT count() == 1 FROM null_in WHERE s not in ('1', '3', NULL); +SELECT count() == 1 FROM null_in WHERE i global not in (1, 3, NULL); +SELECT count() == 3 FROM null_in WHERE i global not in range(4); +SELECT count() == 1 FROM null_in WHERE s global not in ('1', '3', NULL); + +SELECT count() == 3 FROM null_in WHERE i not in (1, 3); +SELECT count() == 3 FROM null_in WHERE i not in range(4); +SELECT count() == 3 FROM null_in WHERE s not in ('1', '3'); +SELECT count() == 3 FROM null_in WHERE i global not in (1, 3); +SELECT count() == 3 FROM null_in WHERE i global not in range(4); +SELECT count() == 3 FROM null_in WHERE s global not in ('1', '3'); + +DROP TABLE IF EXISTS null_in; + +DROP TABLE IF EXISTS null_in_subquery; +CREATE TABLE null_in_subquery (dt DateTime, idx int, i Nullable(UInt64)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +INSERT INTO null_in_subquery SELECT number % 3, number, number FROM system.numbers LIMIT 99999; + +SELECT count() == 33333 FROM null_in_subquery WHERE i in (SELECT i FROM null_in_subquery WHERE dt = 0); +SELECT count() == 66666 FROM null_in_subquery WHERE i not in (SELECT i FROM null_in_subquery WHERE dt = 1); +SELECT count() == 33333 FROM null_in_subquery WHERE i global in (SELECT i FROM null_in_subquery WHERE dt = 2); +SELECT count() == 66666 FROM null_in_subquery WHERE i global not in (SELECT i FROM null_in_subquery WHERE dt = 0); + +-- For index column +SELECT count() == 33333 FROM null_in_subquery WHERE idx in (SELECT idx FROM null_in_subquery WHERE dt = 0); +SELECT count() == 66666 FROM null_in_subquery WHERE idx not in (SELECT idx FROM null_in_subquery WHERE dt = 1); +SELECT count() == 33333 FROM null_in_subquery WHERE idx global in (SELECT idx FROM null_in_subquery WHERE dt = 2); +SELECT count() == 66666 FROM null_in_subquery WHERE idx global not in (SELECT idx FROM null_in_subquery WHERE dt = 0); + +INSERT INTO null_in_subquery VALUES (0, 123456780, NULL); +INSERT INTO null_in_subquery VALUES 
(1, 123456781, NULL); + +SELECT count() == 33335 FROM null_in_subquery WHERE i in (SELECT i FROM null_in_subquery WHERE dt = 0); +SELECT count() == 66666 FROM null_in_subquery WHERE i not in (SELECT i FROM null_in_subquery WHERE dt = 1); +SELECT count() == 33333 FROM null_in_subquery WHERE i in (SELECT i FROM null_in_subquery WHERE dt = 2); +SELECT count() == 66668 FROM null_in_subquery WHERE i not in (SELECT i FROM null_in_subquery WHERE dt = 2); +SELECT count() == 33335 FROM null_in_subquery WHERE i global in (SELECT i FROM null_in_subquery WHERE dt = 0); +SELECT count() == 66666 FROM null_in_subquery WHERE i global not in (SELECT i FROM null_in_subquery WHERE dt = 1); +SELECT count() == 33333 FROM null_in_subquery WHERE i global in (SELECT i FROM null_in_subquery WHERE dt = 2); +SELECT count() == 66668 FROM null_in_subquery WHERE i global not in (SELECT i FROM null_in_subquery WHERE dt = 2); + +DROP TABLE IF EXISTS null_in_subquery; + + +DROP TABLE IF EXISTS null_in_tuple; +CREATE TABLE null_in_tuple (dt DateTime, idx int, t Tuple(Nullable(UInt64), Nullable(String))) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; +INSERT INTO null_in_tuple VALUES (1, 1, (1, '1')) (2, 2, (2, NULL)) (3, 3, (NULL, '3')) (4, 4, (NULL, NULL)) + +SET transform_null_in = 0; + +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1')] FROM null_in_tuple WHERE t in ((1, '1'), (NULL, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3'), (NULL, NULL)] FROM null_in_tuple WHERE t not in ((1, '1'), (NULL, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1')] FROM null_in_tuple WHERE t global in ((1, '1'), (NULL, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3'), (NULL, NULL)] FROM null_in_tuple WHERE t global not in ((1, '1'), (NULL, NULL)); + +SET transform_null_in = 1; + +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1'), (NULL, NULL)] FROM null_in_tuple WHERE t in ((1, '1'), (NULL, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3')] FROM null_in_tuple WHERE t not in ((1, '1'), (NULL, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1'), (NULL, NULL)] FROM null_in_tuple WHERE t global in ((1, '1'), (NULL, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3')] FROM null_in_tuple WHERE t global not in ((1, '1'), (NULL, NULL)); + +DROP TABLE IF EXISTS null_in_subquery; From 621d26bcf7fb1d04c47144a2c9f4767a0c0b4f38 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 7 Apr 2020 14:34:35 +0300 Subject: [PATCH 088/752] Better timeout diagnostics message --- src/Common/RWLock.h | 2 +- src/Storages/IStorage.cpp | 11 ++++++++--- src/Storages/IStorage.h | 3 +++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Common/RWLock.h b/src/Common/RWLock.h index edc22cffaa0..43366192cf8 100644 --- a/src/Common/RWLock.h +++ b/src/Common/RWLock.h @@ -50,7 +50,7 @@ public: /// Use as query_id to acquire a lock outside the query context. 
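    /// For scale (derived from the value below): 120000 ms is two minutes,
    /// the ceiling after which a storage-level lock attempt is abandoned and
    /// surfaced as DEADLOCK_AVOIDED by IStorage::tryLockTimed rather than
    /// waiting forever.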
inline static const String NO_QUERY = String(); - inline static const auto default_locking_timeout = std::chrono::milliseconds(120000); + inline static const auto default_locking_timeout_ms = std::chrono::milliseconds(120000); private: /// Group of locking requests that should be granted simultaneously diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 345ac6d5aac..ab3a750db16 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -315,13 +315,18 @@ bool IStorage::isVirtualColumn(const String & column_name) const return getColumns().get(column_name).is_virtual; } -RWLockImpl::LockHolder tryLockTimed(const RWLock & rwlock, RWLockImpl::Type type, const String & query_id) +RWLockImpl::LockHolder IStorage::tryLockTimed(const RWLock & rwlock, RWLockImpl::Type type, const String & query_id) { - auto lock_holder = rwlock->getLock(type, query_id, RWLockImpl::default_locking_timeout); + auto lock_holder = rwlock->getLock(type, query_id, RWLockImpl::default_locking_timeout_ms); if (!lock_holder) + { + const String type_str = type == RWLockImpl::Type::Read ? "READ" : "WRITE"; throw Exception( - "Locking attempt timed out! Possible deadlock avoided. Client should retry.", + type_str + " locking attempt on \"" + getStorageID().getFullTableName() + + "\" has timed out! (" + toString(RWLockImpl::default_locking_timeout_ms.count()) + "ms) " + "Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); + } return lock_holder; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index d3cede6e5c8..581fc8a67e7 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -195,6 +195,9 @@ private: IndicesDescription indices; ConstraintsDescription constraints; +private: + RWLockImpl::LockHolder tryLockTimed(const RWLock & rwlock, RWLockImpl::Type type, const String & query_id); + public: /// Acquire this lock if you need the table structure to remain constant during the execution of /// the query. 
If will_add_new_data is true, this means that the query will add new data to the table From 3bf67b1380688be5fb48bdc004e1eede9e8fd7bd Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 7 Apr 2020 15:02:07 +0300 Subject: [PATCH 089/752] typo --- src/Interpreters/InterserverIOHandler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index ca3a92df2e6..0e85f30c886 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -60,7 +60,7 @@ public: throw Exception("Duplicate interserver IO endpoint: " + name, ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT); } - bool removeEndpointIfExists(const String & name)>> + bool removeEndpointIfExists(const String & name) { std::lock_guard lock(mutex); LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), "anime removeEndpointIfExists() " << name); From f8500cf5d58546752d9e10c636832c1caccdf52d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 8 Apr 2020 04:09:01 +0300 Subject: [PATCH 090/752] Fix unit test --- src/Common/parseGlobs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index 31e7c2eb612..71ddbbd92ea 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -46,7 +46,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob char point; std::istringstream iss_range(buffer); iss_range >> range_begin >> point >> point >> range_end; - assert(iss_range.good()); + assert(!iss_range.fail()); bool leading_zeros = buffer[0] == '0'; size_t num_len = std::to_string(range_end).size(); if (leading_zeros) From 97c2d17e99a2ec9f59e9c1ff6b60e66d6f2b829c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 8 Apr 2020 05:32:33 +0300 Subject: [PATCH 091/752] ThreadFuzzer: do not wrap pthread functions under thread and memory sanitizers --- src/Common/ThreadFuzzer.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 42e133b4561..60766e32361 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -8,6 +8,7 @@ #include +#include #include #include @@ -18,8 +19,15 @@ #include + /// We will also wrap some thread synchronization functions to inject sleep/migration before or after. -#if defined(OS_LINUX) +#if defined(OS_LINUX) && !defined(THREAD_SANITIZER) && !defined(MEMORY_SANITIZER) + #define THREAD_FUZZER_WRAP_PTHREAD 1 +#else + #define THREAD_FUZZER_WRAP_PTHREAD 0 +#endif + +#if THREAD_FUZZER_WRAP_PTHREAD # define FOR_EACH_WRAPPED_FUNCTION(M) \ M(int, pthread_mutex_lock, pthread_mutex_t * arg) \ M(int, pthread_mutex_unlock, pthread_mutex_t * arg) @@ -66,7 +74,7 @@ static void initFromEnv(std::atomic & what, const char * name) static std::atomic num_cpus = 0; -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define DEFINE_WRAPPER_PARAMS(RET, NAME, ...) \ static std::atomic NAME##_before_yield_probability = 0; \ static std::atomic NAME##_before_migrate_probability = 0; \ @@ -97,7 +105,7 @@ void ThreadFuzzer::initConfiguration() initFromEnv(sleep_probability, "THREAD_FUZZER_SLEEP_PROBABILITY"); initFromEnv(sleep_time_us, "THREAD_FUZZER_SLEEP_TIME_US"); -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define INIT_WRAPPER_PARAMS(RET, NAME, ...) 
\ initFromEnv(NAME##_before_yield_probability, "THREAD_FUZZER_" #NAME "_BEFORE_YIELD_PROBABILITY"); \ initFromEnv(NAME##_before_migrate_probability, "THREAD_FUZZER_" #NAME "_BEFORE_MIGRATE_PROBABILITY"); \ @@ -118,7 +126,7 @@ void ThreadFuzzer::initConfiguration() bool ThreadFuzzer::isEffective() const { -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define CHECK_WRAPPER_PARAMS(RET, NAME, ...) \ if (NAME##_before_yield_probability.load(std::memory_order_relaxed)) \ return true; \ @@ -236,7 +244,7 @@ void ThreadFuzzer::setup() /// We expect that for every function like pthread_mutex_lock there is the same function with two underscores prefix. /// NOTE We cannot use dlsym(... RTLD_NEXT), because it will call pthread_mutex_lock and it will lead to infinite recursion. -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define MAKE_WRAPPER(RET, NAME, ...) \ extern "C" RET __##NAME(__VA_ARGS__); /* NOLINT */ \ extern "C" RET NAME(__VA_ARGS__) /* NOLINT */ \ From a6194364ea6f0ec3613f83188adf8ba889e40ae4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 8 Apr 2020 05:54:08 +0300 Subject: [PATCH 092/752] Update MergeTreeDataSelectExecutor.cpp --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 13e852765b7..816af8db3e9 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1204,7 +1204,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( std::vector ranges_stack = { {0, marks_count} }; std::function create_field_ref; - /// If there is no monotonic functions, there is no need to save block reference. + /// If there are no monotonic functions, there is no need to save block reference. /// Passing explicit field to FieldRef allows to optimize ranges and shows better performance. if (key_condition.hasMonotonicFunctionsChain()) { From 94a621060d8af8607e3604e7c65c7e0b718c8182 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 8 Apr 2020 05:55:03 +0300 Subject: [PATCH 093/752] Update KeyCondition.h --- src/Storages/MergeTree/KeyCondition.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 7c8b63eb800..4863ffa6f34 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -22,7 +22,7 @@ using FunctionBasePtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -/** A field, that can be stored in two reperesenation: +/** A field, that can be stored in two reperesenations: * - A standalone field. * - A field with reference to it's position in block. * It's needed for execution functions on ranges during From 723a1f41e2516f5d17d03b7240accc3a8f07e4a5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 8 Apr 2020 05:55:22 +0300 Subject: [PATCH 094/752] Update KeyCondition.h --- src/Storages/MergeTree/KeyCondition.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 4863ffa6f34..db0f9e68c60 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -24,7 +24,7 @@ using ExpressionActionsPtr = std::shared_ptr; /** A field, that can be stored in two reperesenations: * - A standalone field. 
- * - A field with reference to it's position in block. + * - A field with reference to its position in block. * It's needed for execution functions on ranges during * index analysis. If function was executed once for field, * it's result would be cached for all block for which field's reference points to. From a42d875a68ae50358095a63355c323a35b199a48 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 8 Apr 2020 05:55:39 +0300 Subject: [PATCH 095/752] Update KeyCondition.h --- src/Storages/MergeTree/KeyCondition.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index db0f9e68c60..4c67ef258bf 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -27,7 +27,7 @@ using ExpressionActionsPtr = std::shared_ptr; * - A field with reference to its position in block. * It's needed for execution functions on ranges during * index analysis. If function was executed once for field, - * it's result would be cached for all block for which field's reference points to. + * its result would be cached for all block for which field's reference points to. */ struct FieldRef : public Field { From a46a61c970d40845fa49115eaa8c7f53d93651bf Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 8 Apr 2020 05:56:25 +0300 Subject: [PATCH 096/752] Update KeyCondition.h --- src/Storages/MergeTree/KeyCondition.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 4c67ef258bf..3a3768f0e4c 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -24,10 +24,10 @@ using ExpressionActionsPtr = std::shared_ptr; /** A field, that can be stored in two reperesenations: * - A standalone field. - * - A field with reference to its position in block. - * It's needed for execution functions on ranges during + * - A field with reference to its position in a block. + * It's needed for execution of functions on ranges during * index analysis. If function was executed once for field, - * its result would be cached for all block for which field's reference points to. + * its result would be cached for whole block for which field's reference points to. 
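  * A sketch of the mechanism (the coordinate names are illustrative, not
  * quoted from the header): instead of always copying a Field out of the
  * index, a FieldRef can carry (block, column, row) coordinates; a monotonic
  * function is then evaluated once over the whole block, and every FieldRef
  * pointing into that block reuses the cached result column.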
*/ struct FieldRef : public Field { From 0b5cc8058094c4189ccc63c32c909c4096fadb42 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 7 Apr 2020 22:52:32 +0800 Subject: [PATCH 097/752] joinGetOrNull --- src/Functions/FunctionJoinGet.cpp | 33 +++++++++++++------ src/Functions/FunctionJoinGet.h | 9 +++-- src/Interpreters/HashJoin.cpp | 33 ++++++++++++------- src/Interpreters/HashJoin.h | 6 ++-- .../01240_join_get_or_null.reference | 1 + .../0_stateless/01240_join_get_or_null.sql | 7 ++++ 6 files changed, 62 insertions(+), 27 deletions(-) create mode 100644 tests/queries/0_stateless/01240_join_get_or_null.reference create mode 100644 tests/queries/0_stateless/01240_join_get_or_null.sql diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 1745343cc33..3e18d657ac0 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -60,7 +60,8 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co return std::make_pair(storage_join, attr_name); } -FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const +template +FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const { auto [storage_join, attr_name] = getJoin(arguments, context); auto join = storage_join->getJoin(); @@ -70,40 +71,52 @@ FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTypeAndName for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type; - auto return_type = join->joinGetReturnType(attr_name); - return std::make_unique(table_lock, storage_join, join, attr_name, data_types, return_type); + auto return_type = join->joinGetReturnType(attr_name, or_null); + return std::make_unique>(table_lock, storage_join, join, attr_name, data_types, return_type); } -DataTypePtr JoinGetOverloadResolver::getReturnType(const ColumnsWithTypeAndName & arguments) const +template +DataTypePtr JoinGetOverloadResolver::getReturnType(const ColumnsWithTypeAndName & arguments) const { auto [storage_join, attr_name] = getJoin(arguments, context); auto join = storage_join->getJoin(); - return join->joinGetReturnType(attr_name); + return join->joinGetReturnType(attr_name, or_null); } -void ExecutableFunctionJoinGet::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) +template +void ExecutableFunctionJoinGet::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { auto ctn = block.getByPosition(arguments[2]); if (isColumnConst(*ctn.column)) ctn.column = ctn.column->cloneResized(1); ctn.name = ""; // make sure the key name never collide with the join columns Block key_block = {ctn}; - join->joinGet(key_block, attr_name); + join->joinGet(key_block, attr_name, or_null); auto & result_ctn = key_block.getByPosition(1); if (isColumnConst(*ctn.column)) result_ctn.column = ColumnConst::create(result_ctn.column, input_rows_count); block.getByPosition(result) = result_ctn; } -ExecutableFunctionImplPtr FunctionJoinGet::prepare(const Block &, const ColumnNumbers &, size_t) const +template +ExecutableFunctionImplPtr FunctionJoinGet::prepare(const Block &, const ColumnNumbers &, size_t) const { - return std::make_unique(join, attr_name); + return std::make_unique>(join, attr_name); } void registerFunctionJoinGet(FunctionFactory & factory) { - factory.registerFunction(); + // joinGet + factory.registerFunction>(); + // joinGetOrNull + 
factory.registerFunction>(); } +template class ExecutableFunctionJoinGet; +template class ExecutableFunctionJoinGet; +template class FunctionJoinGet; +template class FunctionJoinGet; +template class JoinGetOverloadResolver; +template class JoinGetOverloadResolver; } diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index 42ff2b16217..f233ccd8a4f 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -9,13 +9,14 @@ class Context; class HashJoin; using HashJoinPtr = std::shared_ptr; +template class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl { public: ExecutableFunctionJoinGet(HashJoinPtr join_, String attr_name_) : join(std::move(join_)), attr_name(std::move(attr_name_)) {} - static constexpr auto name = "joinGet"; + static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } @@ -30,10 +31,11 @@ private: const String attr_name; }; +template class FunctionJoinGet final : public IFunctionBaseImpl { public: - static constexpr auto name = "joinGet"; + static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; FunctionJoinGet(TableStructureReadLockHolder table_lock_, StoragePtr storage_join_, HashJoinPtr join_, String attr_name_, @@ -63,10 +65,11 @@ private: DataTypePtr return_type; }; +template class JoinGetOverloadResolver final : public IFunctionOverloadResolverImpl { public: - static constexpr auto name = "joinGet"; + static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; static FunctionOverloadResolverImplPtr create(const Context & context) { return std::make_unique(context); } explicit JoinGetOverloadResolver(const Context & context_) : context(context_) {} diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index a3432ebebba..d8c0d239c96 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -681,12 +681,10 @@ public: type_name.reserve(num_columns_to_add); right_indexes.reserve(num_columns_to_add); - for (size_t i = 0; i < num_columns_to_add; ++i) + for (auto & src_column : block_with_columns_to_add) { - const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); - - /// Don't insert column if it's in left block or not explicitly required. 
- if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) + /// Don't insert column if it's in left block + if (!block.has(src_column.name)) addColumn(src_column); } @@ -1158,28 +1156,36 @@ static void checkTypeOfKey(const Block & block_left, const Block & block_right) } -DataTypePtr HashJoin::joinGetReturnType(const String & column_name) const +DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null) const { std::shared_lock lock(data->rwlock); if (!sample_block_with_columns_to_add.has(column_name)) throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::LOGICAL_ERROR); - return sample_block_with_columns_to_add.getByName(column_name).type; + auto ctn = sample_block_with_columns_to_add.getByName(column_name); + if (or_null) + { + if (!ctn.type->canBeInsideNullable()) + throw Exception("Type " + ctn.type->getName() + "cannot be inside Nullable", ErrorCodes::LOGICAL_ERROR); + else + ctn.type = makeNullable(ctn.type); + } + return ctn.type; } template -void HashJoin::joinGetImpl(Block & block, const String & column_name, const Maps & maps_) const +void HashJoin::joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const { joinBlockImpl( - block, {block.getByPosition(0).name}, {sample_block_with_columns_to_add.getByName(column_name)}, maps_); + block, {block.getByPosition(0).name}, block_with_columns_to_add, maps_); } // TODO: support composite key // TODO: return multiple columns as named tuple // TODO: return array of values when strictness == ASTTableJoin::Strictness::All -void HashJoin::joinGet(Block & block, const String & column_name) const +void HashJoin::joinGet(Block & block, const String & column_name, bool or_null) const { std::shared_lock lock(data->rwlock); @@ -1188,10 +1194,15 @@ void HashJoin::joinGet(Block & block, const String & column_name) const checkTypeOfKey(block, right_table_keys); + auto ctn = sample_block_with_columns_to_add.getByName(column_name); + if (or_null) + ctn.type = makeNullable(ctn.type); + ctn.column = ctn.type->createColumn(); + if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) && kind == ASTTableJoin::Kind::Left) { - joinGetImpl(block, column_name, std::get(data->maps)); + joinGetImpl(block, {ctn}, std::get(data->maps)); } else throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 24ad2b871c9..b769cfc61c5 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -161,10 +161,10 @@ public: void joinBlock(Block & block, ExtraBlockPtr & not_processed) override; /// Infer the return type for joinGet function - DataTypePtr joinGetReturnType(const String & column_name) const; + DataTypePtr joinGetReturnType(const String & column_name, bool or_null) const; /// Used by joinGet function that turns StorageJoin into a dictionary - void joinGet(Block & block, const String & column_name) const; + void joinGet(Block & block, const String & column_name, bool or_null) const; /** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later. 
 */
@@ -382,7 +382,7 @@ private:
     void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const;
 
     template <typename Maps>
-    void joinGetImpl(Block & block, const String & column_name, const Maps & maps) const;
+    void joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const;
 
     static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes);
 };
diff --git a/tests/queries/0_stateless/01240_join_get_or_null.reference b/tests/queries/0_stateless/01240_join_get_or_null.reference
new file mode 100644
index 00000000000..dec7d2fabd2
--- /dev/null
+++ b/tests/queries/0_stateless/01240_join_get_or_null.reference
@@ -0,0 +1 @@
+\N
diff --git a/tests/queries/0_stateless/01240_join_get_or_null.sql b/tests/queries/0_stateless/01240_join_get_or_null.sql
new file mode 100644
index 00000000000..d1b9a07540a
--- /dev/null
+++ b/tests/queries/0_stateless/01240_join_get_or_null.sql
@@ -0,0 +1,7 @@
+DROP TABLE IF EXISTS join_test;
+
+CREATE TABLE join_test (id UInt16, num UInt16) engine = Join(ANY, LEFT, id);
+
+SELECT joinGetOrNull('join_test', 'num', 500);
+
+DROP TABLE join_test;

From 3116b2d29b4008bd4a9784816986c9c35b03981e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 4 Apr 2020 02:05:16 +0300
Subject: [PATCH 098/752] Make SHOW CREATE TABLE multiline (because it is more
 readable and also MySQL-like)

---
 src/Interpreters/InterpreterShowCreateQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp
index 8bee0b88fe8..4161b3500bd 100644
--- a/src/Interpreters/InterpreterShowCreateQuery.cpp
+++ b/src/Interpreters/InterpreterShowCreateQuery.cpp
@@ -73,7 +73,7 @@ BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl()
         throw Exception("Unable to show the create query of " + show_query->table + ". Maybe it was created by the system.", ErrorCodes::THERE_IS_NO_QUERY);
 
     std::stringstream stream;
-    formatAST(*create_query, stream, false, true);
+    formatAST(*create_query, stream, false, false);
     String res = stream.str();
 
     MutableColumnPtr column = ColumnString::create();

From ed25ac8b12bbd0a5fb9a4551bda52cd8e27a56d6 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 8 Apr 2020 10:43:10 +0300
Subject: [PATCH 099/752] Rewrite access_control_path in path.xml

Follow-up for: #9811

---
 programs/server/config.d/path.xml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/programs/server/config.d/path.xml b/programs/server/config.d/path.xml
index 14b7deb9de0..8db1d18e8c7 100644
--- a/programs/server/config.d/path.xml
+++ b/programs/server/config.d/path.xml
@@ -3,4 +3,5 @@
     <tmp_path>./tmp/</tmp_path>
     <user_files_path>./user_files/</user_files_path>
     <format_schema_path>./format_schemas/</format_schema_path>
+    <access_control_path>./access/</access_control_path>
 </yandex>

From 5b2b8d38fa50fd8b1f195c5890be8c103ad65b61 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko
Date: Wed, 8 Apr 2020 11:41:13 +0300
Subject: [PATCH 100/752] Download part through disk interface.
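(A sketch of the new write path, using only calls visible in the diff below;
`disk` is the IDisk obtained from the reservation:)

    auto disk = reservation->getDisk();
    disk->createDirectories(part_download_path);
    auto file_out = disk->writeFile(part_download_path + file_name);
    HashingWriteBuffer hashing_out(*file_out);

Going through the IDisk interface instead of Poco::File/WriteBufferFromFile
lets the same fetch code serve any disk implementation rather than only the
local filesystem.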
--- src/Storages/MergeTree/DataPartsExchange.cpp | 29 ++++++++++---------- src/Storages/MergeTree/MergeTreeData.h | 3 ++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6373c85a15d..4e40d4a5977 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -258,19 +258,20 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( size_t files; readBinary(files, in); + auto disk = reservation->getDisk(); + static const String TMP_PREFIX = "tmp_fetch_"; String tmp_prefix = tmp_prefix_.empty() ? TMP_PREFIX : tmp_prefix_; - String relative_part_path = String(to_detached ? "detached/" : "") + tmp_prefix + part_name; - String absolute_part_path = Poco::Path(data.getFullPathOnDisk(reservation->getDisk()) + relative_part_path + "/").absolute().toString(); - Poco::File part_file(absolute_part_path); + String part_relative_path = String(to_detached ? "detached/" : "") + tmp_prefix + part_name; + String part_download_path = data.getRelativeDataPath() + part_relative_path + "/"; - if (part_file.exists()) - throw Exception("Directory " + absolute_part_path + " already exists.", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + if (disk->exists(part_download_path)) + throw Exception("Directory " + fullPath(disk, part_download_path) + " already exists.", ErrorCodes::DIRECTORY_ALREADY_EXISTS); CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - part_file.createDirectory(); + disk->createDirectories(part_download_path); MergeTreeData::DataPart::Checksums checksums; for (size_t i = 0; i < files; ++i) @@ -283,21 +284,21 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( /// File must be inside "absolute_part_path" directory. /// Otherwise malicious ClickHouse replica may force us to write to arbitrary path. - String absolute_file_path = Poco::Path(absolute_part_path + file_name).absolute().toString(); - if (!startsWith(absolute_file_path, absolute_part_path)) - throw Exception("File path (" + absolute_file_path + ") doesn't appear to be inside part path (" + absolute_part_path + ")." + String absolute_file_path = Poco::Path(part_download_path + file_name).absolute().toString(); + if (!startsWith(absolute_file_path, part_download_path)) + throw Exception("File path (" + absolute_file_path + ") doesn't appear to be inside part path (" + part_download_path + ")." " This may happen if we are trying to download part from malicious replica or logical error.", ErrorCodes::INSECURE_PATH); - WriteBufferFromFile file_out(absolute_file_path); - HashingWriteBuffer hashing_out(file_out); + auto file_out = disk->writeFile(part_download_path + file_name); + HashingWriteBuffer hashing_out(*file_out); copyData(in, hashing_out, file_size, blocker.getCounter()); if (blocker.isCancelled()) { /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, performing a poll with a not very large timeout. /// And now we check it only between read chunks (in the `copyData` function). 
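            /// Hence (restating the two notes above): cancellation is only
            /// observed at chunk boundaries inside copyData(), and once it
            /// fires, the partially written part directory must be removed -
            /// now via disk->removeRecursive() below - before ABORTED is
            /// thrown.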
- part_file.remove(true); + disk->removeRecursive(part_download_path); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } @@ -305,7 +306,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( readPODBinary(expected_hash, in); if (expected_hash != hashing_out.getHash()) - throw Exception("Checksum mismatch for file " + absolute_part_path + file_name + " transferred from " + replica_path, + throw Exception("Checksum mismatch for file " + fullPath(disk, part_download_path + file_name) + " transferred from " + replica_path, ErrorCodes::CHECKSUM_DOESNT_MATCH); if (file_name != "checksums.txt" && @@ -315,7 +316,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( assertEOF(in); - MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, reservation->getDisk(), relative_part_path); + MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, reservation->getDisk(), part_relative_path); new_data_part->is_temp = true; new_data_part->modification_time = time(nullptr); new_data_part->loadColumnsChecksumsIndexes(true, false); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 125a90d26e0..eb2a0dd8774 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -625,6 +625,9 @@ public: return storage_settings.get(); } + /// Get relative table path + String getRelativeDataPath() const { return relative_data_path; } + /// Get table path on disk String getFullPathOnDisk(const DiskPtr & disk) const; From 9b1c5c8b51bcdc636bff338b4834ecdee476e183 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Wed, 8 Apr 2020 11:44:26 +0300 Subject: [PATCH 101/752] Fix regex for S3 URI to pass clang-tidy check. --- src/IO/S3Common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 19eb2b42360..3aca41a9c9a 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -108,7 +108,7 @@ namespace S3 /// Case when bucket name represented in domain name of S3 URL. /// E.g. (https://bucket-name.s3.Region.amazonaws.com/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access - static const RE2 virtual_hosted_style_pattern("(.+\\.)?s3[.\\-][a-z0-9\\-.]+"); + static const RE2 virtual_hosted_style_pattern(R"((.+\.)?s3[.\-][a-z0-9\-.]+)"); /// Case when bucket name and key represented in path of S3 URL. /// E.g. 
(https://s3.Region.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access From ded306f0168870a0792f48da8f991b5f7ba9458b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 8 Apr 2020 12:51:04 +0300 Subject: [PATCH 102/752] Fix adaptive granularity compatibility --- src/Storages/StorageReplicatedMergeTree.cpp | 9 +++- .../__init__.py | 0 .../test.py | 49 +++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_adaptive_granularity_different_settings/__init__.py create mode 100644 tests/integration/test_adaptive_granularity_different_settings/test.py diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 31456c8d1f1..8ce65aca3e0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -246,6 +246,11 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( createTableIfNotExists(); + /// We have to check granularity on other replicas. It it's fixed we + /// must create our new replica with fixed granularity and store this + /// information in /replica/metadata. + other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); + checkTableStructure(zookeeper_path); Coordination::Stat metadata_stat; @@ -256,11 +261,14 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } else { + /// In old tables this node may missing or be empty String replica_metadata; bool replica_metadata_exists = current_zookeeper->tryGet(replica_path + "/metadata", replica_metadata); if (!replica_metadata_exists || replica_metadata.empty()) { + /// We have to check shared node granularity before we create ours. + other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); ReplicatedMergeTreeTableMetadata current_metadata(*this); current_zookeeper->createOrUpdate(replica_path + "/metadata", current_metadata.toString(), zkutil::CreateMode::Persistent); } @@ -291,7 +299,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( createNewZooKeeperNodes(); - other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); } diff --git a/tests/integration/test_adaptive_granularity_different_settings/__init__.py b/tests/integration/test_adaptive_granularity_different_settings/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_adaptive_granularity_different_settings/test.py b/tests/integration/test_adaptive_granularity_different_settings/test.py new file mode 100644 index 00000000000..b066c437e06 --- /dev/null +++ b/tests/integration/test_adaptive_granularity_different_settings/test.py @@ -0,0 +1,49 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', with_zookeeper=True) +node2 = cluster.add_instance('node2', with_zookeeper=True) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_attach_detach(start_cluster): + + node1.query(""" + CREATE TABLE test (key UInt64) + ENGINE = ReplicatedMergeTree('/clickhouse/test', '1') + ORDER BY tuple() + SETTINGS index_granularity_bytes = 0""") + + node1.query("INSERT INTO test VALUES (1), (2)") + + node2.query(""" + CREATE TABLE test (key UInt64) + ENGINE = ReplicatedMergeTree('/clickhouse/test', '2') + ORDER BY tuple()""") + + node2.query("INSERT INTO test VALUES (3), (4)") + + node1.query("SYSTEM SYNC 
REPLICA test") + node2.query("SYSTEM SYNC REPLICA test") + + assert node1.query("SELECT COUNT() FROM test") == "4\n" + assert node2.query("SELECT COUNT() FROM test") == "4\n" + + node1.query("DETACH TABLE test") + node2.query("DETACH TABLE test") + + node1.query("ATTACH TABLE test") + node2.query("ATTACH TABLE test") + + assert node1.query("SELECT COUNT() FROM test") == "4\n" + assert node2.query("SELECT COUNT() FROM test") == "4\n" From 4ec77fee8b5a49d2f98659a9c817a8c47bd866e9 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Wed, 8 Apr 2020 13:53:17 +0300 Subject: [PATCH 103/752] Integration test for ReplicatedMergeTree over S3. --- .../test_replicated_merge_tree_s3/__init__.py | 0 .../config.d/bg_processing_pool_conf.xml | 5 + .../configs/config.d/log_conf.xml | 12 ++ .../configs/config.d/storage_conf.xml | 28 +++++ .../configs/config.d/users.xml | 6 + .../configs/config.xml | 20 ++++ .../test_replicated_merge_tree_s3/test.py | 108 ++++++++++++++++++ 7 files changed, 179 insertions(+) create mode 100644 tests/integration/test_replicated_merge_tree_s3/__init__.py create mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.xml create mode 100644 tests/integration/test_replicated_merge_tree_s3/test.py diff --git a/tests/integration/test_replicated_merge_tree_s3/__init__.py b/tests/integration/test_replicated_merge_tree_s3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml new file mode 100644 index 00000000000..a756c4434ea --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml @@ -0,0 +1,5 @@ + + 0.5 + 0.5 + 0.5 + diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml new file mode 100644 index 00000000000..318a6bca95d --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml @@ -0,0 +1,12 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..5b292446c6b --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml @@ -0,0 +1,28 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + local + / + + + + + +
+                        <disk>s3</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+</yandex>
diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml new file mode 100644 index 00000000000..a13b24b278d --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.xml new file mode 100644 index 00000000000..24b7344df3a --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.xml @@ -0,0 +1,20 @@ + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_replicated_merge_tree_s3/test.py b/tests/integration/test_replicated_merge_tree_s3/test.py new file mode 100644 index 00000000000..53eb612c281 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_s3/test.py @@ -0,0 +1,108 @@ +import logging +import random +import string +import time + +import pytest +from helpers.cluster import ClickHouseCluster + +logging.getLogger().setLevel(logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler()) + + +# Creates S3 bucket for tests and allows anonymous read-write access to it. +def prepare_s3_bucket(cluster): + minio_client = cluster.minio_client + + if minio_client.bucket_exists(cluster.minio_bucket): + minio_client.remove_bucket(cluster.minio_bucket) + + minio_client.make_bucket(cluster.minio_bucket) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + + cluster.add_instance("node1", config_dir="configs", with_minio=True, with_zookeeper=True) + cluster.add_instance("node2", config_dir="configs") + cluster.add_instance("node3", config_dir="configs") + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + prepare_s3_bucket(cluster) + logging.info("S3 bucket created") + + yield cluster + finally: + cluster.shutdown() + + +FILES_OVERHEAD = 1 +FILES_OVERHEAD_PER_COLUMN = 2 # Data and mark files +FILES_OVERHEAD_PER_PART = FILES_OVERHEAD_PER_COLUMN * 3 + 2 + 6 + + +def random_string(length): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(length)) + + +def generate_values(date_str, count, sign=1): + data = [[date_str, sign*(i + 1), random_string(10)] for i in range(count)] + data.sort(key=lambda tup: tup[1]) + return ",".join(["('{}',{},'{}')".format(x, y, z) for x, y, z in data]) + + +def create_table(cluster): + create_table_statement = """ + CREATE TABLE s3_test ( + dt Date, + id Int64, + data String, + INDEX min_max (id) TYPE minmax GRANULARITY 3 + ) ENGINE=ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/test_mutations', '{instance}') + PARTITION BY dt + ORDER BY (dt, id) + SETTINGS + old_parts_lifetime=0, index_granularity=512 + """ + + for node in cluster.instances: + node.query(create_table_statement) + + +@pytest.fixture(autouse=True) +def drop_table(cluster): + yield + for node in cluster.instances: + node.query("DROP TABLE IF EXISTS s3_test") + + minio = cluster.minio_client + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 + + +def test_insert_select_replicated(cluster): + create_table(cluster) + + all_values = "" + for node_idx in range(1, 4): + node = cluster.instances["node" + str(node_idx)] + values = generate_values("2020-01-0" + str(node_idx), 4096) + 
node.query("INSERT INTO s3_test VALUES {}".format(values)) + if node_idx != 1: + all_values += "," + all_values += values + + # Wait for replication + time.sleep(10) + + for node_idx in range(1, 4): + node = cluster.instances["node" + str(node_idx)] + assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == all_values + + minio = cluster.minio_client + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 3 * (FILES_OVERHEAD + FILES_OVERHEAD_PER_PART * 3) From 89fe81ed62227f9643881389e8e393da25d19a98 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Wed, 8 Apr 2020 14:56:31 +0300 Subject: [PATCH 104/752] Integration test for ReplicatedMergeTree over S3 fixes. --- src/Storages/MergeTree/DataPartsExchange.cpp | 3 +-- .../configs/config.xml | 20 --------------- .../test_replicated_merge_tree_s3/test.py | 25 ++++++++----------- 3 files changed, 12 insertions(+), 36 deletions(-) delete mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.xml diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 4e40d4a5977..9ef7a4d37aa 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -254,7 +254,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( const ReservationPtr reservation, PooledReadWriteBufferFromHTTP & in) { - size_t files; readBinary(files, in); @@ -285,7 +284,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( /// File must be inside "absolute_part_path" directory. /// Otherwise malicious ClickHouse replica may force us to write to arbitrary path. String absolute_file_path = Poco::Path(part_download_path + file_name).absolute().toString(); - if (!startsWith(absolute_file_path, part_download_path)) + if (!startsWith(absolute_file_path, Poco::Path(part_download_path).absolute().toString())) throw Exception("File path (" + absolute_file_path + ") doesn't appear to be inside part path (" + part_download_path + ")." 
" This may happen if we are trying to download part from malicious replica or logical error.", ErrorCodes::INSECURE_PATH); diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.xml deleted file mode 100644 index 24b7344df3a..00000000000 --- a/tests/integration/test_replicated_merge_tree_s3/configs/config.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - 9000 - 127.0.0.1 - - - - true - none - - AcceptCertificateHandler - - - - - 500 - 5368709120 - ./clickhouse/ - users.xml - diff --git a/tests/integration/test_replicated_merge_tree_s3/test.py b/tests/integration/test_replicated_merge_tree_s3/test.py index 53eb612c281..118a43a905e 100644 --- a/tests/integration/test_replicated_merge_tree_s3/test.py +++ b/tests/integration/test_replicated_merge_tree_s3/test.py @@ -25,9 +25,9 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("node1", config_dir="configs", with_minio=True, with_zookeeper=True) - cluster.add_instance("node2", config_dir="configs") - cluster.add_instance("node3", config_dir="configs") + cluster.add_instance("node1", config_dir="configs", macros={'cluster': 'test1'}, with_minio=True, with_zookeeper=True) + cluster.add_instance("node2", config_dir="configs", macros={'cluster': 'test1'}, with_zookeeper=True) + cluster.add_instance("node3", config_dir="configs", macros={'cluster': 'test1'}, with_zookeeper=True) logging.info("Starting cluster...") cluster.start() @@ -64,25 +64,25 @@ def create_table(cluster): id Int64, data String, INDEX min_max (id) TYPE minmax GRANULARITY 3 - ) ENGINE=ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/test_mutations', '{instance}') + ) ENGINE=ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/s3', '{instance}') PARTITION BY dt ORDER BY (dt, id) SETTINGS old_parts_lifetime=0, index_granularity=512 """ - for node in cluster.instances: + for node in cluster.instances.values(): node.query(create_table_statement) @pytest.fixture(autouse=True) def drop_table(cluster): yield - for node in cluster.instances: - node.query("DROP TABLE IF EXISTS s3_test") + #for node in cluster.instances.values(): + # node.query("DROP TABLE IF EXISTS s3_test") - minio = cluster.minio_client - assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 + #minio = cluster.minio_client + #assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 def test_insert_select_replicated(cluster): @@ -92,17 +92,14 @@ def test_insert_select_replicated(cluster): for node_idx in range(1, 4): node = cluster.instances["node" + str(node_idx)] values = generate_values("2020-01-0" + str(node_idx), 4096) - node.query("INSERT INTO s3_test VALUES {}".format(values)) + node.query("INSERT INTO s3_test VALUES {}".format(values), settings={"insert_quorum": 3}) if node_idx != 1: all_values += "," all_values += values - # Wait for replication - time.sleep(10) - for node_idx in range(1, 4): node = cluster.instances["node" + str(node_idx)] - assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == all_values + assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values", settings={"select_sequential_consistency": 1}) == all_values minio = cluster.minio_client assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 3 * (FILES_OVERHEAD + FILES_OVERHEAD_PER_PART * 3) From 3603c98c819857fd3b88faa403474bec32daff99 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Wed, 8 Apr 2020 14:59:24 +0300 Subject: [PATCH 
105/752] Move gtest for S3 URI to proper folder. --- {dbms/src => src}/IO/tests/gtest_s3_uri.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {dbms/src => src}/IO/tests/gtest_s3_uri.cpp (100%) diff --git a/dbms/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp similarity index 100% rename from dbms/src/IO/tests/gtest_s3_uri.cpp rename to src/IO/tests/gtest_s3_uri.cpp From f1cb928737f137ecc747e7e53bafe71391ce3b3b Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 8 Apr 2020 13:27:46 +0800 Subject: [PATCH 106/752] more monotonicity for toString() --- src/Functions/FunctionHelpers.h | 6 ++++++ src/Functions/FunctionsConversion.h | 8 +++++--- .../01234_to_string_monotonic.reference | 2 ++ .../0_stateless/01234_to_string_monotonic.sql | 14 ++++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01234_to_string_monotonic.reference create mode 100644 tests/queries/0_stateless/01234_to_string_monotonic.sql diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 00957935448..34aa0add6e1 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -24,6 +24,12 @@ const Type * checkAndGetDataType(const IDataType * data_type) return typeid_cast(data_type); } +template +bool checkDataTypes(const IDataType * data_type) +{ + return (... || typeid_cast(data_type)); +} + template const ColumnConst * checkAndGetColumnConst(const IColumn * column) { diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 100737b43c7..d201b967fb1 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1496,10 +1496,12 @@ struct ToStringMonotonicity IFunction::Monotonicity positive(true, true); IFunction::Monotonicity not_monotonic; - /// `toString` function is monotonous if the argument is Date or DateTime, or non-negative numbers with the same number of symbols. + auto type_ptr = &type; + if (auto * low_cardinality_type = checkAndGetDataType(type_ptr)) + type_ptr = low_cardinality_type->getDictionaryType().get(); - if (checkAndGetDataType(&type) - || typeid_cast(&type)) + /// `toString` function is monotonous if the argument is Date or DateTime or String, or non-negative numbers with the same number of symbols. 
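+    /// LowCardinality is unwrapped above (type_ptr points at its dictionary
+    /// type), so LowCardinality(String) is treated the same as plain String;
+    /// the new test below covers exactly that case.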
+ if (checkDataTypes(type_ptr)) return positive; if (left.isNull() || right.isNull()) diff --git a/tests/queries/0_stateless/01234_to_string_monotonic.reference b/tests/queries/0_stateless/01234_to_string_monotonic.reference new file mode 100644 index 00000000000..75404a347a4 --- /dev/null +++ b/tests/queries/0_stateless/01234_to_string_monotonic.reference @@ -0,0 +1,2 @@ +1234 +1234 diff --git a/tests/queries/0_stateless/01234_to_string_monotonic.sql b/tests/queries/0_stateless/01234_to_string_monotonic.sql new file mode 100644 index 00000000000..87324fdda27 --- /dev/null +++ b/tests/queries/0_stateless/01234_to_string_monotonic.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; + +CREATE TABLE test1 (s String) ENGINE = MergeTree ORDER BY s SETTINGS index_granularity = 1; +CREATE TABLE test2 (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s SETTINGS index_granularity = 1; + +INSERT INTO test1 SELECT toString(number) FROM numbers(10000); +INSERT INTO test2 SELECT toString(number) FROM numbers(10000); + +SELECT s FROM test1 WHERE toString(s) = '1234' SETTINGS max_rows_to_read = 2; +SELECT s FROM test2 WHERE toString(s) = '1234' SETTINGS max_rows_to_read = 2; + +DROP TABLE test1; +DROP TABLE test2; From eca6caa8db919f63d6dd61fc77e6f7dc67dad0f2 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Wed, 8 Apr 2020 15:48:16 +0300 Subject: [PATCH 107/752] Integration tests for MergeTree over S3 code cleanup. --- tests/integration/test_merge_tree_s3/test.py | 12 ++++++++---- .../configs/config.d/bg_processing_pool_conf.xml | 5 ----- .../configs/config.d/log_conf.xml | 12 ------------ .../configs/config.d/users.xml | 6 ------ .../test_replicated_merge_tree_s3/test.py | 15 +++++++++------ 5 files changed, 17 insertions(+), 33 deletions(-) delete mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml delete mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml delete mode 100644 tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index f69c09631e8..e12e31ebff2 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -84,7 +84,12 @@ def drop_table(cluster): minio = cluster.minio_client node.query("DROP TABLE IF EXISTS s3_test") - assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 + try: + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 + finally: + # Remove extra objects to prevent tests cascade failing + for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')): + minio.remove_object(cluster.minio_bucket, obj.object_name) @pytest.mark.parametrize( @@ -210,7 +215,7 @@ def test_attach_detach_partition(cluster): assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-04'") - node.query("SET allow_drop_detached=1; ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'") + node.query("ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1}) assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD @@ -245,8 +250,7 @@ def test_table_manipulations(cluster): assert 
len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE*2 node.query("RENAME TABLE s3_renamed TO s3_test") - # TODO: Doesn't work with min_max index. - #assert node.query("SET check_query_single_value_result='false'; CHECK TABLE s3_test FORMAT Values") == "(1)" + assert node.query("CHECK TABLE s3_test FORMAT Values") == "(1)" node.query("DETACH TABLE s3_test") node.query("ATTACH TABLE s3_test") diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml deleted file mode 100644 index a756c4434ea..00000000000 --- a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/bg_processing_pool_conf.xml +++ /dev/null @@ -1,5 +0,0 @@ - - 0.5 - 0.5 - 0.5 - diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml deleted file mode 100644 index 318a6bca95d..00000000000 --- a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/log_conf.xml +++ /dev/null @@ -1,12 +0,0 @@ - - 3 - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml deleted file mode 100644 index a13b24b278d..00000000000 --- a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/users.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/tests/integration/test_replicated_merge_tree_s3/test.py b/tests/integration/test_replicated_merge_tree_s3/test.py index 118a43a905e..a8b7cf63e38 100644 --- a/tests/integration/test_replicated_merge_tree_s3/test.py +++ b/tests/integration/test_replicated_merge_tree_s3/test.py @@ -67,8 +67,6 @@ def create_table(cluster): ) ENGINE=ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/s3', '{instance}') PARTITION BY dt ORDER BY (dt, id) - SETTINGS - old_parts_lifetime=0, index_granularity=512 """ for node in cluster.instances.values(): @@ -78,11 +76,16 @@ def create_table(cluster): @pytest.fixture(autouse=True) def drop_table(cluster): yield - #for node in cluster.instances.values(): - # node.query("DROP TABLE IF EXISTS s3_test") + for node in cluster.instances.values(): + node.query("DROP TABLE IF EXISTS s3_test") - #minio = cluster.minio_client - #assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 + minio = cluster.minio_client + try: + assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 + finally: + # Remove extra objects to prevent tests cascade failing + for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')): + minio.remove_object(cluster.minio_bucket, obj.object_name) def test_insert_select_replicated(cluster): From 7376428a6b123ae05c1707b60c85c7ebdd1aa62e Mon Sep 17 00:00:00 2001 From: elenaspb2019 <47083263+elenaspb2019@users.noreply.github.com> Date: Wed, 8 Apr 2020 16:22:32 +0300 Subject: [PATCH 108/752] elenbaskakova-DOCSUP-780 (#87) * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' 
has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * Update docs/en/operations/system_tables.md Co-Authored-By: BayoNet * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * Update docs/en/operations/system_tables.md Co-Authored-By: BayoNet * Update docs/en/operations/system_tables.md Co-Authored-By: BayoNet * Update docs/en/operations/system_tables.md Co-Authored-By: BayoNet * Update docs/en/operations/system_tables.md Co-Authored-By: BayoNet * "docs(system.settings): Table 'system.settings' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries' has been edited" * "docs(system.dictionaries): Table 'system.dictionaries has been edited" Co-authored-by: elenbaskakova Co-authored-by: BayoNet --- docs/en/operations/system_tables.md | 73 ++++++++++++---- .../dicts/external_dicts_dict_lifetime.md | 83 +++++++++++++++++++ docs/ru/operations/system_tables.md | 71 ++++++++++++---- 3 files changed, 195 insertions(+), 32 deletions(-) create mode 100644 docs/en/query_language/dicts/external_dicts_dict_lifetime.md diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 8d0e4b74b86..fd6c70fb076 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -147,27 +147,68 @@ This system table is used for implementing the `SHOW DATABASES` query. Contains information about detached parts of [MergeTree](../engines/table_engines/mergetree_family/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../query_language/query_language/alter/#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../query_language/query_language/alter/#alter_drop-detached). -## system.dictionaries {#system-dictionaries} +## system.dictionaries {#system_tables-dictionaries} -Contains information about external dictionaries. +Contains information about [external dictionaries](../query_language/dicts/external_dicts.md). Columns: -- `name` (String) — Dictionary name. -- `type` (String) — Dictionary type: Flat, Hashed, Cache. -- `origin` (String) — Path to the configuration file that describes the dictionary. -- `attribute.names` (Array(String)) — Array of attribute names provided by the dictionary. -- `attribute.types` (Array(String)) — Corresponding array of attribute types that are provided by the dictionary. -- `has_hierarchy` (UInt8) — Whether the dictionary is hierarchical. -- `bytes_allocated` (UInt64) — The amount of RAM the dictionary uses. -- `hit_rate` (Float64) — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `element_count` (UInt64) — The number of items stored in the dictionary. -- `load_factor` (Float64) — The percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `creation_time` (DateTime) — The time when the dictionary was created or last successfully reloaded. 
-- `last_exception` (String) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. -- `source` (String) — Text describing the data source for the dictionary. +- `database` ([String](../data_types/string.md)) — Database name where the dictionary is located. Only for dictionaries created by DDL query, for others is always an empty string. +- `name` ([String](../data_types/string.md)) — [Dictionary name](../query_language/dicts/external_dicts_dict.md). +- `status` ([Enum8](../data_types/enum.md)) — Dictionary status. Possible values: + - `NOT_LOADED` — Dictionary was not loaded because it was not used. + - `LOADED` — Dictionary loaded successfully. + - `FAILED` — Unable to load the dictionary as a result of an error. + - `LOADING` — Dictionary is loading now. + - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../query_language/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). + - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. +- `origin` ([String](../data_types/string.md)) — Path to the configuration file that describes the dictionary. +- `type` ([String](../data_types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../query_language/dicts/external_dicts_dict_layout.md). +- `key` — [Key type](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-key): Numeric Key ([UInt64](../data_types/int_uint.md#uint-ranges)) or Сomposite key ([String](../data_types/string.md)) — form "(type 1, type 2, ..., type n)". +- `attribute.names` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Array of [attribute names](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes) provided by the dictionary. +- `attribute.types` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Corresponding array of [attribute types](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes) that are provided by the dictionary. +- `bytes_allocated` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. +- `query_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. +- `hit_rate` ([Float64](../data_types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. +- `element_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Number of items stored in the dictionary. +- `load_factor` ([Float64](../data_types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). +- `source` ([String](../data_types/string.md)) — Text describing the [data source](../query_language/dicts/external_dicts_dict_sources.md) for the dictionary. +- `lifetime_min` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Minimum [lifetime](../query_language/dicts/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. 
+- `lifetime_max` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Maximum [lifetime](../query_language/dicts/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `loading_start_time` ([DateTime](../data_types/datetime.md)) — Start time for loading the dictionary. +- `last_successful_update_time` ([DateTime](../data_types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes. +- `loading_duration` ([Float32](../data_types/float.md)) — Duration of a dictionary loading. +- `last_exception` ([String](../data_types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created. -Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. + +**Example** + +Configure the dictionary. + +```sql +CREATE DICTIONARY dictdb.dict +( + `key` Int64 DEFAULT -1, + `value_default` String DEFAULT 'world', + `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) +LIFETIME(MIN 0 MAX 1) +LAYOUT(FLAT()) +``` + +Make sure that the dictionary is loaded. + +```sql +SELECT * FROM system.dictionaries +``` + +```text +┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ +│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ +└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ +``` ## system.events {#system_tables-events} diff --git a/docs/en/query_language/dicts/external_dicts_dict_lifetime.md b/docs/en/query_language/dicts/external_dicts_dict_lifetime.md new file mode 100644 index 00000000000..7f7fb08b0ef --- /dev/null +++ b/docs/en/query_language/dicts/external_dicts_dict_lifetime.md @@ -0,0 +1,83 @@ + +# Dictionary Updates + +ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `` tag in seconds. + +Dictionary updates (other than loading for first use) do not block queries. During updates, the old version of a dictionary is used. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. + +Example of settings: + +```xml + + ... + 300 + ... + +``` + +```sql +CREATE DICTIONARY (...) +... +LIFETIME(300) +... 
+``` + +Setting `0` (`LIFETIME(0)`) prevents dictionaries from updating. + +You can set a time interval for upgrades, and ClickHouse will choose a uniformly random time within this range. This is necessary in order to distribute the load on the dictionary source when upgrading on a large number of servers. + +Example of settings: + +```xml + + ... + + 300 + 360 + + ... + +``` + +or + +```sql +LIFETIME(MIN 300 MAX 360) +``` +If `0` and `0`, ClickHouse does not reload the dictionary by timeout. +In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed. + +When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [ source](external_dicts_dict_sources.md): + +- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. +- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query. +- Dictionaries from other sources are updated every time by default. + +For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: + +- The dictionary table must have a field that always changes when the source data is updated. +- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `` field in the settings for the [source](external_dicts_dict_sources.md). + +Example of settings: + +```xml + + ... + + ... + SELECT update_time FROM dictionary_source where id = 1 + + ... + +``` + +or + +```sql +... +SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1')) +... +``` + + +[Original article](https://clickhouse.yandex/docs/en/query_language/dicts/external_dicts_dict_lifetime/) diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index fc4ed0446e6..89d922793ec 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -135,27 +135,66 @@ SELECT * FROM system.contributors WHERE name='Olga Khvostikova' Такие куски могут быть присоединены с помощью [ALTER TABLE ATTACH PARTITION\|PART](../query_language/query_language/alter/#alter_attach-partition). Остальные столбцы описаны в [system.parts](#system_tables-parts). Если имя куска некорректно, значения некоторых столбцов могут быть `NULL`. Такие куски могут быть удалены с помощью [ALTER TABLE DROP DETACHED PART](../query_language/query_language/alter/#alter_drop-detached). -## system.dictionaries {#system-dictionaries} +## system.dictionaries {#system_tables-dictionaries} -Содержит информацию о внешних словарях. +Содержит информацию о [внешних словарях](../query_language/dicts/external_dicts.md). Столбцы: -- `name String` — Имя словаря. -- `type String` — Тип словаря: Flat, Hashed, Cache. -- `origin String` — Путь к конфигурационному файлу, в котором описан словарь. -- `attribute.names Array(String)` — Массив имён атрибутов, предоставляемых словарём. -- `attribute.types Array(String)` — Соответствующий массив типов атрибутов, предоставляемых словарём. -- `has_hierarchy UInt8` — Является ли словарь иерархическим. 
-- `bytes_allocated UInt64` — Количество оперативной памяти, которое использует словарь. -- `hit_rate Float64` — Для cache-словарей - доля использований, для которых значение было в кэше. -- `element_count UInt64` — Количество хранящихся в словаре элементов. -- `load_factor Float64` — Доля заполненности словаря (для hashed словаря - доля заполнения хэш-таблицы). -- `creation_time DateTime` — Время создания или последней успешной перезагрузки словаря. -- `last_exception String` — Текст ошибки, возникшей при создании или перезагрузке словаря, если словарь не удалось создать. -- `source String` - Текст, описывающий источник данных для словаря. +- `database` ([String](../data_types/string.md)) — Имя базы данных, в которой находится словарь. Только для словарей, созданных с помощью DDL-запроса, для остальных — всегда пустая строка. +- `name` ([String](../data_types/string.md)) — [Имя словаря](../query_language/dicts/external_dicts_dict.md). +- `status` ([Enum8](../data_types/enum.md)) — Статус словаря. Возможные значения: + - `NOT_LOADED` — Словарь не загружен, потому что не использовался. + - `LOADED` — Словарь загружен успешно. + - `FAILED` — Словарь не загружен в результате ошибки. + - `LOADING` — Словарь в процессе загрузки. + - `LOADED_AND_RELOADING` — Словарь загружен успешно, сейчас перезагружается (частые причины: запрос [SYSTEM RELOAD DICTIONARY](../query_language/system.md#query_language-system-reload-dictionary), таймаут, изменение настроек словаря). + - `FAILED_AND_RELOADING` — Словарь не загружен в результате ошибки, сейчас перезагружается. +- `origin` ([String](../data_types/string.md)) — Путь к конфигурационному файлу, описывающему словарь. +- `type` ([String](../data_types/string.md)) — Тип размещения словаря. [Хранение словарей в памяти](../query_language/dicts/external_dicts_dict_layout.md). +- `key` — [Тип ключа](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-key): Числовой ключ ([UInt64](../data_types/int_uint.md#uint-ranges)) или Составной ключ ([String](../data_types/string.md)) — строка вида "(тип 1, тип 2, ..., тип n)". +- `attribute.names` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Массив [имен атрибутов](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `attribute.types` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Соответствующий массив [типов атрибутов](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `bytes_allocated` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Объем оперативной памяти, используемый словарем. +- `query_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Количество запросов с момента загрузки словаря или с момента последней успешной перезагрузки. +- `hit_rate` ([Float64](../data_types/float.md)) — Для cache-словарей — процент закэшированных значений. +- `element_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Количество элементов, хранящихся в словаре. +- `load_factor` ([Float64](../data_types/float.md)) — Процент заполнения словаря (для хэшированного словаря — процент заполнения хэш-таблицы). +- `source` ([String](../data_types/string.md)) — Текст, описывающий [источник данных](../query_language/dicts/external_dicts_dict_sources.md) для словаря. 
+- `lifetime_min` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Минимальное [время обновления](../query_language/dicts/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. +- `lifetime_max` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Максимальное [время обновления](../query_language/dicts/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. +- `loading_start_time` ([DateTime](../data_types/datetime.md)) — Время начала загрузки словаря. +- `loading_duration` ([Float32](../data_types/float.md)) — Время, затраченное на загрузку словаря. +- `last_exception` ([String](../data_types/string.md)) — Текст ошибки, возникающей при создании или перезагрузке словаря, если словарь не удалось создать. -Заметим, что количество оперативной памяти, которое использует словарь, не является пропорциональным количеству элементов, хранящихся в словаре. Так, для flat и cached словарей, все ячейки памяти выделяются заранее, независимо от реальной заполненности словаря. +**Пример** + +Настройте словарь. + +```sql +CREATE DICTIONARY dictdb.dict +( + `key` Int64 DEFAULT -1, + `value_default` String DEFAULT 'world', + `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) +LIFETIME(MIN 0 MAX 1) +LAYOUT(FLAT()) +``` + +Убедитесь, что словарь загружен. + +```sql +SELECT * FROM system.dictionaries +``` + +```text +┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ +│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ +└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ +``` ## system.events {#system_tables-events} From 1aeacfb071fdd8c2c304cce5bdb07df8448b7c68 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Wed, 8 Apr 2020 16:30:02 +0300 Subject: [PATCH 109/752] Updated fit new structure of en docs. 
--- .../dicts/external_dicts_dict_lifetime.md | 83 ------------------- .../external_dicts_dict_lifetime.md | 5 ++ 2 files changed, 5 insertions(+), 83 deletions(-) delete mode 100644 docs/en/query_language/dicts/external_dicts_dict_lifetime.md diff --git a/docs/en/query_language/dicts/external_dicts_dict_lifetime.md b/docs/en/query_language/dicts/external_dicts_dict_lifetime.md deleted file mode 100644 index 7f7fb08b0ef..00000000000 --- a/docs/en/query_language/dicts/external_dicts_dict_lifetime.md +++ /dev/null @@ -1,83 +0,0 @@ - -# Dictionary Updates - -ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `` tag in seconds. - -Dictionary updates (other than loading for first use) do not block queries. During updates, the old version of a dictionary is used. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. - -Example of settings: - -```xml - - ... - 300 - ... - -``` - -```sql -CREATE DICTIONARY (...) -... -LIFETIME(300) -... -``` - -Setting `0` (`LIFETIME(0)`) prevents dictionaries from updating. - -You can set a time interval for upgrades, and ClickHouse will choose a uniformly random time within this range. This is necessary in order to distribute the load on the dictionary source when upgrading on a large number of servers. - -Example of settings: - -```xml - - ... - - 300 - 360 - - ... - -``` - -or - -```sql -LIFETIME(MIN 300 MAX 360) -``` -If `0` and `0`, ClickHouse does not reload the dictionary by timeout. -In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed. - -When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [ source](external_dicts_dict_sources.md): - -- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. -- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query. -- Dictionaries from other sources are updated every time by default. - -For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: - -- The dictionary table must have a field that always changes when the source data is updated. -- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `` field in the settings for the [source](external_dicts_dict_sources.md). - -Example of settings: - -```xml - - ... - - ... - SELECT update_time FROM dictionary_source where id = 1 - - ... - -``` - -or - -```sql -... -SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1')) -... 
-``` - - -[Original article](https://clickhouse.yandex/docs/en/query_language/dicts/external_dicts_dict_lifetime/) diff --git a/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md b/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md index 645c6347f66..97d5b6e4474 100644 --- a/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md +++ b/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md @@ -49,6 +49,11 @@ or LIFETIME(MIN 300 MAX 360) ``` +If `0` and `0`, ClickHouse does not reload the dictionary by timeout. +In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed. + +When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](external_dicts_dict_sources.md): + When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](external_dicts_dict_sources.md): - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. From b06801a2c098ca734d51566c6d7748b44d8142d1 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Wed, 8 Apr 2020 18:09:40 +0300 Subject: [PATCH 110/752] CLICKHOUSEDOCS-475: Links fixed. --- docs/en/operations/system_tables.md | 44 ++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index b8971108eba..60d13b939fb 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -149,36 +149,36 @@ Contains information about detached parts of [MergeTree](../engines/table_engine ## system.dictionaries {#system_tables-dictionaries} -Contains information about [external dictionaries](../query_language/dicts/external_dicts.md). +Contains information about [external dictionaries](../sql_reference/dictionaries/external_dictionaries/external_dicts.md). Columns: -- `database` ([String](../data_types/string.md)) — Database name where the dictionary is located. Only for dictionaries created by DDL query, for others is always an empty string. -- `name` ([String](../data_types/string.md)) — [Dictionary name](../query_language/dicts/external_dicts_dict.md). -- `status` ([Enum8](../data_types/enum.md)) — Dictionary status. Possible values: +- `database` ([String](../sql_reference/data_types/string.md)) — Database name where the dictionary is located. Only for dictionaries created by DDL query, for others is always an empty string. +- `name` ([String](../sql_reference/data_types/string.md)) — [Dictionary name](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md). +- `status` ([Enum8](../sql_reference/data_types/enum.md)) — Dictionary status. Possible values: - `NOT_LOADED` — Dictionary was not loaded because it was not used. - `LOADED` — Dictionary loaded successfully. - `FAILED` — Unable to load the dictionary as a result of an error. - `LOADING` — Dictionary is loading now. - - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../query_language/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). 
+ - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../sql_reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. -- `origin` ([String](../data_types/string.md)) — Path to the configuration file that describes the dictionary. -- `type` ([String](../data_types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../query_language/dicts/external_dicts_dict_layout.md). -- `key` — [Key type](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-key): Numeric Key ([UInt64](../data_types/int_uint.md#uint-ranges)) or Сomposite key ([String](../data_types/string.md)) — form "(type 1, type 2, ..., type n)". -- `attribute.names` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Array of [attribute names](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes) provided by the dictionary. -- `attribute.types` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Corresponding array of [attribute types](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes) that are provided by the dictionary. -- `bytes_allocated` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. -- `query_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. -- `hit_rate` ([Float64](../data_types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `element_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Number of items stored in the dictionary. -- `load_factor` ([Float64](../data_types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `source` ([String](../data_types/string.md)) — Text describing the [data source](../query_language/dicts/external_dicts_dict_sources.md) for the dictionary. -- `lifetime_min` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Minimum [lifetime](../query_language/dicts/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `lifetime_max` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Maximum [lifetime](../query_language/dicts/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `loading_start_time` ([DateTime](../data_types/datetime.md)) — Start time for loading the dictionary. -- `last_successful_update_time` ([DateTime](../data_types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes. -- `loading_duration` ([Float32](../data_types/float.md)) — Duration of a dictionary loading. -- `last_exception` ([String](../data_types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created. +- `origin` ([String](../sql_reference/data_types/string.md)) — Path to the configuration file that describes the dictionary. 
+- `type` ([String](../sql_reference/data_types/string.md)) — Type of dictionary allocation. [Storing Dictionaries in Memory](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md).
+- `key` — [Key type](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-key): Numeric Key ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) or Composite key ([String](../sql_reference/data_types/string.md)) — form "(type 1, type 2, ..., type n)".
+- `attribute.names` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Array of [attribute names](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes) provided by the dictionary.
+- `attribute.types` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Corresponding array of [attribute types](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes) that are provided by the dictionary.
+- `bytes_allocated` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary.
+- `query_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot.
+- `hit_rate` ([Float64](../sql_reference/data_types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache.
+- `element_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Number of items stored in the dictionary.
+- `load_factor` ([Float64](../sql_reference/data_types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table).
+- `source` ([String](../sql_reference/data_types/string.md)) — Text describing the [data source](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md) for the dictionary.
+- `lifetime_min` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Minimum [lifetime](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
+- `lifetime_max` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Maximum [lifetime](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
+- `loading_start_time` ([DateTime](../sql_reference/data_types/datetime.md)) — Start time for loading the dictionary.
+- `last_successful_update_time` ([DateTime](../sql_reference/data_types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor troubles with external sources and investigate their causes.
+- `loading_duration` ([Float32](../sql_reference/data_types/float.md)) — Duration of dictionary loading.
+- `last_exception` ([String](../sql_reference/data_types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created.
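These columns can be checked with a direct query; a minimal sketch, assuming at least one dictionary is configured on the server (the exact rows depend on the deployment):

``` sql
SELECT name, status, type, element_count, lifetime_min, lifetime_max
FROM system.dictionaries
```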
**Example** From c8c4dc8104a0b9c0e59457885b0dfc6c5a09994f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 8 Apr 2020 19:20:52 +0300 Subject: [PATCH 111/752] fix 'ALTER MODIFY COLUMN' with compact parts --- src/Storages/MergeTree/IMergeTreeReader.cpp | 24 ++++++++++--- src/Storages/MergeTree/IMergeTreeReader.h | 11 ++++-- .../MergeTree/MergeTreeReaderCompact.cpp | 20 ++--------- .../MergeTree/MergeTreeReaderWide.cpp | 36 ++----------------- 4 files changed, 34 insertions(+), 57 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 4978aeaaa58..8243983d837 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -32,6 +32,8 @@ IMergeTreeReader::IMergeTreeReader(const MergeTreeData::DataPartPtr & data_part_ , all_mark_ranges(all_mark_ranges_) , alter_conversions(storage.getAlterConversionsForPart(data_part)) { + for (const NameAndTypePair & column_from_part : data_part->getColumns()) + columns_from_part[column_from_part.name] = column_from_part.type; } IMergeTreeReader::~IMergeTreeReader() = default; @@ -183,6 +185,23 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns } } +NameAndTypePair IMergeTreeReader::getColumnFromPart(const NameAndTypePair & required_column) const +{ + auto it = columns_from_part.find(required_column.name); + if (it != columns_from_part.end()) + return {it->first, it->second}; + + if (alter_conversions.isColumnRenamed(required_column.name)) + { + String old_name = alter_conversions.getColumnOldName(required_column.name); + it = columns_from_part.find(old_name); + if (it != columns_from_part.end()) + return {it->first, it->second}; + } + + return required_column; +} + void IMergeTreeReader::performRequiredConversions(Columns & res_columns) { try @@ -209,10 +228,7 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) if (res_columns[pos] == nullptr) continue; - if (columns_from_part.count(name_and_type->name)) - copy_block.insert({res_columns[pos], columns_from_part[name_and_type->name], name_and_type->name}); - else - copy_block.insert({res_columns[pos], name_and_type->type, name_and_type->name}); + copy_block.insert({res_columns[pos], getColumnFromPart(*name_and_type).type, name_and_type->name}); } DB::performRequiredConversions(copy_block, columns, storage.global_context); diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 622e11dae8b..02d8f67f9d0 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -4,7 +4,6 @@ #include #include - namespace DB { @@ -59,6 +58,9 @@ public: MergeTreeData::DataPartPtr data_part; protected: + /// Returns actual column type in part, which can differ from table metadata. + NameAndTypePair getColumnFromPart(const NameAndTypePair & required_column) const; + /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. ValueSizeMap avg_value_size_hints; /// Stores states for IDataType::deserializeBinaryBulk @@ -67,8 +69,6 @@ protected: /// Columns that are read. 
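    /// Note: these hold the table-level names and types requested by the query;
    /// the actual name and type inside a concrete part are resolved via getColumnFromPart() above.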
NamesAndTypesList columns; - std::unordered_map columns_from_part; - UncompressedCache * uncompressed_cache; MarkCache * mark_cache; @@ -78,8 +78,13 @@ protected: MarkRanges all_mark_ranges; friend class MergeTreeRangeReader::DelayedStream; + +private: /// Alter conversions, which must be applied on fly if required MergeTreeData::AlterConversions alter_conversions; + + /// Actual data type of columns in part + std::unordered_map columns_from_part; }; } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index e4f7275f4a5..a895149e12e 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -78,15 +78,9 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( auto name_and_type = columns.begin(); for (size_t i = 0; i < columns_num; ++i, ++name_and_type) { - const auto & [name, type] = *name_and_type; + const auto & [name, type] = getColumnFromPart(*name_and_type); auto position = data_part->getColumnPosition(name); - if (!position && alter_conversions.isColumnRenamed(name)) - { - String old_name = alter_conversions.getColumnOldName(name); - position = data_part->getColumnPosition(old_name); - } - if (!position && typeid_cast(type.get())) { /// If array of Nested column is missing in part, @@ -118,7 +112,7 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, bool append = res_columns[i] != nullptr; if (!append) - res_columns[i] = column_it->type->createColumn(); + res_columns[i] = getColumnFromPart(*column_it).type->createColumn(); mutable_columns[i] = res_columns[i]->assumeMutable(); } @@ -132,15 +126,7 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, if (!res_columns[pos]) continue; - auto [name, type] = *name_and_type; - - if (alter_conversions.isColumnRenamed(name)) - { - String old_name = alter_conversions.getColumnOldName(name); - if (!data_part->getColumnPosition(name) && data_part->getColumnPosition(old_name)) - name = old_name; - } - + auto [name, type] = getColumnFromPart(*name_and_type); auto & column = mutable_columns[pos]; try diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index ad676b4db03..1a03acb5758 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -41,28 +41,10 @@ MergeTreeReaderWide::MergeTreeReaderWide( { try { - for (const NameAndTypePair & column_from_part : data_part->getColumns()) - columns_from_part[column_from_part.name] = column_from_part.type; - for (const NameAndTypePair & column : columns) { - if (columns_from_part.count(column.name)) - { - addStreams(column.name, *columns_from_part[column.name], profile_callback_, clock_type_); - } - else - { - if (alter_conversions.isColumnRenamed(column.name)) - { - String old_name = alter_conversions.getColumnOldName(column.name); - if (columns_from_part.count(old_name)) - addStreams(old_name, *columns_from_part[old_name], profile_callback_, clock_type_); - } - else - { - addStreams(column.name, *column.type, profile_callback_, clock_type_); - } - } + auto column_from_part = getColumnFromPart(column); + addStreams(column_from_part.name, *column_from_part.type, profile_callback_, clock_type_); } } catch (...) 
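The case these getColumnFromPart call sites cover is reading a renamed column from parts written before the rename. A hypothetical SQL repro, with illustrative table and column names:

``` sql
-- Parts written before the rename still store the column as `x`;
-- the reader has to map `y` back to `x` when it reads those parts.
CREATE TABLE t (d Date, x UInt32) ENGINE = MergeTree ORDER BY d;
INSERT INTO t VALUES ('2020-01-01', 1);
ALTER TABLE t RENAME COLUMN x TO y;
SELECT y FROM t;
```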
@@ -93,19 +75,7 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si auto name_and_type = columns.begin(); for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) { - String name = name_and_type->name; - if (alter_conversions.isColumnRenamed(name)) - { - String original_name = alter_conversions.getColumnOldName(name); - if (!columns_from_part.count(name) && columns_from_part.count(original_name)) - name = original_name; - } - - DataTypePtr type; - if (columns_from_part.count(name)) - type = columns_from_part[name]; - else - type = name_and_type->type; + auto [name, type] = getColumnFromPart(*name_and_type); /// The column is already present in the block so we will append the values to the end. bool append = res_columns[pos] != nullptr; From 1e3ec9113a77d44772336050b6fe8fd7d41b43c0 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Wed, 8 Apr 2020 19:29:08 +0300 Subject: [PATCH 112/752] Fix compiler possible values --- docker/packager/packager | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 360a358c6e5..10b4c7e901c 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -214,7 +214,7 @@ if __name__ == "__main__": parser.add_argument("--clickhouse-repo-path", default="../../") parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") - parser.add_argument("--compiler", choices=("clang-8", "clang-8-darwin", "clang-8-aarch64", "gcc-8", "gcc-9", "clang-9"), default="gcc-8") + parser.add_argument("--compiler", choices=("clang-8", "clang-8-darwin", "clang-9-aarch64", "clang-9-freebsd", "gcc-8", "gcc-9", "clang-9"), default="gcc-8") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") parser.add_argument("--unbundled", action="store_true") parser.add_argument("--split-binary", action="store_true") From ae2a05d4143fa23586f684f129e58178b5b91160 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 8 Apr 2020 19:33:57 +0300 Subject: [PATCH 113/752] add test with 'ALTER MODIFY' and compact parts --- .../01114_alter_modify_compact_parts.reference | 1 + .../0_stateless/01114_alter_modify_compact_parts.sql | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/01114_alter_modify_compact_parts.reference create mode 100644 tests/queries/0_stateless/01114_alter_modify_compact_parts.sql diff --git a/tests/queries/0_stateless/01114_alter_modify_compact_parts.reference b/tests/queries/0_stateless/01114_alter_modify_compact_parts.reference new file mode 100644 index 00000000000..4ec38dfb475 --- /dev/null +++ b/tests/queries/0_stateless/01114_alter_modify_compact_parts.reference @@ -0,0 +1 @@ +999000 diff --git a/tests/queries/0_stateless/01114_alter_modify_compact_parts.sql b/tests/queries/0_stateless/01114_alter_modify_compact_parts.sql new file mode 100644 index 00000000000..a5aa12548e7 --- /dev/null +++ b/tests/queries/0_stateless/01114_alter_modify_compact_parts.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS mt_compact; + +CREATE TABLE mt_compact (d Date, id UInt32, s String) + ENGINE = MergeTree ORDER BY id PARTITION BY d + SETTINGS min_bytes_for_wide_part = 10000000, index_granularity = 128; + +INSERT INTO mt_compact SELECT toDate('2020-01-05'), number, toString(number) FROM numbers(1000); +INSERT INTO mt_compact SELECT toDate('2020-01-06'), number, toString(number) FROM numbers(1000); +ALTER TABLE mt_compact MODIFY COLUMN s 
UInt64; +SELECT sum(s) from mt_compact; + +DROP TABLE IF EXISTS mt_compact; From 9ed708b9027d47b7d0bb8326cdf54dce36afebd7 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 7 Apr 2020 13:14:49 +0800 Subject: [PATCH 114/752] ISSUES-10056 add some check and support identifier argument for MySQL Database Engine --- src/Databases/DatabaseFactory.cpp | 34 ++++++++++++++++--- .../test_mysql_database_engine/test.py | 7 +++- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 40e5682565d..b6300ab3482 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,28 @@ DatabasePtr DatabaseFactory::get( } } +template +static inline ValueType getLiteralValue(const ASTPtr & ast, const String & engine_name) +{ + if (!ast || !ast->as()) + throw Exception("Database engine " + engine_name + " requested literal argument.", ErrorCodes::BAD_ARGUMENTS); + + return ast->as()->value.safeGet(); +} + +[[maybe_unused]] static inline String getIdentifierOrStringLiteral(const ASTPtr & ast, const String & engine_name) +{ + if (ast) + { + if (const auto & literal = ast->as()) + return literal->value.safeGet(); + else if (const auto & identifier = ast->as()) + return identifier->name; + } + + throw Exception("Database engine " + engine_name + " requested literal or identifier argument.", ErrorCodes::BAD_ARGUMENTS); +} + DatabasePtr DatabaseFactory::getImpl( const String & database_name, const String & metadata_path, const ASTStorage * engine_define, Context & context) { @@ -79,11 +102,12 @@ DatabasePtr DatabaseFactory::getImpl( throw Exception("MySQL Database require mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.", ErrorCodes::BAD_ARGUMENTS); + const auto & arguments = engine->arguments->children; - const auto & host_name_and_port = arguments[0]->as()->value.safeGet(); - const auto & database_name_in_mysql = arguments[1]->as()->value.safeGet(); - const auto & mysql_user_name = arguments[2]->as()->value.safeGet(); - const auto & mysql_user_password = arguments[3]->as()->value.safeGet(); + const auto & host_name_and_port = getLiteralValue(arguments[0], "MySQL"); + const auto & database_name_in_mysql = getIdentifierOrStringLiteral(arguments[1], "MySQL"); + const auto & mysql_user_name = getLiteralValue(arguments[2], "MySQL"); + const auto & mysql_user_password = getLiteralValue(arguments[3], "MySQL"); try { @@ -114,7 +138,7 @@ DatabasePtr DatabaseFactory::getImpl( const auto & arguments = engine->arguments->children; - const auto cache_expiration_time_seconds = arguments[0]->as()->value.safeGet(); + const auto cache_expiration_time_seconds = getLiteralValue(arguments[0], "Lazy"); return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context); } diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 86e0b9df5fd..42663e46752 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -92,7 +92,7 @@ def test_clickhouse_dml_for_mysql_database(started_cluster): with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', port=3308)) as mysql_node: mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") mysql_node.query('CREATE TABLE `test_database`.`test_table` ( `i``d` int(11) NOT NULL, 
PRIMARY KEY (`i``d`)) ENGINE=InnoDB;') - clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql1:3306', 'test_database', 'root', 'clickhouse')") + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql1:3306', test_database, 'root', 'clickhouse')") assert clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`").rstrip() == '0' clickhouse_node.query("INSERT INTO `test_database`.`test_table`(`i\`d`) select number from numbers(10000)") @@ -120,3 +120,8 @@ def test_clickhouse_join_for_mysql_database(started_cluster): "LEFT JOIN default.t1_remote_mysql AS s_ref " "ON (s_ref.opco = s.opco AND s_ref.service = s.service)") == '' mysql_node.query("DROP DATABASE test") + +def test_bad_arguments_for_mysql_database_engine(started_cluster): + assert clickhouse_node.query( + "CREATE TABLE default.t1_remote_mysql AS mysql('mysql1:3306', 'test', 't1_mysql_local', root, 'clickhouse')").find( + 'Database engine MySQL requested literal argument.') != -1 From aa0fcf40886f06cd66711071d734198019895348 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 7 Apr 2020 20:25:01 +0800 Subject: [PATCH 115/752] ISSUES-10056 update docs --- docs/en/engines/database_engines/mysql.md | 4 ++-- docs/ru/database_engines/mysql.md | 4 +--- docs/zh/engines/database_engines/mysql.md | 4 +--- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/docs/en/engines/database_engines/mysql.md b/docs/en/engines/database_engines/mysql.md index 678c174e1fb..467a3aa032d 100644 --- a/docs/en/engines/database_engines/mysql.md +++ b/docs/en/engines/database_engines/mysql.md @@ -3,7 +3,7 @@ toc_priority: 30 toc_title: MySQL --- -# Mysql {#mysql} +# MySQL {#mysql} Allows to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL. 
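The signature hunk below reflects the code change from patch 114: the database argument of the MySQL engine now accepts an identifier as well as a string literal. A brief usage sketch; host, database name, and credentials are illustrative:

``` sql
CREATE DATABASE mysql_db ENGINE = MySQL('mysql1:3306', test_database, 'root', 'clickhouse')
CREATE DATABASE mysql_db2 ENGINE = MySQL('mysql1:3306', 'test_database', 'root', 'clickhouse')
```

Both forms should be equivalent after the change; the identifier form is the one exercised by the updated integration test.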
@@ -19,7 +19,7 @@ You cannot perform the following queries: ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') +ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') ``` **Engine Parameters** diff --git a/docs/ru/database_engines/mysql.md b/docs/ru/database_engines/mysql.md index 420ca370297..45547407be6 100644 --- a/docs/ru/database_engines/mysql.md +++ b/docs/ru/database_engines/mysql.md @@ -6,8 +6,6 @@ Не поддерживаемые виды запросов: -- `ATTACH`/`DETACH` -- `DROP` - `RENAME` - `CREATE TABLE` - `ALTER` @@ -16,7 +14,7 @@ ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') +ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') ``` **Параметры движка** diff --git a/docs/zh/engines/database_engines/mysql.md b/docs/zh/engines/database_engines/mysql.md index 78844154bce..80ff82ec2d3 100644 --- a/docs/zh/engines/database_engines/mysql.md +++ b/docs/zh/engines/database_engines/mysql.md @@ -7,8 +7,6 @@ MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并 但您无法对其执行以下操作: -- `ATTACH`/`DETACH` -- `DROP` - `RENAME` - `CREATE TABLE` - `ALTER` @@ -17,7 +15,7 @@ MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并 ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') +ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') ``` **MySQL数据库引擎参数** From 9eb96b87db4ae5b1fba90640aa65205ad1fc8379 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 8 Apr 2020 13:41:11 +0800 Subject: [PATCH 116/752] ISSUES-10056 reused evaluateConstantExpressionOrIdentifierAsLiteral --- src/Databases/DatabaseFactory.cpp | 30 +++++++------------ .../test_mysql_database_engine/test.py | 18 +++++++---- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index b6300ab3482..f1cea04dc29 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -16,6 +16,7 @@ #if USE_MYSQL #include +#include #endif @@ -51,7 +52,7 @@ DatabasePtr DatabaseFactory::get( } template -static inline ValueType getLiteralValue(const ASTPtr & ast, const String & engine_name) +static inline ValueType safeGetLiteralValue(const ASTPtr &ast, const String &engine_name) { if (!ast || !ast->as()) throw Exception("Database engine " + engine_name + " requested literal argument.", ErrorCodes::BAD_ARGUMENTS); @@ -59,19 +60,6 @@ static inline ValueType getLiteralValue(const ASTPtr & ast, const String & engin return ast->as()->value.safeGet(); } -[[maybe_unused]] static inline String getIdentifierOrStringLiteral(const ASTPtr & ast, const String & engine_name) -{ - if (ast) - { - if (const auto & literal = ast->as()) - return literal->value.safeGet(); - else if (const auto & identifier = ast->as()) - return identifier->name; - } - - throw Exception("Database engine " + engine_name + " requested literal or identifier argument.", ErrorCodes::BAD_ARGUMENTS); -} - DatabasePtr DatabaseFactory::getImpl( const String & database_name, const String & metadata_path, const ASTStorage * engine_define, Context & context) { @@ -103,11 +91,13 @@ DatabasePtr DatabaseFactory::getImpl( ErrorCodes::BAD_ARGUMENTS); - const auto & arguments = engine->arguments->children; - const auto & host_name_and_port = getLiteralValue(arguments[0], "MySQL"); - const auto & database_name_in_mysql = getIdentifierOrStringLiteral(arguments[1], 
"MySQL"); - const auto & mysql_user_name = getLiteralValue(arguments[2], "MySQL"); - const auto & mysql_user_password = getLiteralValue(arguments[3], "MySQL"); + ASTs & arguments = engine->arguments->children; + arguments[1] = evaluateConstantExpressionOrIdentifierAsLiteral(arguments[1], context); + + const auto & host_name_and_port = safeGetLiteralValue(arguments[0], "MySQL"); + const auto & database_name_in_mysql = safeGetLiteralValue(arguments[1], "MySQL"); + const auto & mysql_user_name = safeGetLiteralValue(arguments[2], "MySQL"); + const auto & mysql_user_password = safeGetLiteralValue(arguments[3], "MySQL"); try { @@ -138,7 +128,7 @@ DatabasePtr DatabaseFactory::getImpl( const auto & arguments = engine->arguments->children; - const auto cache_expiration_time_seconds = getLiteralValue(arguments[0], "Lazy"); + const auto cache_expiration_time_seconds = safeGetLiteralValue(arguments[0], "Lazy"); return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context); } diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 42663e46752..2791cc7b382 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -5,6 +5,7 @@ import pymysql.cursors import pytest from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) clickhouse_node = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_mysql=True) @@ -116,12 +117,17 @@ def test_clickhouse_join_for_mysql_database(started_cluster): clickhouse_node.query("CREATE TABLE default.t1_remote_mysql AS mysql('mysql1:3306','test','t1_mysql_local','root','clickhouse')") clickhouse_node.query("CREATE TABLE default.t2_remote_mysql AS mysql('mysql1:3306','test','t2_mysql_local','root','clickhouse')") assert clickhouse_node.query("SELECT s.pays " - "FROM default.t1_remote_mysql AS s " - "LEFT JOIN default.t1_remote_mysql AS s_ref " - "ON (s_ref.opco = s.opco AND s_ref.service = s.service)") == '' + "FROM default.t1_remote_mysql AS s " + "LEFT JOIN default.t1_remote_mysql AS s_ref " + "ON (s_ref.opco = s.opco AND s_ref.service = s.service)") == '' mysql_node.query("DROP DATABASE test") + def test_bad_arguments_for_mysql_database_engine(started_cluster): - assert clickhouse_node.query( - "CREATE TABLE default.t1_remote_mysql AS mysql('mysql1:3306', 'test', 't1_mysql_local', root, 'clickhouse')").find( - 'Database engine MySQL requested literal argument.') != -1 + with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', port=3308)) as mysql_node: + with pytest.raises(QueryRuntimeException) as exception: + mysql_node.query("CREATE DATABASE IF NOT EXISTS test_bad_arguments DEFAULT CHARACTER SET 'utf8'") + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql1:3306', test_bad_arguments, root, 'clickhouse')") + + assert 'Database engine MySQL requested literal argument.' in str(exception.value) + mysql_node.query("DROP DATABASE test_bad_arguments") From 3b4682d1791716510847a7ffba6e1a6179687250 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 8 Apr 2020 20:46:48 +0300 Subject: [PATCH 117/752] Updated 01232_preparing_sets_race_condition. 
---
 .../01232_preparing_sets_race_condition.reference | 9 ---------
 .../0_stateless/01232_preparing_sets_race_condition.sh | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference b/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference
index 2fc36ed5c97..45a4fb75db8 100644
--- a/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference
+++ b/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference
@@ -1,10 +1 @@
 8
-8
-8
-8
-8
-8
-8
-8
-8
-8
diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh b/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh
index 5f7b76c0e99..25a8cdb12ea 100755
--- a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh
+++ b/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh
@@ -18,7 +18,7 @@ echo "
 insert into tableB select number, number % 100000, addDays(toDate('2020-01-01'), number % 90) from numbers(50000000);
 " | $CLICKHOUSE_CLIENT -n

-for i in {1..10}; do echo "
+for i in {1..1}; do echo "
 SELECT tableName
 FROM
 (

From 824255f603abbf1363c6bfe2e9a078011f9131d8 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 8 Apr 2020 11:18:35 +0300
Subject: [PATCH 118/752] Update tests for multiline SHOW CREATE

The following has been used for this:

git grep '^CREATE' tests/queries/**.reference | cut -d: -f1 | sort -u | xargs -n1 -i sh -c 'show-create-rewrite.py < {} | sponge {}'

show-create-rewrite.py is available here:
https://gist.github.com/azat/916b98b5ddf9573f7dc9a4dce33b59b5

And for the 00998_constraints_all_tables test, I simply dropped FORMAT TSVRaw.
---
 .../00061_merge_tree_alter.reference | 20 ++++----
 ...cated_merge_tree_alter_zookeeper.reference | 48 +++++++++----------
 ...00564_temporary_table_management.reference | 2 +-
 .../00599_create_view_with_subquery.reference | 2 +-
 .../00604_show_create_database.reference | 2 +-
 .../queries/0_stateless/00642_cast.reference | 9 +++-
 .../00643_cast_zookeeper.reference | 9 +++-
 .../00725_comment_columns.reference | 12 ++---
 .../00725_ipv4_ipv6_domains.reference | 4 +-
 ...51_default_databasename_for_view.reference | 27 ++++++++++-
 .../00753_comment_columns_zookeeper.reference | 4 +-
 ...4_alter_modify_column_partitions.reference | 4 +-
 .../00754_alter_modify_order_by.reference | 2 +-
 ...fy_order_by_replicated_zookeeper.reference | 4 +-
 ...4_test_custom_compression_codecs.reference | 4 +-
 ...m_compression_codes_log_storages.reference | 8 ++--
 .../0_stateless/00836_indices_alter.reference | 10 ++--
 ...dices_alter_replicated_zookeeper.reference | 24 +++++-----
 ...om_compression_codecs_replicated.reference | 2 +-
 .../00916_create_or_replace_view.reference | 4 +-
 .../0_stateless/00933_alter_ttl.reference | 2 +-
 .../00933_ttl_replicated_zookeeper.reference | 2 +-
 .../0_stateless/00933_ttl_simple.reference | 8 ++--
 .../00980_merge_alter_settings.reference | 10 ++--
 ...keeper_merge_tree_alter_settings.reference | 12 ++---
 .../00998_constraints_all_tables.reference | 4 +-
 .../00998_constraints_all_tables.sql | 4 +-
 .../01018_ddl_dictionaries_create.reference | 2 +-
 ...age_odbc_parsing_exception_check.reference | 2 +-
 .../01055_compact_parts_1.reference | 4 +-
 .../01069_database_memory.reference | 4 +-
 .../01070_alter_with_ttl.reference | 4 +-
 .../01079_alter_default_zookeeper.reference | 16 +++----
 .../01079_bad_alters_zookeeper.reference | 4 +-
 ..._expressions_in_engine_arguments.reference | 14 +++---
 ...tionary_layout_without_arguments.reference
| 2 +- ...13_alter_rename_column_zookeeper.reference | 4 +- .../01213_alter_rename_nested.reference | 6 +-- ...er_rename_with_default_zookeeper.reference | 10 ++-- .../01213_alter_table_rename_nested.reference | 4 +- 40 files changed, 179 insertions(+), 140 deletions(-) diff --git a/tests/queries/0_stateless/00061_merge_tree_alter.reference b/tests/queries/0_stateless/00061_merge_tree_alter.reference index 571affd7231..b609bc257f1 100644 --- a/tests/queries/0_stateless/00061_merge_tree_alter.reference +++ b/tests/queries/0_stateless/00061_merge_tree_alter.reference @@ -1,14 +1,14 @@ d Date k UInt64 i32 Int32 -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 10 42 d Date k UInt64 i32 Int32 n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 8 40 [1,2,3] ['12','13','14'] 2015-01-01 10 42 [] [] d Date @@ -17,7 +17,7 @@ i32 Int32 n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 10 42 [] [] [] @@ -28,7 +28,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), `s` String DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date), \n `s` String DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 @@ -39,7 +39,7 @@ i32 Int32 n.ui8 Array(UInt8) n.s Array(String) s Int64 DEFAULT \'0\' -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `s` Int64 DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` Int64 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 @@ -51,7 +51,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 DEFAULT \'0\' n.d Array(Date) -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `s` UInt32 DEFAULT \'0\', `n.d` Array(Date)) ENGINE = MergeTree(d, k, 
8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\', \n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -65,7 +65,7 @@ k UInt64 i32 Int32 n.s Array(String) s UInt32 DEFAULT \'0\' -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `n.s` Array(String), `s` UInt32 DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 ['120','130','140'] 0 2015-01-01 8 40 ['12','13','14'] 0 @@ -74,7 +74,7 @@ d Date k UInt64 i32 Int32 s UInt32 DEFAULT \'0\' -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `s` UInt32 DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 @@ -85,7 +85,7 @@ i32 Int32 s UInt32 DEFAULT \'0\' n.s Array(String) n.d Array(Date) -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `s` UInt32 DEFAULT \'0\', `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `s` UInt32 DEFAULT \'0\', \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 [] [] 2015-01-01 7 39 0 [] [] 2015-01-01 8 40 0 [] [] @@ -94,7 +94,7 @@ d Date k UInt64 i32 Int32 s UInt32 DEFAULT \'0\' -CREATE TABLE default.alter_00061 (`d` Date, `k` UInt64, `i32` Int32, `s` UInt32 DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 diff --git a/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference b/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference index 6f2eb080286..fa5e65d2d60 100644 --- a/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference +++ b/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference @@ -1,22 +1,22 @@ d Date k UInt64 i32 Int32 -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 10 42 d Date k UInt64 i32 Int32 dt DateTime -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` 
DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 9 41 1992-01-01 08:00:00 2015-01-01 10 42 0000-00-00 00:00:00 d Date @@ -25,14 +25,14 @@ i32 Int32 dt DateTime n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] 2015-01-01 10 42 0000-00-00 00:00:00 [] [] @@ -43,7 +43,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -51,7 +51,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] [] @@ -64,7 +64,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, 
`i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), `s` String DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date), \n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -73,7 +73,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), `s` String DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date), \n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 @@ -86,7 +86,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) s Int64 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` Int64 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -94,7 +94,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) s Int64 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` Int64 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 @@ -108,7 +108,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 DEFAULT \'0\' n.d Array(Date) -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` UInt32 DEFAULT \'0\', `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\', \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d 
Date k UInt64 i32 Int32 @@ -117,7 +117,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 DEFAULT \'0\' n.d Array(Date) -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` UInt32 DEFAULT \'0\', `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\', \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -129,14 +129,14 @@ i32 Int32 dt DateTime n.s Array(String) s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.s` Array(String), `s` UInt32 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime n.s Array(String) s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.s` Array(String), `s` UInt32 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 ['12','13','14'] 0 @@ -147,13 +147,13 @@ k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 2015-01-01 8 40 2012-12-12 12:12:12 0 @@ -166,7 +166,7 @@ dt DateTime s UInt32 DEFAULT \'0\' n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32 DEFAULT \'0\', `n.s` 
Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\', \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -174,7 +174,7 @@ dt DateTime s UInt32 DEFAULT \'0\' n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32 DEFAULT \'0\', `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\', \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 [] [] 2015-01-01 7 39 2014-07-14 13:26:50 0 [] [] 2015-01-01 8 40 2012-12-12 12:12:12 0 [] [] @@ -185,13 +185,13 @@ k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32 DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 2015-01-01 8 40 2012-12-12 12:12:12 0 @@ -202,13 +202,13 @@ k UInt64 i32 Int32 dt Date s DateTime DEFAULT \'0000-00-00 00:00:00\' -CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` Date, `s` DateTime DEFAULT \'0000-00-00 00:00:00\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` Date, \n `s` DateTime DEFAULT \'0000-00-00 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt Date s DateTime DEFAULT \'0000-00-00 00:00:00\' -CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` Date, `s` DateTime DEFAULT \'0000-00-00 00:00:00\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` Date, \n `s` DateTime DEFAULT \'0000-00-00 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 1970-01-02 06:55:00 2015-01-01 7 39 2014-07-14 0000-00-00 00:00:00 2015-01-01 8 40 2012-12-12 0000-00-00 00:00:00 diff --git a/tests/queries/0_stateless/00564_temporary_table_management.reference 
b/tests/queries/0_stateless/00564_temporary_table_management.reference index edd17b9ea39..4cfb4230223 100644 --- a/tests/queries/0_stateless/00564_temporary_table_management.reference +++ b/tests/queries/0_stateless/00564_temporary_table_management.reference @@ -1,4 +1,4 @@ 1 -CREATE TEMPORARY TABLE temp_tab (`number` UInt64) ENGINE = Memory +CREATE TEMPORARY TABLE temp_tab\n(\n `number` UInt64\n)\nENGINE = Memory temp_tab 0 diff --git a/tests/queries/0_stateless/00599_create_view_with_subquery.reference b/tests/queries/0_stateless/00599_create_view_with_subquery.reference index 13e0f35b075..d83d2837a18 100644 --- a/tests/queries/0_stateless/00599_create_view_with_subquery.reference +++ b/tests/queries/0_stateless/00599_create_view_with_subquery.reference @@ -1 +1 @@ -CREATE VIEW default.test_view_00599 (`id` UInt64) AS SELECT * FROM default.test_00599 WHERE id = (SELECT 1) +CREATE VIEW default.test_view_00599\n(\n `id` UInt64\n) AS\nSELECT *\nFROM default.test_00599\nWHERE id = \n(\n SELECT 1\n) diff --git a/tests/queries/0_stateless/00604_show_create_database.reference b/tests/queries/0_stateless/00604_show_create_database.reference index 1fe93a5e393..a9ad6abea25 100644 --- a/tests/queries/0_stateless/00604_show_create_database.reference +++ b/tests/queries/0_stateless/00604_show_create_database.reference @@ -1 +1 @@ -CREATE DATABASE test_00604 ENGINE = Ordinary +CREATE DATABASE test_00604\nENGINE = Ordinary diff --git a/tests/queries/0_stateless/00642_cast.reference b/tests/queries/0_stateless/00642_cast.reference index f75503efffe..907861c1784 100644 --- a/tests/queries/0_stateless/00642_cast.reference +++ b/tests/queries/0_stateless/00642_cast.reference @@ -7,7 +7,14 @@ hello hello hello 1970-01-01 00:00:01 -CREATE TABLE default.cast (`x` UInt8, `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = MergeTree ORDER BY e SETTINGS index_granularity = 8192 +CREATE TABLE default.cast +( + `x` UInt8, + `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') +) +ENGINE = MergeTree +ORDER BY e +SETTINGS index_granularity = 8192 x UInt8 e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.reference b/tests/queries/0_stateless/00643_cast_zookeeper.reference index 86a8b164844..b79eb07aee3 100644 --- a/tests/queries/0_stateless/00643_cast_zookeeper.reference +++ b/tests/queries/0_stateless/00643_cast_zookeeper.reference @@ -1,4 +1,11 @@ -CREATE TABLE test.cast1 (`x` UInt8, `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_cast', 'r1') ORDER BY e SETTINGS index_granularity = 8192 +CREATE TABLE test.cast1 +( + `x` UInt8, + `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_cast', 'r1') +ORDER BY e +SETTINGS index_granularity = 8192 x UInt8 e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/tests/queries/0_stateless/00725_comment_columns.reference b/tests/queries/0_stateless/00725_comment_columns.reference index 7204496753c..86794581daf 100644 --- a/tests/queries/0_stateless/00725_comment_columns.reference +++ b/tests/queries/0_stateless/00725_comment_columns.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.check_query_comment_column 
(`first_column` UInt8 DEFAULT 1 COMMENT \'comment 1\', `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2\', `third_column` UInt8 ALIAS second_column COMMENT \'comment 3\', `fourth_column` UInt8 COMMENT \'comment 4\', `fifth_column` UInt8) ENGINE = TinyLog +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1\', \n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2\', \n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3\', \n `fourth_column` UInt8 COMMENT \'comment 4\', \n `fifth_column` UInt8\n)\nENGINE = TinyLog first_column UInt8 DEFAULT 1 comment 1 second_column UInt8 MATERIALIZED first_column comment 2 third_column UInt8 ALIAS second_column comment 3 @@ -11,7 +11,7 @@ fifth_column UInt8 │ check_query_comment_column │ fourth_column │ comment 4 │ │ check_query_comment_column │ fifth_column │ │ └────────────────────────────┴───────────────┴───────────┘ -CREATE TABLE default.check_query_comment_column (`first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_1\', `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_1\', `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_1\', `fourth_column` UInt8 COMMENT \'comment 4_1\', `fifth_column` UInt8 COMMENT \'comment 5_1\') ENGINE = TinyLog +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_1\', \n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_1\', \n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_1\', \n `fourth_column` UInt8 COMMENT \'comment 4_1\', \n `fifth_column` UInt8 COMMENT \'comment 5_1\'\n)\nENGINE = TinyLog ┌─table──────────────────────┬─name──────────┬─comment─────┐ │ check_query_comment_column │ first_column │ comment 1_2 │ │ check_query_comment_column │ second_column │ comment 2_2 │ @@ -19,8 +19,8 @@ CREATE TABLE default.check_query_comment_column (`first_column` UInt8 DEFAULT 1 │ check_query_comment_column │ fourth_column │ comment 4_2 │ │ check_query_comment_column │ fifth_column │ comment 5_2 │ └────────────────────────────┴───────────────┴─────────────┘ -CREATE TABLE default.check_query_comment_column (`first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_2\', `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_2\', `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_2\', `fourth_column` UInt8 COMMENT \'comment 4_2\', `fifth_column` UInt8 COMMENT \'comment 5_2\') ENGINE = TinyLog -CREATE TABLE default.check_query_comment_column (`first_column` UInt8 COMMENT \'comment 1\', `second_column` UInt8 COMMENT \'comment 2\', `third_column` UInt8 COMMENT \'comment 3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_2\', \n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_2\', \n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_2\', \n `fourth_column` UInt8 COMMENT \'comment 4_2\', \n `fifth_column` UInt8 COMMENT \'comment 5_2\'\n)\nENGINE = TinyLog +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1\', \n `second_column` UInt8 COMMENT \'comment 2\', \n `third_column` UInt8 COMMENT \'comment 3\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 first_column UInt8 comment 1 
second_column UInt8 comment 2 third_column UInt8 comment 3 @@ -29,8 +29,8 @@ third_column UInt8 comment 3 │ check_query_comment_column │ second_column │ comment 2 │ │ check_query_comment_column │ third_column │ comment 3 │ └────────────────────────────┴───────────────┴───────────┘ -CREATE TABLE default.check_query_comment_column (`first_column` UInt8 COMMENT \'comment 1_2\', `second_column` UInt8 COMMENT \'comment 2_2\', `third_column` UInt8 COMMENT \'comment 3_2\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 -CREATE TABLE default.check_query_comment_column (`first_column` UInt8 COMMENT \'comment 1_3\', `second_column` UInt8 COMMENT \'comment 2_3\', `third_column` UInt8 COMMENT \'comment 3_3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1_2\', \n `second_column` UInt8 COMMENT \'comment 2_2\', \n `third_column` UInt8 COMMENT \'comment 3_2\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1_3\', \n `second_column` UInt8 COMMENT \'comment 2_3\', \n `third_column` UInt8 COMMENT \'comment 3_3\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 ┌─table──────────────────────┬─name──────────┬─comment─────┐ │ check_query_comment_column │ first_column │ comment 1_3 │ │ check_query_comment_column │ second_column │ comment 2_3 │ diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference index a31b4bd7308..28051d15f65 100644 --- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference +++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.ipv4_test (`ipv4_` IPv4) ENGINE = Memory +CREATE TABLE default.ipv4_test\n(\n `ipv4_` IPv4\n)\nENGINE = Memory 0.0.0.0 00 8.8.8.8 08080808 127.0.0.1 7F000001 @@ -10,7 +10,7 @@ CREATE TABLE default.ipv4_test (`ipv4_` IPv4) ENGINE = Memory > 127.0.0.1 255.255.255.255 = 127.0.0.1 127.0.0.1 euqality of IPv4-mapped IPv6 value and IPv4 promoted to IPv6 with function: 1 -CREATE TABLE default.ipv6_test (`ipv6_` IPv6) ENGINE = Memory +CREATE TABLE default.ipv6_test\n(\n `ipv6_` IPv6\n)\nENGINE = Memory :: 00000000000000000000000000000000 :: 00000000000000000000000000000000 ::ffff:8.8.8.8 00000000000000000000FFFF08080808 diff --git a/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/tests/queries/0_stateless/00751_default_databasename_for_view.reference index 2873fcbee3b..5ba1861e3ef 100644 --- a/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,29 @@ -CREATE MATERIALIZED VIEW test_00751.t_mv_00751 (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test_00751.t_00751 WHERE (app = (SELECT min(app) FROM test_00751.u_00751)) AND (platform = (SELECT (SELECT min(platform) FROM test_00751.v_00751))) +CREATE MATERIALIZED VIEW test_00751.t_mv_00751 +( + `date` Date, + `platform` 
Enum8('a' = 0, 'b' = 1), + `app` Enum8('a' = 0, 'b' = 1) +) +ENGINE = MergeTree +ORDER BY date +SETTINGS index_granularity = 8192 AS +SELECT + date, + platform, + app +FROM test_00751.t_00751 +WHERE (app = +( + SELECT min(app) + FROM test_00751.u_00751 +)) AND (platform = +( + SELECT + ( + SELECT min(platform) + FROM test_00751.v_00751 + ) +)) 2000-01-01 a a 2000-01-02 b b 2000-01-03 a a diff --git a/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference b/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference index 8b1eeea8203..b5021d00f56 100644 --- a/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference +++ b/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference @@ -1,6 +1,6 @@ -CREATE TABLE test.check_comments (`column_name1` UInt8 DEFAULT 1 COMMENT \'comment\', `column_name2` UInt8 COMMENT \'non default comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 +CREATE TABLE test.check_comments\n(\n `column_name1` UInt8 DEFAULT 1 COMMENT \'comment\', \n `column_name2` UInt8 COMMENT \'non default comment\'\n)\nENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\')\nORDER BY column_name1\nSETTINGS index_granularity = 8192 column_name1 UInt8 DEFAULT 1 comment column_name2 UInt8 non default comment -CREATE TABLE test.check_comments (`column_name1` UInt8 DEFAULT 1 COMMENT \'another comment\', `column_name2` UInt8 COMMENT \'non default comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 +CREATE TABLE test.check_comments\n(\n `column_name1` UInt8 DEFAULT 1 COMMENT \'another comment\', \n `column_name2` UInt8 COMMENT \'non default comment\'\n)\nENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\')\nORDER BY column_name1\nSETTINGS index_granularity = 8192 column_name1 UInt8 DEFAULT 1 another comment column_name2 UInt8 non default comment diff --git a/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference b/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference index 93f15318634..a1493508b61 100644 --- a/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference +++ b/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference @@ -1,5 +1,5 @@ *** Check SHOW CREATE TABLE *** -CREATE TABLE default.alter_column (`x` UInt32, `y` Int32) ENGINE = MergeTree PARTITION BY x ORDER BY x SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_column\n(\n `x` UInt32, \n `y` Int32\n)\nENGINE = MergeTree\nPARTITION BY x\nORDER BY x\nSETTINGS index_granularity = 8192 *** Check parts *** 0 0 10 -10 @@ -52,7 +52,7 @@ CREATE TABLE default.alter_column (`x` UInt32, `y` Int32) ENGINE = MergeTree PAR 8 -8 9 -9 *** Check SHOW CREATE TABLE after ALTER MODIFY *** -CREATE TABLE default.alter_column (`x` UInt32, `y` Int64) ENGINE = MergeTree PARTITION BY x ORDER BY x SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_column\n(\n `x` UInt32, \n `y` Int64\n)\nENGINE = MergeTree\nPARTITION BY x\nORDER BY x\nSETTINGS index_granularity = 8192 *** Check parts after ALTER MODIFY *** 0 0 10 -10 diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by.reference b/tests/queries/0_stateless/00754_alter_modify_order_by.reference index 1bcdae884f8..f0dc413a186 100644 --- a/tests/queries/0_stateless/00754_alter_modify_order_by.reference +++ 
b/tests/queries/0_stateless/00754_alter_modify_order_by.reference @@ -9,4 +9,4 @@ 1 2 1 30 1 2 4 90 *** Check SHOW CREATE TABLE *** -CREATE TABLE default.summing (`x` UInt32, `y` UInt32, `z` UInt32, `val` UInt32) ENGINE = SummingMergeTree PRIMARY KEY (x, y) ORDER BY (x, y, -z) SETTINGS index_granularity = 8192 +CREATE TABLE default.summing\n(\n `x` UInt32, \n `y` UInt32, \n `z` UInt32, \n `val` UInt32\n)\nENGINE = SummingMergeTree\nPRIMARY KEY (x, y)\nORDER BY (x, y, -z)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference index ebe30941f3f..938a90a27b4 100644 --- a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference +++ b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference @@ -9,6 +9,6 @@ 1 2 1 30 1 2 4 90 *** Check SHOW CREATE TABLE *** -CREATE TABLE test.summing_r2 (`x` UInt32, `y` UInt32, `z` UInt32, `val` UInt32) ENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\') PRIMARY KEY (x, y) ORDER BY (x, y, -z) SETTINGS index_granularity = 8192 +CREATE TABLE test.summing_r2\n(\n `x` UInt32, \n `y` UInt32, \n `z` UInt32, \n `val` UInt32\n)\nENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\')\nPRIMARY KEY (x, y)\nORDER BY (x, y, -z)\nSETTINGS index_granularity = 8192 *** Check SHOW CREATE TABLE after offline ALTER *** -CREATE TABLE test.summing_r2 (`x` UInt32, `y` UInt32, `z` UInt32, `t` UInt32, `val` UInt32) ENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\') PRIMARY KEY (x, y) ORDER BY (x, y, t * t) SETTINGS index_granularity = 8192 +CREATE TABLE test.summing_r2\n(\n `x` UInt32, \n `y` UInt32, \n `z` UInt32, \n `t` UInt32, \n `val` UInt32\n)\nENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\')\nPRIMARY KEY (x, y)\nORDER BY (x, y, t * t)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference index 6da97ff6091..f778c4f5d90 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference @@ -9,10 +9,10 @@ 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_multiple_more_types (`id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec_multiple_more_types\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), \n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), \n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), \n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = MergeTree()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1.5555555555555 hello world! 
[77] ['John'] 7.1000000000000 xxxxxxxxxxxx [127] ['Henry'] ! 222 !ZSTD -CREATE TABLE default.test_default_delta (`id` UInt64 CODEC(Delta(8)), `data` String CODEC(Delta(1)), `somedate` Date CODEC(Delta(2)), `somenum` Float64 CODEC(Delta(8)), `somestr` FixedString(3) CODEC(Delta(1)), `othernum` Int64 CODEC(Delta(8)), `yetothernum` Float32 CODEC(Delta(4)), `ddd.age` Array(UInt8) CODEC(Delta(1)), `ddd.Name` Array(String) CODEC(Delta(1)), `ddd.OName` Array(String) CODEC(Delta(1)), `ddd.BName` Array(String) CODEC(Delta(1))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE default.test_default_delta\n(\n `id` UInt64 CODEC(Delta(8)), \n `data` String CODEC(Delta(1)), \n `somedate` Date CODEC(Delta(2)), \n `somenum` Float64 CODEC(Delta(8)), \n `somestr` FixedString(3) CODEC(Delta(1)), \n `othernum` Int64 CODEC(Delta(8)), \n `yetothernum` Float32 CODEC(Delta(4)), \n `ddd.age` Array(UInt8) CODEC(Delta(1)), \n `ddd.Name` Array(String) CODEC(Delta(1)), \n `ddd.OName` Array(String) CODEC(Delta(1)), \n `ddd.BName` Array(String) CODEC(Delta(1))\n)\nENGINE = MergeTree()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference index 322b207bf7d..b33535364e5 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference @@ -1,9 +1,9 @@ -CREATE TABLE default.compression_codec_log (`id` UInt64 CODEC(LZ4), `data` String CODEC(ZSTD(1)), `ddd` Date CODEC(NONE), `somenum` Float64 CODEC(ZSTD(2)), `somestr` FixedString(3) CODEC(LZ4HC(7)), `othernum` Int64 CODEC(Delta(8))) ENGINE = Log() +CREATE TABLE default.compression_codec_log\n(\n `id` UInt64 CODEC(LZ4), \n `data` String CODEC(ZSTD(1)), \n `ddd` Date CODEC(NONE), \n `somenum` Float64 CODEC(ZSTD(2)), \n `somestr` FixedString(3) CODEC(LZ4HC(7)), \n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = Log() 1 hello 2018-12-14 1.1 aaa 5 2 world 2018-12-15 2.2 bbb 6 3 ! 
2018-12-16 3.3 ccc 7 2 -CREATE TABLE default.compression_codec_multiple_log (`id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))) ENGINE = Log() +CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), \n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), \n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), \n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = Log() 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 @@ -11,12 +11,12 @@ CREATE TABLE default.compression_codec_multiple_log (`id` UInt64 CODEC(LZ4, ZSTD 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_tiny_log (`id` UInt64 CODEC(LZ4), `data` String CODEC(ZSTD(1)), `ddd` Date CODEC(NONE), `somenum` Float64 CODEC(ZSTD(2)), `somestr` FixedString(3) CODEC(LZ4HC(7)), `othernum` Int64 CODEC(Delta(8))) ENGINE = TinyLog() +CREATE TABLE default.compression_codec_tiny_log\n(\n `id` UInt64 CODEC(LZ4), \n `data` String CODEC(ZSTD(1)), \n `ddd` Date CODEC(NONE), \n `somenum` Float64 CODEC(ZSTD(2)), \n `somestr` FixedString(3) CODEC(LZ4HC(7)), \n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = TinyLog() 1 hello 2018-12-14 1.1 aaa 5 2 world 2018-12-15 2.2 bbb 6 3 ! 2018-12-16 3.3 ccc 7 2 -CREATE TABLE default.compression_codec_multiple_tiny_log (`id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))) ENGINE = TinyLog() +CREATE TABLE default.compression_codec_multiple_tiny_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), \n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), \n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), \n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = TinyLog() 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 diff --git a/tests/queries/0_stateless/00836_indices_alter.reference b/tests/queries/0_stateless/00836_indices_alter.reference index e30c17eb673..6efa25f47b7 100644 --- a/tests/queries/0_stateless/00836_indices_alter.reference +++ b/tests/queries/0_stateless/00836_indices_alter.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -6,15 +6,15 @@ CREATE TABLE default.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i3 1 2 1 2 1 2 -CREATE TABLE default.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 
u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 1 2 1 2 1 2 -CREATE TABLE default.minmax_idx (`u64` UInt64, `i32` Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE default.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -23,6 +23,6 @@ CREATE TABLE default.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i3 1 2 1 2 1 2 -CREATE TABLE default.minmax_idx2 (`u64` UInt64, `i32` Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx2\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 diff --git a/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference b/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference index b2c2b41f460..ec9de160fcc 100644 --- a/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference +++ b/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference @@ -1,5 +1,5 @@ -CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -14,8 +14,8 @@ CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER 
BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -28,10 +28,10 @@ CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx3 u64 - i32 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -44,14 +44,14 @@ CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx2 (`u64` UInt64, `i32` Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2\n(\n `u64` UInt64, 
\n `i32` Int32, \n INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 3 1 2 1 3 -CREATE TABLE test.minmax_idx2 (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 3 1 2 diff --git a/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference b/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference index 29bda49a8e5..ee481c88d89 100644 --- a/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference +++ b/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference @@ -20,7 +20,7 @@ 274972506.6 9175437371954010821 9175437371954010821 -CREATE TABLE test.compression_codec_multiple_more_types_replicated (`id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/compression_codec_multiple_more_types_replicated\', \'1\') ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE test.compression_codec_multiple_more_types_replicated\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), \n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), \n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), \n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/compression_codec_multiple_more_types_replicated\', \'1\')\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1.5555555555555 hello world! [77] ['John'] 7.1000000000000 xxxxxxxxxxxx [127] ['Henry'] ! 
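A note on the escaped references above and below: SHOW CREATE TABLE now pretty-prints the statement over several lines, and because .reference files are compared as TSV, every embedded newline is stored as a literal \n escape, with the formatter's indentation (including its trailing space after each column's comma) preserved verbatim. As an illustration only, the reference line from 00933_alter_ttl.reference further down,

    CREATE TABLE default.ttl\n(\n    `d` Date, \n    `a` Int32\n)\nENGINE = MergeTree\nPARTITION BY toDayOfMonth(d)\nORDER BY a\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192

is the escaped form of the statement the server actually returns:

    CREATE TABLE default.ttl
    (
        `d` Date, 
        `a` Int32
    )
    ENGINE = MergeTree
    PARTITION BY toDayOfMonth(d)
    ORDER BY a
    TTL d + toIntervalDay(1)
    SETTINGS index_granularity = 8192

This is presumably also why 00998_constraints_all_tables.sql below drops FORMAT TSVRaw from its SHOW CREATE TABLE statements: TSVRaw would emit the newlines unescaped and spread one statement across many reference lines, while the default TSV keeps each statement on a single escaped line.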
diff --git a/tests/queries/0_stateless/00916_create_or_replace_view.reference b/tests/queries/0_stateless/00916_create_or_replace_view.reference index 30d14bf1e41..50323e47556 100644 --- a/tests/queries/0_stateless/00916_create_or_replace_view.reference +++ b/tests/queries/0_stateless/00916_create_or_replace_view.reference @@ -1,2 +1,2 @@ -CREATE VIEW default.t (`number` UInt64) AS SELECT number FROM system.numbers -CREATE VIEW default.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers +CREATE VIEW default.t\n(\n `number` UInt64\n) AS\nSELECT number\nFROM system.numbers +CREATE VIEW default.t\n(\n `next_number` UInt64\n) AS\nSELECT number + 1 AS next_number\nFROM system.numbers diff --git a/tests/queries/0_stateless/00933_alter_ttl.reference b/tests/queries/0_stateless/00933_alter_ttl.reference index 44ba49026a7..9b5cec0f773 100644 --- a/tests/queries/0_stateless/00933_alter_ttl.reference +++ b/tests/queries/0_stateless/00933_alter_ttl.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.ttl (`d` Date, `a` Int32) ENGINE = MergeTree PARTITION BY toDayOfMonth(d) ORDER BY a TTL d + toIntervalDay(1) SETTINGS index_granularity = 8192 +CREATE TABLE default.ttl\n(\n `d` Date, \n `a` Int32\n)\nENGINE = MergeTree\nPARTITION BY toDayOfMonth(d)\nORDER BY a\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192 2100-10-10 3 2100-10-10 4 d Date diff --git a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference index 986bc6b4a24..629fbf2a4a3 100644 --- a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference +++ b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference @@ -1,3 +1,3 @@ 200 400 -CREATE TABLE test.ttl_repl2 (`d` Date, `x` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\') PARTITION BY toDayOfMonth(d) ORDER BY x TTL d + toIntervalDay(1) SETTINGS index_granularity = 8192 +CREATE TABLE test.ttl_repl2\n(\n `d` Date, \n `x` UInt32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\')\nPARTITION BY toDayOfMonth(d)\nORDER BY x\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00933_ttl_simple.reference b/tests/queries/0_stateless/00933_ttl_simple.reference index e8b0c699aec..102639947a3 100644 --- a/tests/queries/0_stateless/00933_ttl_simple.reference +++ b/tests/queries/0_stateless/00933_ttl_simple.reference @@ -6,11 +6,11 @@ 2000-10-10 00:00:00 0 2100-10-10 00:00:00 3 2100-10-10 2 -CREATE TABLE default.ttl_00933_1 (`b` Int32, `a` Int32 TTL now() - 1000) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL now() - 1000\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 0 -CREATE TABLE default.ttl_00933_1 (`b` Int32, `a` Int32 TTL now() + 1000) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL now() + 1000\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 1 -CREATE TABLE default.ttl_00933_1 (`b` Int32, `a` Int32 TTL today() - 1) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL today() - 1\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY 
tuple()\nSETTINGS index_granularity = 8192 1 0 -CREATE TABLE default.ttl_00933_1 (`b` Int32, `a` Int32 TTL today() + 1) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL today() + 1\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 1 diff --git a/tests/queries/0_stateless/00980_merge_alter_settings.reference b/tests/queries/0_stateless/00980_merge_alter_settings.reference index ee3818d25dc..340cf29ce89 100644 --- a/tests/queries/0_stateless/00980_merge_alter_settings.reference +++ b/tests/queries/0_stateless/00980_merge_alter_settings.reference @@ -1,6 +1,6 @@ -CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096 -CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 1, parts_to_delay_insert = 1 -CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100 2 -CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 30 -CREATE TABLE default.table_for_alter (`id` UInt64, `Data` String, `Data2` UInt64) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 15 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 30 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String, \n `Data2` UInt64\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 15 diff --git a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference index 159102e1ca7..ab006ea6931 100644 --- a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference +++ b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference @@ -1,12 +1,12 @@ -CREATE TABLE default.replicated_table_for_alter1 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\') ORDER BY id SETTINGS index_granularity = 8192 -CREATE TABLE default.replicated_table_for_alter1 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\') ORDER BY id SETTINGS index_granularity = 8192 +CREATE TABLE 
default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 4 4 4 4 6 6 -CREATE TABLE default.replicated_table_for_alter1 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\') ORDER BY id SETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1 -CREATE TABLE default.replicated_table_for_alter2 (`id` UInt64, `Data` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\') ORDER BY id SETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 -CREATE TABLE default.replicated_table_for_alter1 (`id` UInt64, `Data` String, `Data2` UInt64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\') ORDER BY id SETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1, check_delay_period = 15 -CREATE TABLE default.replicated_table_for_alter2 (`id` UInt64, `Data` String, `Data2` UInt64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\') ORDER BY id SETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1 +CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String, \n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1, check_delay_period = 15 +CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64, \n `Data` String, \n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.reference b/tests/queries/0_stateless/00998_constraints_all_tables.reference index 730df555af3..3de251daa71 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.reference +++ b/tests/queries/0_stateless/00998_constraints_all_tables.reference @@ -10,5 +10,5 @@ 0 0 3 -CREATE TABLE default.constrained (`URL` String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log -CREATE TABLE default.constrained2 (`URL` String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log +CREATE TABLE default.constrained\n(\n `URL` String, \n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\', \n CONSTRAINT is_utf8 CHECK 
isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained2\n(\n `URL` String, \n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\', \n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.sql b/tests/queries/0_stateless/00998_constraints_all_tables.sql index 66b93fca97b..e47b7eaf83c 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.sql +++ b/tests/queries/0_stateless/00998_constraints_all_tables.sql @@ -45,8 +45,8 @@ DROP TABLE constrained; DROP TABLE IF EXISTS constrained2; CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; CREATE TABLE constrained2 AS constrained; -SHOW CREATE TABLE constrained FORMAT TSVRaw; -SHOW CREATE TABLE constrained2 FORMAT TSVRaw; +SHOW CREATE TABLE constrained; +SHOW CREATE TABLE constrained2; INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } INSERT INTO constrained2 VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } DROP TABLE constrained; diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index 327c02a4b8a..ad16e8ae7f2 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -1,5 +1,5 @@ =DICTIONARY in Ordinary DB -CREATE DICTIONARY ordinary_db.dict1 (`key_column` UInt64 DEFAULT 0, `second_column` UInt8 DEFAULT 1, `third_column` String DEFAULT \'qqq\') PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict\')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) +CREATE DICTIONARY ordinary_db.dict1\n(\n `key_column` UInt64 DEFAULT 0, \n `second_column` UInt8 DEFAULT 1, \n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict1 1 ordinary_db dict1 diff --git a/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference b/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference index bba4944f4a8..c2d7d849fae 100644 --- a/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference +++ b/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference @@ -1 +1 @@ -CREATE TABLE default.BannerDict (`BannerID` UInt64, `CompaignID` UInt64) ENGINE = ODBC(\'DSN=pgconn;Database=postgres\', \'somedb\', \'bannerdict\') +CREATE TABLE default.BannerDict\n(\n `BannerID` UInt64, \n `CompaignID` UInt64\n)\nENGINE = ODBC(\'DSN=pgconn;Database=postgres\', \'somedb\', \'bannerdict\') diff --git a/tests/queries/0_stateless/01055_compact_parts_1.reference b/tests/queries/0_stateless/01055_compact_parts_1.reference index 7c9dd4a0ef9..b99f336d3b0 100644 --- a/tests/queries/0_stateless/01055_compact_parts_1.reference +++ b/tests/queries/0_stateless/01055_compact_parts_1.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.mt_compact (`a` Int32, `s` String) ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity_bytes = 0, index_granularity = 8192 -CREATE TABLE default.mt_compact (`a` Int32, `s` String) ENGINE = MergeTree PARTITION 
BY a ORDER BY a SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, index_granularity = 8192, parts_to_delay_insert = 300 +CREATE TABLE default.mt_compact\n(\n `a` Int32, \n `s` String\n)\nENGINE = MergeTree\nPARTITION BY a\nORDER BY a\nSETTINGS index_granularity_bytes = 0, index_granularity = 8192 +CREATE TABLE default.mt_compact\n(\n `a` Int32, \n `s` String\n)\nENGINE = MergeTree\nPARTITION BY a\nORDER BY a\nSETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, index_granularity = 8192, parts_to_delay_insert = 300 diff --git a/tests/queries/0_stateless/01069_database_memory.reference b/tests/queries/0_stateless/01069_database_memory.reference index 393c85070b9..e7486d57276 100644 --- a/tests/queries/0_stateless/01069_database_memory.reference +++ b/tests/queries/0_stateless/01069_database_memory.reference @@ -1,8 +1,8 @@ -CREATE DATABASE memory_01069 ENGINE = Memory() +CREATE DATABASE memory_01069\nENGINE = Memory() 1 2 3 4 3 4 -CREATE TABLE memory_01069.file (`n` UInt8) ENGINE = File(\'CSV\') +CREATE TABLE memory_01069.file\n(\n `n` UInt8\n)\nENGINE = File(\'CSV\') diff --git a/tests/queries/0_stateless/01070_alter_with_ttl.reference b/tests/queries/0_stateless/01070_alter_with_ttl.reference index 8b2bd9d1389..de7833472a1 100644 --- a/tests/queries/0_stateless/01070_alter_with_ttl.reference +++ b/tests/queries/0_stateless/01070_alter_with_ttl.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.alter_ttl (`i` Int32, `s` String TTL toDate(\'2020-01-01\')) ENGINE = MergeTree ORDER BY i TTL toDate(\'2020-05-05\') SETTINGS index_granularity = 8192 -CREATE TABLE default.alter_ttl (`d` Date, `s` String TTL d + toIntervalDay(1)) ENGINE = MergeTree ORDER BY d TTL d + toIntervalMonth(1) SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_ttl\n(\n `i` Int32, \n `s` String TTL toDate(\'2020-01-01\')\n)\nENGINE = MergeTree\nORDER BY i\nTTL toDate(\'2020-05-05\')\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_ttl\n(\n `d` Date, \n `s` String TTL d + toIntervalDay(1)\n)\nENGINE = MergeTree\nORDER BY d\nTTL d + toIntervalMonth(1)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01079_alter_default_zookeeper.reference b/tests/queries/0_stateless/01079_alter_default_zookeeper.reference index 35ba20aff3e..62d26bc9b4b 100644 --- a/tests/queries/0_stateless/01079_alter_default_zookeeper.reference +++ b/tests/queries/0_stateless/01079_alter_default_zookeeper.reference @@ -1,11 +1,11 @@ -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` String DEFAULT \'10\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` String DEFAULT \'10\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 1000 -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` UInt64 DEFAULT \'10\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` UInt64 DEFAULT 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt64 DEFAULT \'10\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER 
BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt64 DEFAULT 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 1000 -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` UInt64 DEFAULT 100) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` UInt16 DEFAULT 100) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt64 DEFAULT 100\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt16 DEFAULT 100\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 10000 -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` UInt8 DEFAULT 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` UInt8 DEFAULT 10, `better_column` UInt8 DEFAULT \'1\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.alter_default (`date` Date, `key` UInt64, `value` UInt8 DEFAULT 10, `better_column` UInt8 DEFAULT \'1\', `other_date` String DEFAULT 1) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt8 DEFAULT 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt8 DEFAULT 10, \n `better_column` UInt8 DEFAULT \'1\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt8 DEFAULT 10, \n `better_column` UInt8 DEFAULT \'1\', \n `other_date` String DEFAULT 1\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference b/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference index 198f79cf9a4..ea3fbec34a8 100644 --- a/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference +++ b/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference @@ -1,6 +1,6 @@ Wrong column name. 
-CREATE TABLE default.table_for_bad_alters (`key` UInt64, `value1` UInt8, `value2` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_bad_alters (`key` UInt64, `value1` UInt8, `value2` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\') ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_bad_alters\n(\n `key` UInt64, \n `value1` UInt8, \n `value2` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_bad_alters\n(\n `key` UInt64, \n `value1` UInt8, \n `value2` UInt32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 syntax error at begin of string. 7 Hello diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference index 5b376a0654f..2007eda0f07 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference @@ -1,11 +1,11 @@ -CREATE TABLE test_01083.file (`n` Int8) ENGINE = File(\'TSVWithNamesAndTypes\') -CREATE TABLE test_01083.buffer (`n` Int8) ENGINE = Buffer(\'test_01083\', \'file\', 16, 10, 200, 10000, 1000000, 10000000, 1000000000) -CREATE TABLE test_01083.merge (`n` Int8) ENGINE = Merge(\'test_01083\', \'distributed\') +CREATE TABLE test_01083.file\n(\n `n` Int8\n)\nENGINE = File(\'TSVWithNamesAndTypes\') +CREATE TABLE test_01083.buffer\n(\n `n` Int8\n)\nENGINE = Buffer(\'test_01083\', \'file\', 16, 10, 200, 10000, 1000000, 10000000, 1000000000) +CREATE TABLE test_01083.merge\n(\n `n` Int8\n)\nENGINE = Merge(\'test_01083\', \'distributed\') CREATE TABLE test_01083.merge_tf AS merge(\'test_01083\', \'.*\') -CREATE TABLE test_01083.distributed (`n` Int8) ENGINE = Distributed(\'test_shard_localhost\', \'test_01083\', \'file\') +CREATE TABLE test_01083.distributed\n(\n `n` Int8\n)\nENGINE = Distributed(\'test_shard_localhost\', \'test_01083\', \'file\') CREATE TABLE test_01083.distributed_tf AS cluster(\'test_shard_localhost\', \'test_01083\', \'buffer\') -CREATE TABLE test_01083.url (`n` UInt64, `col` String) ENGINE = URL(\'https://localhost:8443/?query=select+n,+_table+from+test_01083.merge+format+CSV\', \'CSV\') +CREATE TABLE test_01083.url\n(\n `n` UInt64, \n `col` String\n)\nENGINE = URL(\'https://localhost:8443/?query=select+n,+_table+from+test_01083.merge+format+CSV\', \'CSV\') CREATE TABLE test_01083.rich_syntax AS remote(\'localhos{x|y|t}\', cluster(\'test_shard_localhost\', remote(\'127.0.0.{1..4}\', \'test_01083\', \'view\'))) -CREATE VIEW test_01083.view (`n` Int64) AS SELECT toInt64(n) AS n FROM (SELECT toString(n) AS n FROM test_01083.merge WHERE _table != \'qwerty\' ORDER BY _table ASC) UNION ALL SELECT * FROM test_01083.file -CREATE DICTIONARY test_01083.dict (`n` UInt64, `col` String DEFAULT \'42\') PRIMARY KEY n SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9440 SECURE 1 USER \'default\' TABLE \'url\' DB \'test_01083\')) LIFETIME(MIN 0 MAX 1) LAYOUT(CACHE(SIZE_IN_CELLS 1)) +CREATE VIEW test_01083.view\n(\n `n` Int64\n) AS\nSELECT toInt64(n) AS n\nFROM \n(\n SELECT toString(n) AS n\n FROM test_01083.merge\n WHERE _table != \'qwerty\'\n ORDER BY _table ASC\n)\nUNION ALL\nSELECT *\nFROM 
test_01083.file +CREATE DICTIONARY test_01083.dict\n(\n `n` UInt64, \n `col` String DEFAULT \'42\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9440 SECURE 1 USER \'default\' TABLE \'url\' DB \'test_01083\'))\nLIFETIME(MIN 0 MAX 1)\nLAYOUT(CACHE(SIZE_IN_CELLS 1)) 16 diff --git a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference index a0518e78891..852abeea187 100644 --- a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference +++ b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference @@ -1,3 +1,3 @@ World -CREATE DICTIONARY db_for_dict.dict_with_hashed_layout (`key1` UInt64, `value` String) PRIMARY KEY key1 SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\')) LIFETIME(MIN 1 MAX 10) LAYOUT(HASHED) +CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64, \n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED) Hello diff --git a/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference b/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference index a2c0e0d7d77..e2d6007c57f 100644 --- a/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference +++ b/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference @@ -1,6 +1,6 @@ 1 -CREATE TABLE default.table_for_rename_replicated (`date` Date, `key` UInt64, `value1` String, `value2` String, `value3` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\') PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_rename_replicated (`date` Date, `key` UInt64, `renamed_value1` String, `value2` String, `value3` String) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\') PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_replicated\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String, \n `value3` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_replicated\n(\n `date` Date, \n `key` UInt64, \n `renamed_value1` String, \n `value2` String, \n `value3` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 1 date key renamed_value1 value2 value3 2019-10-02 1 1 1 1 diff --git a/tests/queries/0_stateless/01213_alter_rename_nested.reference b/tests/queries/0_stateless/01213_alter_rename_nested.reference index 8b7aaaa3d5a..2641df46aeb 100644 --- a/tests/queries/0_stateless/01213_alter_rename_nested.reference +++ b/tests/queries/0_stateless/01213_alter_rename_nested.reference @@ -1,10 +1,10 @@ [8,9,10] ['a','b','c'] -CREATE TABLE default.table_for_rename_nested (`date` Date, `key` UInt64, `n.x` Array(UInt32), `n.y` Array(String), `value1` Array(Array(LowCardinality(String)))) ENGINE = MergeTree() PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_rename_nested (`date` Date, `key` UInt64, `n.renamed_x` Array(UInt32), `n.renamed_y` 
Array(String), `value1` Array(Array(LowCardinality(String)))) ENGINE = MergeTree() PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.x` Array(UInt32), \n `n.y` Array(String), \n `value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.renamed_x` Array(UInt32), \n `n.renamed_y` Array(String), \n `value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 7 [8,9,10] 7 ['a','b','c'] [['7']] -CREATE TABLE default.table_for_rename_nested (`date` Date, `key` UInt64, `n.renamed_x` Array(UInt32), `n.renamed_y` Array(String), `renamed_value1` Array(Array(LowCardinality(String)))) ENGINE = MergeTree() PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.renamed_x` Array(UInt32), \n `n.renamed_y` Array(String), \n `renamed_value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 date key n.renamed_x n.renamed_y renamed_value1 2019-10-01 7 [8,9,10] ['a','b','c'] [['7']] diff --git a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference index 06f136d6dbc..251e664b522 100644 --- a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference +++ b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference @@ -1,17 +1,17 @@ date key value1 value2 2019-10-02 1 1 Hello 1 -CREATE TABLE default.table_rename_with_default (`date` Date, `key` UInt64, `value1` String, `value2` String DEFAULT concat(\'Hello \', value1), `value3` String ALIAS concat(\'Word \', value1)) ENGINE = MergeTree() PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_default\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String DEFAULT concat(\'Hello \', value1), \n `value3` String ALIAS concat(\'Word \', value1)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 date key renamed_value1 value2 2019-10-02 1 1 Hello 1 -CREATE TABLE default.table_rename_with_default (`date` Date, `key` UInt64, `renamed_value1` String, `value2` String DEFAULT concat(\'Hello \', renamed_value1), `value3` String ALIAS concat(\'Word \', renamed_value1)) ENGINE = MergeTree() PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_default\n(\n `date` Date, \n `key` UInt64, \n `renamed_value1` String, \n `value2` String DEFAULT concat(\'Hello \', renamed_value1), \n `value3` String ALIAS concat(\'Word \', renamed_value1)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 Hello 1 Word 1 date1 date2 value1 value2 2019-10-02 2018-10-02 1 1 -CREATE TABLE default.table_rename_with_ttl (`date1` Date, `date2` Date, `value1` String, `value2` String TTL date1 + toIntervalMonth(10000)) ENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\') ORDER BY tuple() TTL date2 + toIntervalMonth(10000) SETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_ttl\n(\n `date1` Date, \n 
`date2` Date, \n `value1` String, \n `value2` String TTL date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 renamed_date1 date2 value1 value2 2019-10-02 2018-10-02 1 1 -CREATE TABLE default.table_rename_with_ttl (`renamed_date1` Date, `date2` Date, `value1` String, `value2` String TTL renamed_date1 + toIntervalMonth(10000)) ENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\') ORDER BY tuple() TTL date2 + toIntervalMonth(10000) SETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_ttl\n(\n `renamed_date1` Date, \n `date2` Date, \n `value1` String, \n `value2` String TTL renamed_date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 renamed_date1 renamed_date2 value1 value2 2019-10-02 2018-10-02 1 1 -CREATE TABLE default.table_rename_with_ttl (`renamed_date1` Date, `renamed_date2` Date, `value1` String, `value2` String TTL renamed_date1 + toIntervalMonth(10000)) ENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\') ORDER BY tuple() TTL renamed_date2 + toIntervalMonth(10000) SETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_ttl\n(\n `renamed_date1` Date, \n `renamed_date2` Date, \n `value1` String, \n `value2` String TTL renamed_date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL renamed_date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01213_alter_table_rename_nested.reference b/tests/queries/0_stateless/01213_alter_table_rename_nested.reference index 51647dc2e7b..8e6d93dbcce 100644 --- a/tests/queries/0_stateless/01213_alter_table_rename_nested.reference +++ b/tests/queries/0_stateless/01213_alter_table_rename_nested.reference @@ -1,6 +1,6 @@ [8,9,10] ['a','b','c'] -CREATE TABLE default.table_for_rename_nested (`date` Date, `key` UInt64, `n.x` Array(UInt32), `n.y` Array(String), `value1` String) ENGINE = MergeTree() PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_rename_nested (`date` Date, `key` UInt64, `n.renamed_x` Array(UInt32), `n.renamed_y` Array(String), `value1` String) ENGINE = MergeTree() PARTITION BY date ORDER BY key SETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.x` Array(UInt32), \n `n.y` Array(String), \n `value1` String\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.renamed_x` Array(UInt32), \n `n.renamed_y` Array(String), \n `value1` String\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 7 [8,9,10] 7 ['a','b','c'] From 458c7f516deb834bcb6775cdec732b404e793ce0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 8 Apr 2020 15:40:04 +0300 Subject: [PATCH 119/752] Fix extremes for Processors. 
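Before this change, extremes were lost as soon as a query ran through the Processors pipeline: SourceFromInputStream only knew about a totals port, TreeExecutorBlockInputStream could not pull extremes back out of the tree, and Pipe/QueryPipeline had no way to carry an extremes port between each other. This patch threads an extremes port through all of those layers, unites the extremes of several pipes with a Resize + ExtremesTransform pair, and also fixes the "Totals:"/"Extremes:" labels that were swapped in PrettyBlockOutputFormat.

An illustrative query for the fixed path (a sketch that mirrors the test added in the next commit; with extremes enabled, an extremes block with min 0 and max 1 is expected in addition to the data rows):

set extremes = 1;
select * from remote('127.0.0.{1,2}', numbers(2));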
--- .../ClusterProxy/SelectStreamFactory.cpp | 23 ++- src/Interpreters/InterpreterSelectQuery.cpp | 4 +- .../TreeExecutorBlockInputStream.cpp | 35 +++- .../Executors/TreeExecutorBlockInputStream.h | 5 +- .../Formats/Impl/PrettyBlockOutputFormat.cpp | 4 +- src/Processors/NullSink.h | 11 ++ src/Processors/Pipe.cpp | 19 +- src/Processors/Pipe.h | 6 +- src/Processors/QueryPipeline.cpp | 162 ++++++++++++------ src/Processors/QueryPipeline.h | 2 +- .../Sources/SourceFromInputStream.cpp | 53 ++++-- .../Sources/SourceFromInputStream.h | 10 +- 12 files changed, 243 insertions(+), 91 deletions(-) diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 39bbb1eb667..45e2fc9dc4b 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -82,7 +82,16 @@ Pipe createLocalStream(const ASTPtr & query_ast, const Block & header, const Con /// This flag means that pipeline must be tree-shaped, /// so we can't enable processors for InterpreterSelectQuery here. auto stream = interpreter.execute().in; - Pipe pipe(std::make_shared(std::move(stream))); + auto source = std::make_shared(std::move(stream)); + + bool add_totals_and_extremes_port = processed_stage == QueryProcessingStage::Complete; + if (add_totals_and_extremes_port) + { + source->addTotalsPort(); + source->addExtremesPort(); + } + + Pipe pipe(std::move(source)); pipe.addSimpleTransform(std::make_shared( pipe.getHeader(), header, ConvertingTransform::MatchColumnsMode::Name, context)); @@ -130,7 +139,7 @@ void SelectStreamFactory::createForShard( Pipes & res) { bool force_add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - bool add_totals_port = processed_stage == QueryProcessingStage::Complete; + bool add_totals_and_extremes_port = processed_stage == QueryProcessingStage::Complete; auto modified_query_ast = query_ast->clone(); if (has_virtual_shard_num_column) @@ -153,8 +162,11 @@ void SelectStreamFactory::createForShard( auto source = std::make_shared(std::move(stream), force_add_agg_info); - if (add_totals_port) + if (add_totals_and_extremes_port) + { source->addTotalsPort(); + source->addExtremesPort(); + } res.emplace_back(std::move(source)); }; @@ -303,8 +315,11 @@ void SelectStreamFactory::createForShard( auto lazy_stream = std::make_shared("LazyShardWithLocalReplica", header, lazily_create_stream); auto source = std::make_shared(std::move(lazy_stream), force_add_agg_info); - if (add_totals_port) + if (add_totals_and_extremes_port) + { source->addTotalsPort(); + source->addExtremesPort(); + } res.emplace_back(std::move(source)); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c58b0eab71b..63007f070db 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -87,7 +87,6 @@ #include #include #include -#include #include #include #include @@ -2541,8 +2540,7 @@ void InterpreterSelectQuery::executeExtremes(QueryPipeline & pipeline) if (!context->getSettingsRef().extremes) return; - auto transform = std::make_shared(pipeline.getHeader()); - pipeline.addExtremesTransform(std::move(transform)); + pipeline.addExtremesTransform(); } diff --git a/src/Processors/Executors/TreeExecutorBlockInputStream.cpp b/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index ee5b254ccf9..84fd97f4781 100644 --- a/src/Processors/Executors/TreeExecutorBlockInputStream.cpp +++ 
b/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -30,7 +30,10 @@ static void checkProcessorHasSingleOutput(IProcessor * processor) /// Check tree invariants (described in TreeExecutor.h). /// Collect sources with progress. -static void validateTree(const Processors & processors, IProcessor * root, IProcessor * totals_root, std::vector & sources) +static void validateTree( + const Processors & processors, + IProcessor * root, IProcessor * totals_root, IProcessor * extremes_root, + std::vector & sources) { std::unordered_map index; @@ -49,6 +52,8 @@ static void validateTree(const Processors & processors, IProcessor * root, IProc stack.push(root); if (totals_root) stack.push(totals_root); + if (extremes_root) + stack.push(extremes_root); while (!stack.empty()) { @@ -104,11 +109,15 @@ void TreeExecutorBlockInputStream::init() root = &output_port.getProcessor(); IProcessor * totals_root = nullptr; + IProcessor * extremes_root = nullptr; if (totals_port) totals_root = &totals_port->getProcessor(); - validateTree(processors, root, totals_root, sources_with_progress); + if (extremes_port) + extremes_root = &extremes_port->getProcessor(); + + validateTree(processors, root, totals_root, extremes_root, sources_with_progress); input_port = std::make_unique(getHeader(), root); connect(output_port, *input_port); @@ -121,15 +130,24 @@ void TreeExecutorBlockInputStream::init() input_totals_port->setNeeded(); } + if (extremes_port) + { + input_extremes_port = std::make_unique(extremes_port->getHeader(), root); + connect(*extremes_port, *input_extremes_port); + input_extremes_port->setNeeded(); + } + initRowsBeforeLimit(); } -void TreeExecutorBlockInputStream::execute(bool on_totals) +void TreeExecutorBlockInputStream::execute(bool on_totals, bool on_extremes) { std::stack stack; if (on_totals) stack.push(&totals_port->getProcessor()); + else if (on_extremes) + stack.push(&extremes_port->getProcessor()); else stack.push(root); @@ -283,11 +301,18 @@ Block TreeExecutorBlockInputStream::readImpl() { if (totals_port && !input_totals_port->isFinished()) { - execute(true); + execute(true, false); if (input_totals_port->hasData()) totals = getHeader().cloneWithColumns(input_totals_port->pull().detachColumns()); } + if (extremes_port && !input_extremes_port->isFinished()) + { + execute(false, true); + if (input_extremes_port->hasData()) + extremes = getHeader().cloneWithColumns(input_extremes_port->pull().detachColumns()); + } + if (rows_before_limit_at_least && rows_before_limit_at_least->hasAppliedLimit()) info.setRowsBeforeLimit(rows_before_limit_at_least->get()); @@ -311,7 +336,7 @@ Block TreeExecutorBlockInputStream::readImpl() return block; } - execute(false); + execute(false, false); } } diff --git a/src/Processors/Executors/TreeExecutorBlockInputStream.h b/src/Processors/Executors/TreeExecutorBlockInputStream.h index 24cab387eb8..dfe8e66ed09 100644 --- a/src/Processors/Executors/TreeExecutorBlockInputStream.h +++ b/src/Processors/Executors/TreeExecutorBlockInputStream.h @@ -31,6 +31,7 @@ public: interpreter_context.emplace_back(context); totals_port = pipe.getTotalsPort(); + extremes_port = pipe.getExtremesPort(); processors = std::move(pipe).detachProcessors(); init(); } @@ -52,10 +53,12 @@ protected: private: OutputPort & output_port; OutputPort * totals_port = nullptr; + OutputPort * extremes_port = nullptr; Processors processors; IProcessor * root = nullptr; std::unique_ptr input_port; std::unique_ptr input_totals_port; + std::unique_ptr input_extremes_port; RowsBeforeLimitCounterPtr 
rows_before_limit_at_least; /// Remember sources that support progress. @@ -65,7 +68,7 @@ private: void init(); /// Execute tree step-by-step until root returns next chunk or execution is finished. - void execute(bool on_totals); + void execute(bool on_totals, bool on_extremes); void initRowsBeforeLimit(); diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index a816cdd5318..ae86a01a52a 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -225,7 +225,7 @@ void PrettyBlockOutputFormat::consumeTotals(Chunk chunk) { total_rows = 0; writeSuffixIfNot(); - writeCString("\nExtremes:\n", out); + writeCString("\nTotals:\n", out); write(chunk, PortKind::Totals); } @@ -233,7 +233,7 @@ void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk) { total_rows = 0; writeSuffixIfNot(); - writeCString("\nTotals:\n", out); + writeCString("\nExtremes:\n", out); write(chunk, PortKind::Extremes); } diff --git a/src/Processors/NullSink.h b/src/Processors/NullSink.h index e4968daee29..b3c3bc1ac60 100644 --- a/src/Processors/NullSink.h +++ b/src/Processors/NullSink.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -19,4 +20,14 @@ public: InputPort & getPort() { return inputs.front(); } }; +class EmptySink : public ISink +{ +public: + explicit EmptySink(Block header) : ISink(std::move(header)) {} + String getName() const override { return "EmptySink"; } + +protected: + void consume(Chunk) override {} +}; + } diff --git a/src/Processors/Pipe.cpp b/src/Processors/Pipe.cpp index 4461d714264..d9b21dbc854 100644 --- a/src/Processors/Pipe.cpp +++ b/src/Processors/Pipe.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -48,7 +49,7 @@ static void checkSource(const IProcessor & source) throw Exception("Source for pipe should have single output, but it doesn't have any", ErrorCodes::LOGICAL_ERROR); - if (source.getOutputs().size() > 2) + if (source.getOutputs().size() > 1) throw Exception("Source for pipe should have single or two outputs, but " + source.getName() + " has " + toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); } @@ -56,18 +57,22 @@ static void checkSource(const IProcessor & source) Pipe::Pipe(ProcessorPtr source) { - checkSource(*source); - output_port = &source->getOutputs().front(); + if (auto * source_from_input_stream = typeid_cast(source.get())) + { + totals = source_from_input_stream->getTotalsPort(); + extremes = source_from_input_stream->getExtremesPort(); + } + else if (source->getOutputs().size() != 1) + checkSource(*source); - if (source->getOutputs().size() > 1) - totals = &source->getOutputs().back(); + output_port = &source->getOutputs().front(); processors.emplace_back(std::move(source)); max_parallel_streams = 1; } -Pipe::Pipe(Processors processors_, OutputPort * output_port_, OutputPort * totals_) - : processors(std::move(processors_)), output_port(output_port_), totals(totals_) +Pipe::Pipe(Processors processors_, OutputPort * output_port_, OutputPort * totals_, OutputPort * extremes_) + : processors(std::move(processors_)), output_port(output_port_), totals(totals_), extremes(extremes_) { } diff --git a/src/Processors/Pipe.h b/src/Processors/Pipe.h index 60715d986af..42bbd4e06d0 100644 --- a/src/Processors/Pipe.h +++ b/src/Processors/Pipe.h @@ -47,8 +47,11 @@ public: void enableQuota(); + /// Totals and extremes port. 
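+ /// Used, for example, by QueryPipeline::getPipe() to restore the special ports when a pipeline is converted back into a single Pipe.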
void setTotalsPort(OutputPort * totals_) { totals = totals_; } + void setExtremesPort(OutputPort * extremes_) { extremes = extremes_; } OutputPort * getTotalsPort() const { return totals; } + OutputPort * getExtremesPort() const { return extremes; } size_t maxParallelStreams() const { return max_parallel_streams; } @@ -67,6 +70,7 @@ private: Processors processors; OutputPort * output_port = nullptr; OutputPort * totals = nullptr; + OutputPort * extremes = nullptr; /// It is the max number of processors which can be executed in parallel for each step. See QueryPipeline::Streams. size_t max_parallel_streams = 0; @@ -84,7 +88,7 @@ private: /// and therefore we can skip those checks. /// Note that Pipe represents a tree if it was created using public interface. But this constructor can't assert it. /// So, it's possible that TreeExecutorBlockInputStream could be unable to convert such Pipe to IBlockInputStream. - explicit Pipe(Processors processors_, OutputPort * output_port, OutputPort * totals); + explicit Pipe(Processors processors_, OutputPort * output_port, OutputPort * totals, OutputPort * extremes); friend class QueryPipeline; }; diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index ee6938a48a6..d20086e726f 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -60,6 +60,58 @@ void QueryPipeline::init(Pipe pipe) init(std::move(pipes)); } +static OutputPort * uniteExtremes(const std::vector & ports, const Block & header, Processors & processors) +{ + /// Here we calculate extremes for extremes in case we unite several pipelines. + /// Example: select number from numbers(2) union all select number from numbers(3) + + /// ->> Resize -> Extremes --(output port)----> Null + /// --(extremes port)--> ... + + auto resize = std::make_shared(header, ports.size(), 1); + auto extremes = std::make_shared(header); + auto sink = std::make_shared(header); + + auto * extremes_port = &extremes->getExtremesPort(); + + auto in = resize->getInputs().begin(); + for (auto & port : ports) + connect(*port, *(in++)); + + connect(resize->getOutputs().front(), extremes->getInputPort()); + connect(extremes->getOutputPort(), sink->getPort()); + + processors.emplace_back(std::move(resize)); + processors.emplace_back(std::move(extremes)); + processors.emplace_back(std::move(sink)); + + return extremes_port; +} + +static OutputPort * uniteTotals(const std::vector & ports, const Block & header, Processors & processors) +{ + /// Calculate totals for several streams. + /// Take totals from the first source which has any, skip others.
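+ /// Concat reads its inputs one after another and Limit lets at most one row through, so only the totals of the first stream that produced any are kept.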
+ + /// ->> Concat -> Limit + + auto concat = std::make_shared(header, ports.size()); + auto limit = std::make_shared(header, 1, 0); + + auto * totals_port = &limit->getOutputPort(); + + auto in = concat->getInputs().begin(); + for (auto & port : ports) + connect(*port, *(in++)); + + connect(concat->getOutputs().front(), limit->getInputPort()); + + processors.emplace_back(std::move(concat)); + processors.emplace_back(std::move(limit)); + + return totals_port; +} + void QueryPipeline::init(Pipes pipes) { if (initialized()) @@ -82,6 +134,7 @@ void QueryPipeline::init(Pipes pipes) } std::vector totals; + std::vector extremes; for (auto & pipe : pipes) { @@ -98,6 +151,12 @@ void QueryPipeline::init(Pipes pipes) totals.emplace_back(totals_port); } + if (auto * extremes_port_ = pipe.getExtremesPort()) + { + assertBlocksHaveEqualStructure(current_header, extremes_port_->getHeader(), "QueryPipeline"); + extremes.emplace_back(extremes_port_); + } + streams.addStream(&pipe.getPort(), pipe.maxParallelStreams()); auto cur_processors = std::move(pipe).detachProcessors(); processors.insert(processors.end(), cur_processors.begin(), cur_processors.end()); @@ -108,15 +167,15 @@ void QueryPipeline::init(Pipes pipes) if (totals.size() == 1) totals_having_port = totals.back(); else - { - auto resize = std::make_shared(current_header, totals.size(), 1); - auto in = resize->getInputs().begin(); - for (auto & total : totals) - connect(*total, *(in++)); + totals_having_port = uniteTotals(totals, current_header, processors); + } - totals_having_port = &resize->getOutputs().front(); - processors.emplace_back(std::move(resize)); - } + if (!extremes.empty()) + { + if (extremes.size() == 1) + extremes_port = extremes.back(); + else + extremes_port = uniteExtremes(extremes, current_header, processors); } } @@ -356,29 +415,31 @@ void QueryPipeline::dropTotalsIfHas() } } -void QueryPipeline::addExtremesTransform(ProcessorPtr transform) +void QueryPipeline::addExtremesTransform() { checkInitialized(); - if (!typeid_cast(transform.get())) - throw Exception("ExtremesTransform expected for QueryPipeline::addExtremesTransform.", - ErrorCodes::LOGICAL_ERROR); - if (extremes_port) throw Exception("Extremes transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); - if (getNumStreams() != 1) - throw Exception("Cant't add Extremes transform because pipeline is expected to have single stream, " - "but it has " + toString(getNumStreams()) + " streams.", ErrorCodes::LOGICAL_ERROR); + std::vector extremes; + extremes.reserve(streams.size()); - connect(*streams.front(), transform->getInputs().front()); + for (auto & stream : streams) + { + auto transform = std::make_shared(current_header); + connect(*stream, transform->getInputPort()); - auto & outputs = transform->getOutputs(); + stream = &transform->getOutputPort(); + extremes.push_back(&transform->getExtremesPort()); - streams.assign({ &outputs.front() }); - extremes_port = &outputs.back(); - current_header = outputs.front().getHeader(); - processors.emplace_back(std::move(transform)); + processors.emplace_back(std::move(transform)); + } + + if (extremes.size() == 1) + extremes_port = extremes.front(); + else + extremes_port = uniteExtremes(extremes, current_header, processors); } void QueryPipeline::addCreatingSetsTransform(ProcessorPtr transform) @@ -455,6 +516,13 @@ void QueryPipeline::unitePipelines( }); std::vector extremes; + std::vector totals; + + if (extremes_port) + extremes.push_back(extremes_port); + + if (totals_having_port) + 
totals.push_back(totals_having_port); for (auto & pipeline : pipelines) { @@ -479,17 +547,12 @@ void QueryPipeline::unitePipelines( /// Take totals only from first port. if (pipeline.totals_having_port) { - if (!totals_having_port) - { - auto converting = std::make_shared( - pipeline.current_header, common_header, ConvertingTransform::MatchColumnsMode::Position, context); + auto converting = std::make_shared( + pipeline.current_header, common_header, ConvertingTransform::MatchColumnsMode::Position, context); - connect(*pipeline.totals_having_port, converting->getInputPort()); - totals_having_port = &converting->getOutputPort(); - processors.push_back(std::move(converting)); - } - else - pipeline.dropTotalsIfHas(); + connect(*pipeline.totals_having_port, converting->getInputPort()); + totals.push_back(&converting->getOutputPort()); + processors.push_back(std::move(converting)); } processors.insert(processors.end(), pipeline.processors.begin(), pipeline.processors.end()); @@ -504,28 +567,18 @@ void QueryPipeline::unitePipelines( if (!extremes.empty()) { - size_t num_inputs = extremes.size() + (extremes_port ? 1u : 0u); - - if (num_inputs == 1) - extremes_port = extremes.front(); + if (extremes.size() == 1) + extremes_port = extremes.back(); else - { - /// Add extra processor for extremes. - auto resize = std::make_shared(current_header, num_inputs, 1); - auto input = resize->getInputs().begin(); + extremes_port = uniteExtremes(extremes, current_header, processors); + } - if (extremes_port) - connect(*extremes_port, *(input++)); - - for (auto & output : extremes) - connect(*output, *(input++)); - - auto transform = std::make_shared(current_header); - extremes_port = &transform->getOutputPort(); - - connect(resize->getOutputs().front(), transform->getInputPort()); - processors.emplace_back(std::move(transform)); - } + if (!totals.empty()) + { + if (totals.size() == 1) + totals_having_port = totals.back(); + else + totals_having_port = uniteTotals(totals, current_header, processors); } } @@ -644,7 +697,7 @@ void QueryPipeline::initRowsBeforeLimit() Pipe QueryPipeline::getPipe() && { resize(1); - Pipe pipe(std::move(processors), streams.at(0), totals_having_port); + Pipe pipe(std::move(processors), streams.at(0), totals_having_port, extremes_port); pipe.max_parallel_streams = streams.maxParallelStreams(); for (auto & lock : table_locks) @@ -659,6 +712,9 @@ Pipe QueryPipeline::getPipe() && if (totals_having_port) pipe.setTotalsPort(totals_having_port); + if (extremes_port) + pipe.setExtremesPort(extremes_port); + return pipe; } diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index 9ce12e75b91..e8ba80cf65b 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -99,7 +99,7 @@ public: void addSimpleTransform(const ProcessorGetterWithStreamKind & getter); void addPipe(Processors pipe); void addTotalsHavingTransform(ProcessorPtr transform); - void addExtremesTransform(ProcessorPtr transform); + void addExtremesTransform(); void addCreatingSetsTransform(ProcessorPtr transform); void setOutput(ProcessorPtr output); diff --git a/src/Processors/Sources/SourceFromInputStream.cpp b/src/Processors/Sources/SourceFromInputStream.cpp index 6f2a7eeb28a..e7ca28f72b9 100644 --- a/src/Processors/Sources/SourceFromInputStream.cpp +++ b/src/Processors/Sources/SourceFromInputStream.cpp @@ -28,11 +28,20 @@ void SourceFromInputStream::init() void SourceFromInputStream::addTotalsPort() { - if (has_totals_port) + if (totals_port) throw Exception("Totals port was 
already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); outputs.emplace_back(outputs.front().getHeader(), this); - has_totals_port = true; + totals_port = &outputs.back(); +} + +void SourceFromInputStream::addExtremesPort() +{ + if (extremes_port) + throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); + + outputs.emplace_back(outputs.front().getHeader(), this); + extremes_port = &outputs.back(); } IProcessor::Status SourceFromInputStream::prepare() @@ -47,23 +56,32 @@ IProcessor::Status SourceFromInputStream::prepare() if (!is_stream_finished && !isCancelled()) return Status::Ready; - if (has_totals_port) + if (totals_port && !totals_port->isFinished()) { - auto & totals_out = outputs.back(); - - if (totals_out.isFinished()) - return Status::Finished; - if (has_totals) { - if (!totals_out.canPush()) + if (!totals_port->canPush()) return Status::PortFull; - totals_out.push(std::move(totals)); + totals_port->push(std::move(totals)); has_totals = false; } - totals_out.finish(); + totals_port->finish(); + } + + if (extremes_port && !extremes_port->isFinished()) + { + if (has_extremes) + { + if (!extremes_port->canPush()) + return Status::PortFull; + + extremes_port->push(std::move(extremes)); + has_extremes = false; + } + + extremes_port->finish(); } } @@ -138,13 +156,22 @@ Chunk SourceFromInputStream::generate() if (auto totals_block = stream->getTotals()) { - if (totals_block.rows() == 1) /// Sometimes we can get empty totals. Skip it. + if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it. { - totals.setColumns(totals_block.getColumns(), 1); + totals.setColumns(totals_block.getColumns(), totals_block.rows()); has_totals = true; } } + if (auto extremes_block = stream->getExtremes()) + { + if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it. + { + extremes.setColumns(extremes_block.getColumns(), extremes_block.rows()); + has_extremes = true; + } + } + is_stream_finished = true; return {}; } diff --git a/src/Processors/Sources/SourceFromInputStream.h b/src/Processors/Sources/SourceFromInputStream.h index 0fc92164059..b547e6a6d1f 100644 --- a/src/Processors/Sources/SourceFromInputStream.h +++ b/src/Processors/Sources/SourceFromInputStream.h @@ -23,6 +23,10 @@ public: BlockInputStreamPtr & getStream() { return stream; } void addTotalsPort(); + void addExtremesPort(); + + OutputPort * getTotalsPort() const { return totals_port; } + OutputPort * getExtremesPort() const { return extremes_port; } void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } @@ -44,9 +48,13 @@ private: RowsBeforeLimitCounterPtr rows_before_limit; Chunk totals; - bool has_totals_port = false; + OutputPort * totals_port = nullptr; bool has_totals = false; + Chunk extremes; + OutputPort * extremes_port = nullptr; + bool has_extremes = false; + bool is_generating_finished = false; bool is_stream_finished = false; bool is_stream_started = false; From b9f73a9f772935f27c5de2134daeb56677eb6913 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 8 Apr 2020 21:48:46 +0300 Subject: [PATCH 120/752] Added test. 
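The test covers the three paths changed by the previous commit: remote() over one and two shards, UNION ALL of several numbers() sources, and Distributed tables over two shards with different databases.

A worked example of the expected values, taken from the queries below:

set extremes = 1;
select * from (select * from numbers(2) union all select * from numbers(3) union all select * from numbers(1)) order by number;

returns the data rows 0 0 0 1 1 2 plus an extremes block holding min 0 and max 2.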
--- .../0_stateless/01232_extremes.reference | 110 ++++++++++++++++++ tests/queries/0_stateless/01232_extremes.sql | 51 ++++++++ 2 files changed, 161 insertions(+) create mode 100644 tests/queries/0_stateless/01232_extremes.reference create mode 100644 tests/queries/0_stateless/01232_extremes.sql diff --git a/tests/queries/0_stateless/01232_extremes.reference b/tests/queries/0_stateless/01232_extremes.reference new file mode 100644 index 00000000000..d5b66dcbd4b --- /dev/null +++ b/tests/queries/0_stateless/01232_extremes.reference @@ -0,0 +1,110 @@ +0 +1 + +0 +1 +- + +- +- +0 +1 +0 +1 + +0 +1 +- + +- +- +0 +1 +0 +1 + +0 +1 +- + +- +- +0 +1 +0 +1 + +0 +1 +- + +- +- +0 +1 + +0 +1 +------ + +------ +------ +0 +0 +0 +1 +1 +2 + +0 +2 +- + +- +- +0 +0 +0 +1 +1 +2 + +0 +2 +- + +- +- +0 +0 +0 +1 +1 +2 + +0 +2 +------ + +------ +------ +0 +0 +1 +1 +2 + +0 +2 +- + +- +- +0 +0 +1 +1 +2 + +0 +2 diff --git a/tests/queries/0_stateless/01232_extremes.sql b/tests/queries/0_stateless/01232_extremes.sql new file mode 100644 index 00000000000..80bf628d669 --- /dev/null +++ b/tests/queries/0_stateless/01232_extremes.sql @@ -0,0 +1,51 @@ +set send_logs_level = 'error'; +set extremes = 1; +-- set experimental_use_processors=0; + +select * from remote('127.0.0.1', numbers(2)); +select '-'; +select * from remote('127.0.0.{1,1}', numbers(2)); +select '-'; +select * from remote('127.0.0.{1,2}', numbers(2)); +select '-'; +select * from remote('127.0.0.{2,2}', numbers(2)); +select '-'; +select * from remote('127.0.0.2', numbers(2)); +select '------'; + +select * from (select * from numbers(2) union all select * from numbers(3) union all select * from numbers(1)) order by number; +select '-'; +select * from (select * from numbers(1) union all select * from numbers(2) union all select * from numbers(3)) order by number; +select '-'; +select * from (select * from numbers(3) union all select * from numbers(1) union all select * from numbers(2)) order by number; +select '------'; + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.num_01232; +drop table if exists shard_0.num2_01232; +drop table if exists shard_1.num_01232; +drop table if exists shard_1.num2_01232; + +create table shard_0.num_01232 (number UInt64) engine = MergeTree order by number; +create table shard_1.num_01232 (number UInt64) engine = MergeTree order by number; +insert into shard_0.num_01232 select number from numbers(2); +insert into shard_1.num_01232 select number from numbers(3); +create table distr (number UInt64) engine = Distributed(test_cluster_two_shards_different_databases, '', num_01232); + +create table shard_0.num2_01232 (number UInt64) engine = MergeTree order by number; +create table shard_1.num2_01232 (number UInt64) engine = MergeTree order by number; +insert into shard_0.num2_01232 select number from numbers(3); +insert into shard_1.num2_01232 select number from numbers(2); +create table distr2 (number UInt64) engine = Distributed(test_cluster_two_shards_different_databases, '', num2_01232); + +select * from distr order by number; +select '-'; +select * from distr2 order by number; + +drop table if exists shard_0.num_01232; +drop table if exists shard_0.num2_01232; +drop table if exists shard_1.num_01232; +drop table if exists shard_1.num2_01232; + From 8d6f6417964726c97a287a02d409aea6cbc3c49a Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Wed, 8 Apr 2020 21:59:52 +0300 Subject: [PATCH 121/752] join over dictionary concept --- src/Interpreters/DictionaryReader.h | 188 
++++++++++++++++++++ src/Interpreters/ExpressionAnalyzer.cpp | 87 ++++----- src/Interpreters/ExpressionAnalyzer.h | 2 - src/Interpreters/HashJoin.cpp | 109 ++++++++++-- src/Interpreters/HashJoin.h | 9 +- src/Interpreters/InterpreterSelectQuery.cpp | 3 +- src/Interpreters/JoinedTables.cpp | 64 +++++++ src/Interpreters/JoinedTables.h | 2 + src/Interpreters/SyntaxAnalyzer.cpp | 40 +---- src/Interpreters/SyntaxAnalyzer.h | 3 +- src/Interpreters/TableJoin.h | 12 +- src/Storages/StorageDictionary.h | 2 + 12 files changed, 424 insertions(+), 97 deletions(-) create mode 100644 src/Interpreters/DictionaryReader.h diff --git a/src/Interpreters/DictionaryReader.h b/src/Interpreters/DictionaryReader.h new file mode 100644 index 00000000000..c1cd3e9ef1d --- /dev/null +++ b/src/Interpreters/DictionaryReader.h @@ -0,0 +1,188 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int TYPE_MISMATCH; +} + +class DictionaryReader +{ +public: + struct FunctionWrapper + { + ExecutableFunctionPtr function; + ColumnNumbers arg_positions; + size_t result_pos = 0; + + FunctionWrapper(const IFunctionOverloadResolver & resolver, const ColumnsWithTypeAndName & arguments, Block & block, + const ColumnNumbers & arg_positions_, const String & column_name, TypeIndex expected_type) + : arg_positions(arg_positions_) + { + FunctionBasePtr prepare_function = resolver.build(arguments); + result_pos = block.columns(); + + ColumnWithTypeAndName result; + result.name = "get_" + column_name; + result.type = prepare_function->getReturnType(); + if (result.type->getTypeId() != expected_type) + throw Exception("Type mismatch in dictionary reader for: " + column_name, ErrorCodes::TYPE_MISMATCH); + + function = prepare_function->prepare(block, arg_positions, result_pos); + } + + void execute(Block & block, size_t rows) const + { + function->execute(block, arg_positions, result_pos, rows, false); + } + }; + + DictionaryReader(const String & dictionary_name, const Names & src_column_names, const NamesAndTypesList & result_columns, + const Context & context, size_t key_size = 1) + : result_header(makeResultBlock(result_columns)) + , key_position(key_size + result_header.columns()) + { + if (src_column_names.size() != result_columns.size()) + throw Exception("Columns number mismatch in dictionary reader", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); + + FunctionOverloadResolverPtr dict_has(FunctionFactory::instance().get("dictHas", context)); + FunctionOverloadResolverPtr dict_get(FunctionFactory::instance().get("dictGet", context)); + + ColumnWithTypeAndName dict_name; + ColumnWithTypeAndName key; + ColumnWithTypeAndName column_name; + + { + dict_name.name = "dict"; + dict_name.type = std::make_shared(); + dict_name.column = dict_name.type->createColumnConst(1, dictionary_name); + + /// TODO: composite key (key_size > 1) + key.name = "key"; + key.type = std::make_shared(); + + column_name.name = "column"; + column_name.type = std::make_shared(); + } + + /// dictHas('dict_name', id) + ColumnsWithTypeAndName arguments_has; + arguments_has.push_back(dict_name); + arguments_has.push_back(key); + + /// dictGet('dict_name', 'attr_name', id) + ColumnsWithTypeAndName arguments_get; + arguments_get.push_back(dict_name); + arguments_get.push_back(column_name); + arguments_get.push_back(key); + + sample_block.insert(dict_name); + + for (auto & columns_name : src_column_names) 
+ { + ColumnWithTypeAndName name; + name.name = "col_" + columns_name; + name.type = std::make_shared(); + name.column = name.type->createColumnConst(1, columns_name); + + sample_block.insert(name); + } + + sample_block.insert(key); + + ColumnNumbers positions_has{0, key_position}; + function_has = std::make_unique( + *dict_has, arguments_has, sample_block, positions_has, "has", DataTypeUInt8().getTypeId()); + functions_get.reserve(result_header.columns()); + + for (size_t i = 0; i < result_header.columns(); ++i) + { + size_t column_name_pos = key_size + i; + auto & column = result_header.getByPosition(i); + ColumnNumbers positions_get{0, column_name_pos, key_position}; + functions_get.emplace_back(FunctionWrapper( + *dict_get, arguments_get, sample_block, positions_get, column.name, column.type->getTypeId())); + } + } + + void readKeys(const IColumn & keys, size_t size, Block & out_block, ColumnVector::Container & found, + std::vector & positions) const + { + Block working_block = sample_block; + size_t has_position = key_position + 1; + + /// set keys for dictHas() + ColumnWithTypeAndName & key_column = working_block.getByPosition(key_position); + key_column.column = keys.cloneResized(size); /// just a copy we cannot avoid + + /// calculate and extract dictHas() + function_has->execute(working_block, size); + ColumnWithTypeAndName & has_column = working_block.getByPosition(has_position); + auto mutable_has = (*std::move(has_column.column)).mutate(); + found.swap(typeid_cast &>(*mutable_has).getData()); + has_column.column = nullptr; + + /// set mapping from source keys to resulting rows in output block + positions.clear(); + positions.resize(size, 0); + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + if (found[i]) + positions[i] = pos++; + + /// set keys for dictGet(): remove not found keys + key_column.column = key_column.column->filter(found, -1); + size_t rows = key_column.column->size(); + + /// calculate dictGet() + for (auto & func : functions_get) + func.execute(working_block, rows); + + /// make result: copy header block with correct names and move data columns + out_block = result_header.cloneEmpty(); + size_t first_get_position = has_position + 1; + for (size_t i = 0; i < out_block.columns(); ++i) + { + auto & src_column = working_block.getByPosition(first_get_position + i); + auto & dst_column = out_block.getByPosition(i); + dst_column.column = src_column.column; + src_column.column = nullptr; + } + } + +private: + Block result_header; + Block sample_block; /// dictionary name, column names, key, dictHas() result, dictGet() results + size_t key_position; + std::unique_ptr function_has; + std::vector functions_get; + + static Block makeResultBlock(const NamesAndTypesList & names) + { + Block block; + for (auto & nm : names) + { + ColumnWithTypeAndName column{nullptr, nm.type, nm.name}; + if (column.type->isNullable()) + column.type = typeid_cast(*column.type).getNestedType(); + block.insert(std::move(column)); + } + return block; + } +}; + +} diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 6494918c532..3add5164316 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -31,11 +31,13 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -502,25 +504,11 @@ bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, b return true; } -static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & 
join_element, std::shared_ptr analyzed_join, - const Context & context) +static JoinPtr tryGetStorageJoin(std::shared_ptr analyzed_join) { - const auto & table_to_join = join_element.table_expression->as(); - - /// TODO This syntax does not support specifying a database name. - if (table_to_join.database_and_table_name) - { - auto table_id = context.resolveStorageID(table_to_join.database_and_table_name); - StoragePtr table = DatabaseCatalog::instance().tryGetTable(table_id); - - if (table) - { - auto * storage_join = dynamic_cast(table.get()); - if (storage_join) - return storage_join->getJoin(analyzed_join); - } - } - + if (auto * table = analyzed_join->joined_storage.get()) + if (auto * storage_join = dynamic_cast(table)) + return storage_join->getJoin(analyzed_join); return {}; } @@ -531,10 +519,22 @@ static ExpressionActionsPtr createJoinedBlockActions(const Context & context, co return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false); } -static std::shared_ptr makeJoin(std::shared_ptr analyzed_join, const Block & sample_block) +static std::shared_ptr makeJoin(std::shared_ptr analyzed_join, const Block & sample_block, + const Names & original_right_columns, const Context & context) { bool allow_merge_join = analyzed_join->allowMergeJoin(); + /// TODO: check keys + if (auto * storage = analyzed_join->joined_storage.get()) + { + if (auto * dict = dynamic_cast(storage)) + { + analyzed_join->dictionary_reader = std::make_shared( + dict->dictionaryName(), original_right_columns, sample_block.getNamesAndTypesList(), context); + return std::make_shared(analyzed_join, sample_block); + } + } + if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join)) return std::make_shared(analyzed_join, sample_block); else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join)) @@ -550,48 +550,49 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer SubqueryForSet & subquery_for_join = subqueries_for_sets[join_subquery_id]; - /// Special case - if table name is specified on the right of JOIN, then the table has the type Join (the previously prepared mapping). + /// Use StorageJoin if any. if (!subquery_for_join.join) - subquery_for_join.join = tryGetStorageJoin(join_element, syntax->analyzed_join, context); + subquery_for_join.join = tryGetStorageJoin(syntax->analyzed_join); if (!subquery_for_join.join) { /// Actions which need to be calculated on joined block. ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin()); + Names original_right_columns; if (!subquery_for_join.source) { - NamesWithAliases required_columns_with_aliases = - analyzedJoin().getRequiredColumns(joined_block_actions->getSampleBlock(), joined_block_actions->getRequiredColumns()); - makeSubqueryForJoin(join_element, std::move(required_columns_with_aliases), subquery_for_join); + NamesWithAliases required_columns_with_aliases = analyzedJoin().getRequiredColumns( + joined_block_actions->getSampleBlock(), joined_block_actions->getRequiredColumns()); + for (auto & pr : required_columns_with_aliases) + original_right_columns.push_back(pr.first); + + /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs + * - in the addExternalStorage function, the JOIN (SELECT ...) 
subquery is replaced with JOIN _data1, + * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. + * - this function shows the expression JOIN _data1. + */ + auto interpreter = interpretSubquery(join_element.table_expression, context, original_right_columns, query_options); + + subquery_for_join.makeSource(interpreter, std::move(required_columns_with_aliases)); } /// TODO You do not need to set this up when JOIN is only needed on remote servers. subquery_for_join.setJoinActions(joined_block_actions); /// changes subquery_for_join.sample_block inside - subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block); + subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block, original_right_columns, context); + + /// Do not make subquery for join over dictionary. + if (syntax->analyzed_join->dictionary_reader) + { + JoinPtr join = subquery_for_join.join; + subqueries_for_sets.erase(join_subquery_id); + return join; + } } return subquery_for_join.join; } -void SelectQueryExpressionAnalyzer::makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, - NamesWithAliases && required_columns_with_aliases, - SubqueryForSet & subquery_for_set) const -{ - /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs - * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, - * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. - * - this function shows the expression JOIN _data1. - */ - Names original_columns; - for (auto & pr : required_columns_with_aliases) - original_columns.push_back(pr.first); - - auto interpreter = interpretSubquery(join_element.table_expression, context, original_columns, query_options); - - subquery_for_set.makeSource(interpreter, std::move(required_columns_with_aliases)); -} - bool SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) { diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 4322a897378..b7fda92e33f 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -276,8 +276,6 @@ private: SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name); JoinPtr makeTableJoin(const ASTTablesInSelectQueryElement & join_element); - void makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element, NamesWithAliases && required_columns_with_aliases, - SubqueryForSet & subquery_for_set) const; const ASTSelectQuery * getAggregatingQuery() const; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index a3432ebebba..16187f10fa1 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -4,16 +4,21 @@ #include #include +#include #include #include #include +#include #include #include #include #include #include +#include + +#include #include #include @@ -21,8 +26,6 @@ #include #include #include -#include - namespace DB { @@ -282,6 +285,39 @@ static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes return KeyGetter(key_columns, key_sizes, nullptr); } +class KeyGetterForDict +{ +public: + using Mapped = JoinStuff::MappedOne; + using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl; + + KeyGetterForDict(const ColumnRawPtrs 
& key_columns_, const Sizes & key_sizes_, void *) + : key_columns(key_columns_) + , key_sizes(key_sizes_) + {} + + FindResult findKey(const DictionaryReader & reader, size_t i, const Arena &) + { + if (!read_result) + { + reader.readKeys(*key_columns[0], key_sizes[0], read_result, found, positions); + result.block = &read_result; + /// TODO: check types and correct nullability + } + + result.row_num = positions[i]; + return FindResult(&result, found[i]); + } + +private: + const ColumnRawPtrs & key_columns; + const Sizes & key_sizes; + Block read_result; + Mapped result; + ColumnVector::Container found; + std::vector positions; +}; + template struct KeyGetterForTypeImpl; @@ -351,7 +387,7 @@ size_t HashJoin::getTotalRowCount() const for (const auto & block : data->blocks) res += block.rows(); } - else + else if (data->type != Type::DICT) { joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { res += map.getTotalRowCount(data->type); }); } @@ -368,7 +404,7 @@ size_t HashJoin::getTotalByteCount() const for (const auto & block : data->blocks) res += block.bytes(); } - else + else if (data->type != Type::DICT) { joinDispatch(kind, strictness, data->maps, [&](auto, auto, auto & map) { res += map.getTotalByteCountImpl(data->type); }); res += data->pool.size(); @@ -400,7 +436,13 @@ void HashJoin::setSampleBlock(const Block & block) if (nullable_right_side) JoinCommon::convertColumnsToNullable(sample_block_with_columns_to_add); - if (strictness == ASTTableJoin::Strictness::Asof) + if (table_join->dictionary_reader) + { + data->type = Type::DICT; + std::get(data->maps).create(Type::DICT); + chooseMethod(key_columns, key_sizes); /// init key_sizes + } + else if (strictness == ASTTableJoin::Strictness::Asof) { if (kind != ASTTableJoin::Kind::Left and kind != ASTTableJoin::Kind::Inner) throw Exception("ASOF only supports LEFT and INNER as base joins", ErrorCodes::NOT_IMPLEMENTED); @@ -526,7 +568,8 @@ namespace switch (type) { case HashJoin::Type::EMPTY: break; - case HashJoin::Type::CROSS: break; /// Do nothing. We have already saved block, and it is enough. + case HashJoin::Type::CROSS: break; /// Do nothing. We have already saved block, and it is enough. + case HashJoin::Type::DICT: break; /// No one should call it with Type::DICT. #define M(TYPE) \ case HashJoin::Type::TYPE: \ @@ -598,6 +641,8 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) { if (empty()) throw Exception("Logical error: HashJoin was not initialized", ErrorCodes::LOGICAL_ERROR); + if (overDictionary()) + throw Exception("Logical error: insert into hash-map in HashJoin over dictionary", ErrorCodes::LOGICAL_ERROR); /// There's no optimization for right side const columns. Remove constness if any. 
Block block = materializeBlock(source_block); @@ -932,8 +977,7 @@ IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_ case HashJoin::Type::TYPE: \ return joinRightColumnsSwitchNullability>::Type>(\ - *maps_.TYPE, added_columns, null_map);\ - break; + *maps_.TYPE, added_columns, null_map); APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -942,6 +986,20 @@ IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_ } } +template +IColumn::Filter dictionaryJoinRightColumns(const DictionaryReader & reader, AddedColumns & added_columns, const ConstNullMapPtr & null_map) +{ + if constexpr (KIND == ASTTableJoin::Kind::Left && + (STRICTNESS == ASTTableJoin::Strictness::Any || + STRICTNESS == ASTTableJoin::Strictness::Semi || + STRICTNESS == ASTTableJoin::Strictness::Anti)) + { + return joinRightColumnsSwitchNullability(reader, added_columns, null_map); + } + + throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR); +} + } /// nameless @@ -1002,7 +1060,9 @@ void HashJoin::joinBlockImpl( bool has_required_right_keys = (required_right_keys.columns() != 0); added_columns.need_filter = need_filter || has_required_right_keys; - IColumn::Filter row_filter = switchJoinRightColumns(maps_, added_columns, data->type, null_map); + IColumn::Filter row_filter = overDictionary() ? + dictionaryJoinRightColumns(*table_join->dictionary_reader, added_columns, null_map) : + switchJoinRightColumns(maps_, added_columns, data->type, null_map); for (size_t i = 0; i < added_columns.size(); ++i) block.insert(added_columns.moveColumn(i)); @@ -1205,7 +1265,36 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) const Names & key_names_left = table_join->keyNamesLeft(); JoinCommon::checkTypesOfKeys(block, key_names_left, right_table_keys, key_names_right); - if (joinDispatch(kind, strictness, data->maps, [&](auto kind_, auto strictness_, auto & map) + if (overDictionary()) + { + using Kind = ASTTableJoin::Kind; + using Strictness = ASTTableJoin::Strictness; + + auto & map = std::get(data->maps); + if (kind == Kind::Left) + { + switch (strictness) + { + case Strictness::Any: + case Strictness::All: + joinBlockImpl(block, key_names_left, sample_block_with_columns_to_add, map); + break; + case Strictness::Semi: + joinBlockImpl(block, key_names_left, sample_block_with_columns_to_add, map); + break; + case Strictness::Anti: + joinBlockImpl(block, key_names_left, sample_block_with_columns_to_add, map); + break; + default: + throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR); + } + } + else if (kind == Kind::Inner && strictness == Strictness::All) + joinBlockImpl(block, key_names_left, sample_block_with_columns_to_add, map); + else + throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR); + } + else if (joinDispatch(kind, strictness, data->maps, [&](auto kind_, auto strictness_, auto & map) { joinBlockImpl(block, key_names_left, sample_block_with_columns_to_add, map); })) diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 24ad2b871c9..48e7e9e9c9a 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -27,6 +27,7 @@ namespace DB { class TableJoin; +class DictionaryReader; namespace JoinStuff { @@ -148,7 +149,8 @@ class HashJoin : public IJoin public: HashJoin(std::shared_ptr table_join_, const Block & right_sample_block, bool any_take_last_row_ = false); - bool empty() { return data->type == Type::EMPTY; } + bool empty() const { return 
data->type == Type::EMPTY; } + bool overDictionary() const { return data->type == Type::DICT; } /** Add block of data from right hand of JOIN to the map. * Returns false, if some limit was exceeded and you should not insert more data. @@ -220,12 +222,12 @@ public: { EMPTY, CROSS, + DICT, #define M(NAME) NAME, APPLY_FOR_JOIN_VARIANTS(M) #undef M }; - /** Different data structures, that are used to perform JOIN. */ template @@ -247,6 +249,7 @@ public: { case Type::EMPTY: break; case Type::CROSS: break; + case Type::DICT: break; #define M(NAME) \ case Type::NAME: NAME = std::make_unique(); break; @@ -261,6 +264,7 @@ public: { case Type::EMPTY: return 0; case Type::CROSS: return 0; + case Type::DICT: return 0; #define M(NAME) \ case Type::NAME: return NAME ? NAME->size() : 0; @@ -277,6 +281,7 @@ public: { case Type::EMPTY: return 0; case Type::CROSS: return 0; + case Type::DICT: return 0; #define M(NAME) \ case Type::NAME: return NAME ? NAME->getBufferSizeInBytes() : 0; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 35b33874ac1..6ebe15768c7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -305,12 +305,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( max_streams = settings.max_threads; ASTSelectQuery & query = getSelectQuery(); + std::shared_ptr table_join = joined_tables.makeTableJoin(query); auto analyze = [&] (bool try_move_to_prewhere = true) { syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect( query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage), - options, joined_tables.tablesWithColumns(), required_result_column_names); + options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); /// Save scalar sub queries's results in the query context if (context->hasQueryContext()) diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index cedf95bea06..c00704ced9c 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -1,18 +1,26 @@ #include +#include #include #include #include #include #include + #include #include #include +#include +#include + #include +#include #include #include #include #include #include +#include +#include namespace DB { @@ -26,6 +34,34 @@ namespace ErrorCodes namespace { +void replaceJoinedTable(const ASTSelectQuery & select_query) +{ + const ASTTablesInSelectQueryElement * join = select_query.join(); + if (!join || !join->table_expression) + return; + + /// TODO: Push down for CROSS JOIN is not OK [disabled] + const auto & table_join = join->table_join->as(); + if (table_join.kind == ASTTableJoin::Kind::Cross) + return; + + auto & table_expr = join->table_expression->as(); + if (table_expr.database_and_table_name) + { + const auto & table_id = table_expr.database_and_table_name->as(); + String expr = "(select * from " + table_id.name + ") as " + table_id.shortName(); + + // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b", + // we can't replace aliased tables. + // FIXME: long table names include database name, which we can't save within alias. 
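+ // Still, for short unaliased names the rewrite is applied; it is meant to enable predicate pushdown into the right-hand table and runs only when enable_optimize_predicate_expression is set (see makeTableJoin below).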
+ if (table_id.alias.empty() && table_id.isShort()) + { + ParserTableExpression parser; + table_expr = parseQuery(parser, expr, 0)->as(); + } + } +} + template void checkTablesWithColumns(const std::vector & tables_with_columns, const Context & context) { @@ -209,4 +245,32 @@ void JoinedTables::rewriteDistributedInAndJoins(ASTPtr & query) } } +std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & select_query) +{ + auto settings = context.getSettingsRef(); + auto table_join = std::make_shared(settings, context.getTemporaryVolume()); + + const ASTTablesInSelectQueryElement * ast_join = select_query.join(); + const auto & table_to_join = ast_join->table_expression->as(); + + /// TODO This syntax does not support specifying a database name. + if (table_to_join.database_and_table_name) + { + auto joined_table_id = context.resolveStorageID(table_to_join.database_and_table_name); + StoragePtr table = DatabaseCatalog::instance().tryGetTable(joined_table_id); + if (table) + { + if (dynamic_cast(table.get()) || + dynamic_cast(table.get())) + table_join->joined_storage = table; + } + } + + if (!table_join->joined_storage && + settings.enable_optimize_predicate_expression) + replaceJoinedTable(select_query); + + return table_join; +} + } diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 66b3c8de609..6f5750823b0 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -10,6 +10,7 @@ namespace DB class ASTSelectQuery; class Context; +class TableJoin; struct SelectQueryOptions; /// Joined tables' columns resolver. @@ -28,6 +29,7 @@ public: StoragePtr getLeftTableStorage(); bool resolveTables(); void makeFakeTable(StoragePtr storage, const Block & source_header); + std::shared_ptr makeTableJoin(const ASTSelectQuery & select_query); const std::vector & tablesWithColumns() const { return tables_with_columns; } diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index 8a9a63206ba..7016d95f6ac 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -28,8 +28,6 @@ #include #include #include -#include -#include #include #include @@ -549,34 +547,6 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele } } -void replaceJoinedTable(const ASTSelectQuery & select_query) -{ - const ASTTablesInSelectQueryElement * join = select_query.join(); - if (!join || !join->table_expression) - return; - - /// TODO: Push down for CROSS JOIN is not OK [disabled] - const auto & table_join = join->table_join->as(); - if (table_join.kind == ASTTableJoin::Kind::Cross) - return; - - auto & table_expr = join->table_expression->as(); - if (table_expr.database_and_table_name) - { - const auto & table_id = table_expr.database_and_table_name->as(); - String expr = "(select * from " + table_id.name + ") as " + table_id.shortName(); - - // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b", - // we can't replace aliased tables. - // FIXME: long table names include database name, which we can't save within alias. - if (table_id.alias.empty() && table_id.isShort()) - { - ParserTableExpression parser; - table_expr = parseQuery(parser, expr, 0)->as(); - } - } -} - std::vector getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) { /// There can not be aggregate functions inside the WHERE and PREWHERE. 
@@ -783,7 +753,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( SyntaxAnalyzerResult && result, const SelectQueryOptions & select_options, const std::vector & tables_with_columns, - const Names & required_result_columns) const + const Names & required_result_columns, + std::shared_ptr table_join) const { auto * select_query = query->as(); if (!select_query) @@ -795,14 +766,13 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( const auto & settings = context.getSettingsRef(); const NameSet & source_columns_set = result.source_columns_set; - result.analyzed_join = std::make_shared(settings, context.getTemporaryVolume()); + result.analyzed_join = table_join; + if (!result.analyzed_join) /// ExpressionAnalyzer expects some not empty object here + result.analyzed_join = std::make_shared(); if (remove_duplicates) renameDuplicatedColumns(select_query); - if (settings.enable_optimize_predicate_expression) - replaceJoinedTable(*select_query); - /// TODO: Remove unneeded conversion std::vector tables_with_column_names; tables_with_column_names.reserve(tables_with_columns.size()); diff --git a/src/Interpreters/SyntaxAnalyzer.h b/src/Interpreters/SyntaxAnalyzer.h index 23e8a4b79aa..08afd14b83c 100644 --- a/src/Interpreters/SyntaxAnalyzer.h +++ b/src/Interpreters/SyntaxAnalyzer.h @@ -94,7 +94,8 @@ public: SyntaxAnalyzerResult && result, const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, - const Names & required_result_columns = {}) const; + const Names & required_result_columns = {}, + std::shared_ptr table_join = {}) const; private: const Context & context; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 0b5ed82411a..4cde414e270 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ class Context; class ASTSelectQuery; struct DatabaseAndTableWithAlias; class Block; +class DictionaryReader; struct Settings; @@ -42,10 +44,10 @@ class TableJoin friend class SyntaxAnalyzer; const SizeLimits size_limits; - const size_t default_max_bytes; - const bool join_use_nulls; + const size_t default_max_bytes = 0; + const bool join_use_nulls = false; const size_t max_joined_block_rows = 0; - JoinAlgorithm join_algorithm; + JoinAlgorithm join_algorithm = JoinAlgorithm::AUTO; const bool partial_merge_join_optimizations = false; const size_t partial_merge_join_rows_in_right_blocks = 0; @@ -69,6 +71,7 @@ class TableJoin VolumePtr tmp_volume; public: + TableJoin() = default; TableJoin(const Settings &, VolumePtr tmp_volume); /// for StorageJoin @@ -84,6 +87,9 @@ public: table_join.strictness = strictness; } + StoragePtr joined_storage; + std::shared_ptr dictionary_reader; + ASTTableJoin::Kind kind() const { return table_join.kind; } ASTTableJoin::Strictness strictness() const { return table_join.strictness; } bool sameStrictnessAndKind(ASTTableJoin::Strictness, ASTTableJoin::Kind) const; diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index fd6cb1902dc..85cddda399d 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -54,6 +54,8 @@ public: return description; } + const String & dictionaryName() const { return dictionary_name; } + private: using Ptr = MultiVersion::Version; From c389fee7e785340c9861bcfd3f943dc85dfa84a6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 8 Apr 2020 22:48:01 +0300 Subject: [PATCH 122/752] Update 
StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8ce65aca3e0..1af86f7d5f1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -246,7 +246,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( createTableIfNotExists(); - /// We have to check granularity on other replicas. It it's fixed we + /// We have to check granularity on other replicas. If it's fixed we /// must create our new replica with fixed granularity and store this /// information in /replica/metadata. other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); From ab12ebb5cfa5914cb49ce7a49a62d1ab8755fbea Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Wed, 8 Apr 2020 22:58:27 +0300 Subject: [PATCH 123/752] add allowDictJoin() --- src/Interpreters/ExpressionAnalyzer.cpp | 38 +++++++++++++++++++------ 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3add5164316..fbf2b663f3b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -519,20 +520,41 @@ static ExpressionActionsPtr createJoinedBlockActions(const Context & context, co return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false); } +static bool allowDictJoin(const TableJoin & table_join, const Context & context, String & dict_name) +{ + if (!table_join.joined_storage) + return false; + + const Names & right_keys = table_join.keyNamesRight(); + if (right_keys.size() != 1) + return false; + + const String & key_name = right_keys[0]; /// TODO: compound name + + auto * dict = dynamic_cast(table_join.joined_storage.get()); + if (!dict) + return false; + + dict_name = dict->dictionaryName(); + auto dictionary = context.getExternalDictionariesLoader().getDictionary(dict_name); + if (!dictionary) + return false; + + const DictionaryStructure & structure = dictionary->getStructure(); + return structure.id && (structure.id->name == key_name); /// key is UInt64 +} + static std::shared_ptr makeJoin(std::shared_ptr analyzed_join, const Block & sample_block, const Names & original_right_columns, const Context & context) { bool allow_merge_join = analyzed_join->allowMergeJoin(); - /// TODO: check keys - if (auto * storage = analyzed_join->joined_storage.get()) + String dict_name; + if (allowDictJoin(*analyzed_join, context, dict_name)) { - if (auto * dict = dynamic_cast(storage)) - { - analyzed_join->dictionary_reader = std::make_shared( - dict->dictionaryName(), original_right_columns, sample_block.getNamesAndTypesList(), context); - return std::make_shared(analyzed_join, sample_block); - } + analyzed_join->dictionary_reader = std::make_shared( + dict_name, original_right_columns, sample_block.getNamesAndTypesList(), context); + return std::make_shared(analyzed_join, sample_block); } if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join)) From 5e336ba0630396baff2daae7635c78cd0be37180 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 8 Apr 2020 23:33:40 +0300 Subject: [PATCH 124/752] Added another test #10077 --- .../0_stateless/01114_mysql_database_engine_segfault.reference | 0 
.../queries/0_stateless/01114_mysql_database_engine_segfault.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/01114_mysql_database_engine_segfault.reference create mode 100644 tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql diff --git a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.reference b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql new file mode 100644 index 00000000000..371df4f8dee --- /dev/null +++ b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql @@ -0,0 +1 @@ +CREATE DATABASE conv_main ENGINE = MySQL('127.0.0.1:3456', conv_main, 'metrika', 'password'); -- { serverError 1000 } From b6f4287d2f7aad81771ee23416d2741404a494c5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 8 Apr 2020 23:42:12 +0300 Subject: [PATCH 125/752] Remove vagrant build --- docker/packager/README.md | 8 ++-- docker/packager/freebsd/Vagrantfile | 4 -- docker/packager/packager | 57 +---------- 3 files changed, 6 insertions(+), 63 deletions(-) delete mode 100644 docker/packager/freebsd/Vagrantfile diff --git a/docker/packager/README.md b/docker/packager/README.md index e02a45fdaea..5d9751a0fbd 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -3,10 +3,10 @@ compilers and build settings. Correctly configured Docker daemon is single dependency. Usage: -Build deb package with `gcc-8` in `debug` mode: +Build deb package with `gcc-9` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=gcc-8 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=gcc-9 --build-type=debug $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_18.14.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_18.14.2+debug_amd64.deb @@ -18,11 +18,11 @@ $ ls -l deb/test_output ``` -Build ClickHouse binary with `clang-6.0` and `address` sanitizer in `relwithdebuginfo` +Build ClickHouse binary with `clang-9.0` and `address` sanitizer in `relwithdebuginfo` mode: ``` $ mkdir $HOME/some_clickhouse -$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-6.0 --sanitizer=address +$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-9.0 --sanitizer=address $ ls -l $HOME/some_clickhouse -rwxr-xr-x 1 root root 787061952 clickhouse lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse diff --git a/docker/packager/freebsd/Vagrantfile b/docker/packager/freebsd/Vagrantfile deleted file mode 100644 index 765f46d5604..00000000000 --- a/docker/packager/freebsd/Vagrantfile +++ /dev/null @@ -1,4 +0,0 @@ -Vagrant.configure("2") do |config| - config.vm.box = "robot-clickhouse/clickhouse-freebsd" - config.vm.synced_folder ".", "/vagrant", disabled: true -end diff --git a/docker/packager/packager b/docker/packager/packager index 10b4c7e901c..71380b92fac 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -11,48 +11,8 @@ SCRIPT_PATH = os.path.realpath(__file__) IMAGE_MAP = { "deb": "yandex/clickhouse-deb-builder", "binary": "yandex/clickhouse-binary-builder", - "freebsd": os.path.join(os.path.dirname(SCRIPT_PATH), "freebsd"), } -class Vagrant(object): - def __init__(self,
path_to_vagrant_file): - self.prefix = "VAGRANT_CWD=" + path_to_vagrant_file - - def __enter__(self): - subprocess.check_call("{} vagrant up".format(self.prefix), shell=True) - self.ssh_path = "/tmp/vagrant-ssh" - subprocess.check_call("{} vagrant ssh-config > {}".format(self.prefix, self.ssh_path), shell=True) - return self - - def copy_to_image(self, local_path, remote_path): - cmd = "scp -F {ssh} -r {lpath} default:{rpath}".format(ssh=self.ssh_path, lpath=local_path, rpath=remote_path) - logging.info("Copying to image %s", cmd) - subprocess.check_call( - cmd, - shell=True - ) - - def copy_from_image(self, remote_path, local_path): - cmd = "scp -F {ssh} -r default:{rpath} {lpath}".format(ssh=self.ssh_path, rpath=remote_path, lpath=local_path) - logging.info("Copying from image %s", cmd) - subprocess.check_call( - cmd, - shell=True - ) - - def execute_cmd(self, cmd): - cmd = '{} vagrant ssh -c "{}"'.format(self.prefix, cmd) - logging.info("Executin cmd %s", cmd) - subprocess.check_call( - cmd, - shell=True - ) - - def __exit__(self, exc_type, exc_val, exc_tb): - logging.info("Destroying image") - subprocess.check_call("{} vagrant destroy --force".format(self.prefix), shell=True) - - def check_image_exists_locally(image_name): try: output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True) @@ -94,15 +54,6 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache subprocess.check_call(cmd, shell=True) -def run_vagrant_box_with_env(image_path, output_dir, ch_root): - with Vagrant(image_path) as vagrant: - logging.info("Copying folder to vagrant machine") - vagrant.copy_to_image(ch_root, "~/ClickHouse") - logging.info("Running build") - vagrant.execute_cmd("cd ~/ClickHouse && cmake . && ninja") - logging.info("Copying binary back") - vagrant.copy_from_image("~/ClickHouse/programs/clickhouse", output_dir) - def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage): CLANG_PREFIX = "clang" DARWIN_SUFFIX = "-darwin" @@ -210,7 +161,7 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image") # 'performance' creates a combined .tgz with server and configs to be used for performance test. 
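With the Vagrant code path deleted, every package type goes through the same flow: compute environment variables, then run the matching Docker image. A rough sketch of that remaining flow, with a simplified volume layout that is an assumption rather than the script's exact behavior:

```python
# Rough sketch of the docker-only build path this patch keeps.
import subprocess

def run_build(image_name, output_dir, ch_root, env_variables):
    env_part = " ".join("-e {}={}".format(k, v) for k, v in env_variables.items())
    cmd = "docker run --rm --volume={}:/output --volume={}:/build {} {}".format(
        output_dir, ch_root, env_part, image_name)
    subprocess.check_call(cmd, shell=True)

if __name__ == "__main__":
    run_build("yandex/clickhouse-deb-builder", "deb/test_output", "../../", {"BUILD_TYPE": "debug"})
```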
- parser.add_argument("--package-type", choices=['deb', 'binary', 'performance', 'freebsd'], required=True) + parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True) parser.add_argument("--clickhouse-repo-path", default="../../") parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") @@ -252,9 +203,5 @@ if __name__ == "__main__": args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, args.version, args.author, args.official, args.alien_pkgs, args.with_coverage) - if image_type != "freebsd": - run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) - else: - logging.info("Running freebsd build, arguments will be ignored") - run_vagrant_box_with_env(image_name, args.output_dir, ch_root) + run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) logging.info("Output placed into {}".format(args.output_dir)) From 6d85207bfbb4ad8b1a6bdfffb3633dcc46ced64e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 8 Apr 2020 23:07:29 +0300 Subject: [PATCH 126/752] Convert blocks if structure does not match on INSERT into Distributed() Follow-up for: #10105 --- .../DistributedBlockOutputStream.cpp | 35 +++++++++++++------ ...into_distributed_different_types.reference | 1 + ...nsert_into_distributed_different_types.sql | 2 +- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index af17a026927..80b7d4c019e 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -59,6 +61,26 @@ namespace ErrorCodes extern const int CANNOT_LINK; } +static void writeBlockConvert(const Context & context, const BlockOutputStreamPtr & out, const Block & block, const size_t repeats) +{ + if (!blocksHaveEqualStructure(out->getHeader(), block)) + { + ConvertingBlockInputStream convert(context, + std::make_shared(block), + out->getHeader(), + ConvertingBlockInputStream::MatchColumnsMode::Name); + auto adopted_block = convert.read(); + + for (size_t i = 0; i < repeats; ++i) + out->write(adopted_block); + } + else + { + for (size_t i = 0; i < repeats; ++i) + out->write(block); + } +} + DistributedBlockOutputStream::DistributedBlockOutputStream( const Context & context_, StorageDistributed & storage_, const ASTPtr & query_ast_, const ClusterPtr & cluster_, @@ -306,14 +328,12 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp InterpreterInsertQuery interp(query_ast, *job.local_context); auto block_io = interp.execute(); - assertBlocksHaveEqualStructure(block_io.out->getHeader(), shard_block, "flushing shard block for " + storage.getStorageID().getNameForLogs()); + job.stream = block_io.out; job.stream->writePrefix(); } - size_t num_repetitions = shard_info.getLocalNodeCount(); - for (size_t i = 0; i < num_repetitions; ++i) - job.stream->write(shard_block); + writeBlockConvert(context, job.stream, shard_block, shard_info.getLocalNodeCount()); } job.blocks_written += 1; @@ -547,13 +567,8 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_ auto block_io = interp.execute(); - 
assertBlocksHaveEqualStructure(block_io.out->getHeader(), block, "flushing " + storage.getStorageID().getNameForLogs()); - block_io.out->writePrefix(); - - for (size_t i = 0; i < repeats; ++i) - block_io.out->write(block); - + writeBlockConvert(context, block_io.out, block, repeats); block_io.out->writeSuffix(); } diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference index e69de29bb2d..573541ac970 100644 --- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference +++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql index 6b23c72981a..33f16eb241c 100644 --- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql +++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql @@ -4,6 +4,6 @@ DROP TABLE IF EXISTS underlying_00967; CREATE TABLE dist_00967 (key UInt64) Engine=Distributed('test_shard_localhost', currentDatabase(), underlying_00967); -- fails for TinyLog()/MergeTree()/... but not for Memory() CREATE TABLE underlying_00967 (key Nullable(UInt64)) Engine=TinyLog(); -INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1; -- { serverError 171; } +INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1; SELECT * FROM dist_00967; From c0051d9cd960ed872b6c3b6688c97ab29bb88259 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 9 Apr 2020 00:48:00 +0300 Subject: [PATCH 127/752] fixed bug with ClickHouseDictionarySource & test for all sources added --- .../ClickHouseDictionarySource.cpp | 3 + .../__init__.py | 0 .../configs/config.xml | 30 +++++++ .../dictionaries/ClickHouseSourceConfig.xml | 48 +++++++++++ .../dictionaries/ExecutableSourceConfig.xml | 45 ++++++++++ .../configs/dictionaries/FileSourceConfig.xml | 45 ++++++++++ .../configs/dictionaries/HTTPSourceConfig.xml | 54 ++++++++++++ .../configs/dictionaries/source.csv | 3 + .../configs/users.xml | 23 +++++ .../http_server.py | 86 +++++++++++++++++++ .../test_dictionary_custom_settings/test.py | 62 +++++++++++++ 11 files changed, 399 insertions(+) create mode 100644 tests/integration/test_dictionary_custom_settings/__init__.py create mode 100644 tests/integration/test_dictionary_custom_settings/configs/config.xml create mode 100644 tests/integration/test_dictionary_custom_settings/configs/dictionaries/ClickHouseSourceConfig.xml create mode 100644 tests/integration/test_dictionary_custom_settings/configs/dictionaries/ExecutableSourceConfig.xml create mode 100644 tests/integration/test_dictionary_custom_settings/configs/dictionaries/FileSourceConfig.xml create mode 100644 tests/integration/test_dictionary_custom_settings/configs/dictionaries/HTTPSourceConfig.xml create mode 100644 tests/integration/test_dictionary_custom_settings/configs/dictionaries/source.csv create mode 100644 tests/integration/test_dictionary_custom_settings/configs/users.xml create mode 100644 tests/integration/test_dictionary_custom_settings/http_server.py create mode 100644 tests/integration/test_dictionary_custom_settings/test.py diff --git a/dbms/Dictionaries/ClickHouseDictionarySource.cpp 
b/dbms/Dictionaries/ClickHouseDictionarySource.cpp index 45895ae93b2..8d6c5b205e0 100644 --- a/dbms/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/Dictionaries/ClickHouseDictionarySource.cpp @@ -74,6 +74,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication). context.setUser(user, password, Poco::Net::SocketAddress("127.0.0.1", 0), {}); /// Processors are not supported here yet. + context.setSettings(context_.getSettings()); context.setSetting("experimental_use_processors", false); /// Query context is needed because some code in executeQuery function may assume it exists. /// Current example is Context::getSampleBlockCache from InterpreterSelectWithUnionQuery::getSampleBlock. @@ -217,6 +218,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) bool /* check_config */) -> DictionarySourcePtr { Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); + + std::cerr << "initialization: " << context_local_copy.getSettings().max_bytes_to_read << '\n'; /// Note that processors are not supported yet (see constructor), /// hence it is not possible to override experimental_use_processors setting return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context_local_copy); diff --git a/tests/integration/test_dictionary_custom_settings/__init__.py b/tests/integration/test_dictionary_custom_settings/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_dictionary_custom_settings/configs/config.xml b/tests/integration/test_dictionary_custom_settings/configs/config.xml new file mode 100644 index 00000000000..1e4c14585a9 --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/configs/config.xml @@ -0,0 +1,30 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + + /etc/clickhouse-server/config.d/*.xml + diff --git a/tests/integration/test_dictionary_custom_settings/configs/dictionaries/ClickHouseSourceConfig.xml b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/ClickHouseSourceConfig.xml new file mode 100644 index 00000000000..2191c8ded8a --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/ClickHouseSourceConfig.xml @@ -0,0 +1,48 @@ + + + test_clickhouse + + + + localhost + 9000 + default + + default + source
+
+ + + 1 + + + + + 600 + + + + + + + + id + + + first + String + + + + second + String + + + + third + String + + + +
+
diff --git a/tests/integration/test_dictionary_custom_settings/configs/dictionaries/ExecutableSourceConfig.xml b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/ExecutableSourceConfig.xml new file mode 100644 index 00000000000..3191118c4e9 --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/ExecutableSourceConfig.xml @@ -0,0 +1,45 @@ + + + test_executable + + + + cat /etc/clickhouse-server/config.d/source.csv + CSVWithNames + + + + 0 + 0 + + + + + 600 + + + + + + + + id + + + first + String + + + + second + String + + + + third + String + + + + + diff --git a/tests/integration/test_dictionary_custom_settings/configs/dictionaries/FileSourceConfig.xml b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/FileSourceConfig.xml new file mode 100644 index 00000000000..ff7baf29be0 --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/FileSourceConfig.xml @@ -0,0 +1,45 @@ + + + test_file + + + + /etc/clickhouse-server/config.d/source.csv + CSVWithNames + + + + 0 + 0 + + + + + 600 + + + + + + + + id + + + first + String + + + + second + String + + + + third + String + + + + + diff --git a/tests/integration/test_dictionary_custom_settings/configs/dictionaries/HTTPSourceConfig.xml b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/HTTPSourceConfig.xml new file mode 100644 index 00000000000..dc03974c4b6 --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/HTTPSourceConfig.xml @@ -0,0 +1,54 @@ + + + test_http + + + http://localhost:5555/source.csv + CSVWithNames + + foo + bar + + +
+ api-key + secret +
+
+
+ + + 0 + 0 + + + + + 600 + + + + + + + + id + + + first + String + + + + second + String + + + + third + String + + + +
+
diff --git a/tests/integration/test_dictionary_custom_settings/configs/dictionaries/source.csv b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/source.csv new file mode 100644 index 00000000000..23d113e5225 --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/configs/dictionaries/source.csv @@ -0,0 +1,3 @@ +id,first,second,third +1,'a,"b,c +2,'d,"e,f diff --git a/tests/integration/test_dictionary_custom_settings/configs/users.xml b/tests/integration/test_dictionary_custom_settings/configs/users.xml new file mode 100644 index 00000000000..6061af8e33d --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/configs/users.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/tests/integration/test_dictionary_custom_settings/http_server.py b/tests/integration/test_dictionary_custom_settings/http_server.py new file mode 100644 index 00000000000..c7920a9024d --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/http_server.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +import argparse +from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer +import socket +import ssl +import csv + + +# Decorator used to see if authentication works for external dictionary who use a HTTP source. +def check_auth(fn): + def wrapper(req): + auth_header = req.headers.get('authorization', None) + api_key = req.headers.get('api-key', None) + if not auth_header or auth_header != 'Basic Zm9vOmJhcg==' or not api_key or api_key != 'secret': + req.send_response(401) + else: + fn(req) + return wrapper + + +def start_server(server_address, data_path, schema, cert_path, address_family): + class TSVHTTPHandler(BaseHTTPRequestHandler): + @check_auth + def do_GET(self): + self.__send_headers() + self.__send_data() + + @check_auth + def do_POST(self): + ids = self.__read_and_decode_post_ids() + print "ids=", ids + self.__send_headers() + self.__send_data(ids) + + def __send_headers(self): + self.send_response(200) + self.send_header('Content-type', 'text/csv') + self.end_headers() + + def __send_data(self, only_ids = None): + with open(data_path, 'r') as fl: + reader = csv.reader(fl, delimiter='\t') + for row in reader: + if not only_ids or (row[0] in only_ids): + self.wfile.write('\t'.join(row) + '\n') + + def __read_and_decode_post_ids(self): + data = self.__read_and_decode_post_data() + return filter(None, data.split()) + + def __read_and_decode_post_data(self): + transfer_encoding = self.headers.get("Transfer-encoding") + decoded = "" + if transfer_encoding == "chunked": + while True: + s = self.rfile.readline() + chunk_length = int(s, 16) + if not chunk_length: + break + decoded += self.rfile.read(chunk_length) + self.rfile.readline() + else: + content_length = int(self.headers.get("Content-Length", 0)) + decoded = self.rfile.read(content_length) + return decoded + + if address_family == "ipv6": + HTTPServer.address_family = socket.AF_INET6 + httpd = HTTPServer(server_address, TSVHTTPHandler) + if schema == "https": + httpd.socket = ssl.wrap_socket(httpd.socket, certfile=cert_path, server_side=True) + httpd.serve_forever() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Simple HTTP server returns data from file") + parser.add_argument("--host", default="localhost") + parser.add_argument("--port", default=5555, type=int) + parser.add_argument("--data-path", required=True) + parser.add_argument("--schema", choices=("http", "https"), required=True) + 
parser.add_argument("--cert-path", default="./fake_cert.pem") + parser.add_argument('--address-family', choices=("ipv4", "ipv6"), default="ipv4") + + args = parser.parse_args() + + start_server((args.host, args.port), args.data_path, args.schema, args.cert_path, args.address_family) diff --git a/tests/integration/test_dictionary_custom_settings/test.py b/tests/integration/test_dictionary_custom_settings/test.py new file mode 100644 index 00000000000..a9a7c19d4af --- /dev/null +++ b/tests/integration/test_dictionary_custom_settings/test.py @@ -0,0 +1,62 @@ +import os +import pytest + +from helpers.cluster import ClickHouseCluster + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +config_dir = os.path.join(SCRIPT_DIR, './configs') +DICTIONARY_FILES = [ + 'configs/dictionaries/FileSourceConfig.xml', + 'configs/dictionaries/ExecutableSourceConfig.xml', + 'configs/dictionaries/source.csv', + 'configs/dictionaries/HTTPSourceConfig.xml', + 'configs/dictionaries/ClickHouseSourceConfig.xml' +] + +cluster = ClickHouseCluster(__file__, base_configs_dir=config_dir) +instance = cluster.add_instance('node', main_configs=DICTIONARY_FILES, config_dir=config_dir) + +def prepare(): + node = instance + path = "/source.csv" + + script_dir = os.path.dirname(os.path.realpath(__file__)) + node.copy_file_to_container(os.path.join(script_dir, './http_server.py'), '/http_server.py') + node.copy_file_to_container(os.path.join(script_dir, 'configs/dictionaries/source.csv'), './source.csv') + node.exec_in_container([ + "bash", + "-c", + "python2 /http_server.py --data-path={tbl} --schema=http --host=localhost --port=5555".format( + tbl=path) + ], detach=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + prepare() + yield cluster + finally: + cluster.shutdown() + +def test_work(start_cluster): + query = instance.query + + assert query("SELECT dictGetString('test_file', 'first', toUInt64(1))") == "\\\'a\n" + assert query("SELECT dictGetString('test_file', 'second', toUInt64(1))") == "\"b\n" + assert query("SELECT dictGetString('test_executable', 'first', toUInt64(1))") == "\\\'a\n" + assert query("SELECT dictGetString('test_executable', 'second', toUInt64(1))") == "\"b\n" + + caught_exception = '' + try: + instance.query("CREATE TABLE source (id UInt64, first String, second String, third String) ENGINE=File(CSVWithNames);") + instance.query("INSERT INTO default.source VALUES (1, 'aaa', 'bbb', 'cccc'), (2, 'ddd', 'eee', 'fff')") + instance.query("SELECT dictGetString('test_clickhouse', 'second', toUInt64(1))") + except Exception as e: + caught_exception = str(e) + + assert caught_exception.find("Limit for result exceeded") != -1 + + assert query("SELECT dictGetString('test_http', 'first', toUInt64(1))") == "\\\'a\n" + assert query("SELECT dictGetString('test_http', 'second', toUInt64(1))") == "\"b\n" \ No newline at end of file From f48fdda6787d44ec259b3e897729dcaf7788ca7f Mon Sep 17 00:00:00 2001 From: BohuTANG <172204+BohuTANG@users.noreply.github.com> Date: Thu, 9 Apr 2020 05:52:19 +0800 Subject: [PATCH 128/752] Enhanced compatibility with native mysql-connector-java(JDBC) (#10021) * Skip the `/* comments */ SELECT @@variables ...` from mysql-connector-java setup for MySQL Handler #9336 mysql-connector setup query: /* mysql-connector-java-5.1.38 ( Revision: ${revinfo.commit} ) */SELECT @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_connection, 
@@character_set_results AS character_set_results, @@character_set_server AS character_set_server, @@init_connect AS init_connect, @@interactive_timeout AS interactive_timeout... ClickHouse side Error: {} executeQuery: Code: 62, e.displayText() = DB::Exception: Syntax error: failed at position 74: @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_conn. Expected one of: CAST, NULL... Client side Exception: java.sql.SQLException: Syntax error: failed at position 74: @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_conn. Expected one of: CAST... * add replace 'SHOW VARIABLES' for mysql-connector-java-5.1.34 #9336 * Add java client(JDBC) integration test to test_mysql_protocol * shift out java tests from dbms * Update MySQLHandler.cpp * Update MySQLHandler.cpp * test_mysql_protocol: add Test.java exit code 1 on exception Co-authored-by: alexey-milovidov --- programs/server/MySQLHandler.cpp | 33 ++++++-- .../clients/java/0.reference | 15 ++++ .../clients/java/Dockerfile | 18 +++++ .../clients/java/Test.java | 76 +++++++++++++++++++ .../clients/java/docker_compose.yml | 8 ++ tests/integration/test_mysql_protocol/test.py | 22 ++++++ 6 files changed, 165 insertions(+), 7 deletions(-) create mode 100644 tests/integration/test_mysql_protocol/clients/java/0.reference create mode 100644 tests/integration/test_mysql_protocol/clients/java/Dockerfile create mode 100644 tests/integration/test_mysql_protocol/clients/java/Test.java create mode 100644 tests/integration/test_mysql_protocol/clients/java/docker_compose.yml diff --git a/programs/server/MySQLHandler.cpp b/programs/server/MySQLHandler.cpp index 3e1432dbfce..b72aa8104d3 100644 --- a/programs/server/MySQLHandler.cpp +++ b/programs/server/MySQLHandler.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #if USE_POCO_NETSSL #include @@ -268,7 +269,8 @@ void MySQLHandler::comPing() packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); } -static bool isFederatedServerSetupCommand(const String & query); +static bool isFederatedServerSetupSetCommand(const String & query); +static bool isFederatedServerSetupSelectVarCommand(const String & query); void MySQLHandler::comQuery(ReadBuffer & payload) { @@ -276,7 +278,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload) // This is a workaround in order to support adding ClickHouse to MySQL using federated server. // As Clickhouse doesn't support these statements, we just send OK packet in response. - if (isFederatedServerSetupCommand(query)) + if (isFederatedServerSetupSetCommand(query)) { packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); } @@ -288,10 +290,11 @@ void MySQLHandler::comQuery(ReadBuffer & payload) // Translate query from MySQL to ClickHouse. // This is a temporary workaround until ClickHouse supports the syntax "@@var_name". - if (query == "select @@version_comment limit 1") // MariaDB client starts session with that query + if (isFederatedServerSetupSelectVarCommand(query)) { should_replace = true; } + // This is a workaround in order to support adding ClickHouse to MySQL using federated server.
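These setup probes can be replayed without a JDBC driver at all; a minimal sketch with pymysql (the same library the integration tests use), assuming a server with the MySQL protocol enabled on port 9004:

```python
# Sends the kind of variable probe mysql-connector-java issues on connect;
# with the handler change above the server rewrites it instead of failing to parse.
import pymysql

conn = pymysql.connect(host="127.0.0.1", port=9004, user="default", password="", database="default")
with conn.cursor() as cursor:
    cursor.execute("/* mysql-connector-java-5.1.38 */ SELECT @@session.auto_increment_increment AS auto_increment_increment")
    print(cursor.fetchall())
conn.close()
```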
if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) { @@ -358,11 +361,27 @@ void MySQLHandlerSSL::finishHandshakeSSL(size_t packet_size, char * buf, size_t #endif -static bool isFederatedServerSetupCommand(const String & query) +static bool isFederatedServerSetupSetCommand(const String & query) { - return 0 == strncasecmp("SET NAMES", query.c_str(), 9) || 0 == strncasecmp("SET character_set_results", query.c_str(), 25) - || 0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22) || 0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14) - || 0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39); + static const std::regex expr{ + "(^(SET NAMES(.*)))" + "|(^(SET character_set_results(.*)))" + "|(^(SET FOREIGN_KEY_CHECKS(.*)))" + "|(^(SET AUTOCOMMIT(.*)))" + "|(^(SET sql_mode(.*)))" + "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))" + , std::regex::icase}; + return 1 == std::regex_match(query, expr); +} + +static bool isFederatedServerSetupSelectVarCommand(const String & query) +{ + static const std::regex expr{ + "|(^(SELECT @@(.*)))" + "|(^((/\\*(.*)\\*/)([ \t]*)(SELECT([ \t]*)@@(.*))))" + "|(^((/\\*(.*)\\*/)([ \t]*)(SHOW VARIABLES(.*))))" + , std::regex::icase}; + return 1 == std::regex_match(query, expr); } const String MySQLHandler::show_table_status_replacement_query("SELECT" diff --git a/tests/integration/test_mysql_protocol/clients/java/0.reference b/tests/integration/test_mysql_protocol/clients/java/0.reference new file mode 100644 index 00000000000..bcf9e3dde94 --- /dev/null +++ b/tests/integration/test_mysql_protocol/clients/java/0.reference @@ -0,0 +1,15 @@ +33jdbc +44ck +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 diff --git a/tests/integration/test_mysql_protocol/clients/java/Dockerfile b/tests/integration/test_mysql_protocol/clients/java/Dockerfile new file mode 100644 index 00000000000..96713a68e66 --- /dev/null +++ b/tests/integration/test_mysql_protocol/clients/java/Dockerfile @@ -0,0 +1,18 @@ +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y software-properties-common build-essential openjdk-8-jdk libmysql-java curl + +RUN rm -rf \ + /var/lib/apt/lists/* \ + /var/cache/debconf \ + /tmp/* \ +RUN apt-get clean + +ARG ver=5.1.46 +RUN curl -L -o /mysql-connector-java-${ver}.jar https://repo1.maven.org/maven2/mysql/mysql-connector-java/${ver}/mysql-connector-java-${ver}.jar +ENV CLASSPATH=$CLASSPATH:/mysql-connector-java-${ver}.jar + +WORKDIR /jdbc +COPY Test.java Test.java +RUN javac Test.java diff --git a/tests/integration/test_mysql_protocol/clients/java/Test.java b/tests/integration/test_mysql_protocol/clients/java/Test.java new file mode 100644 index 00000000000..50ce824f67c --- /dev/null +++ b/tests/integration/test_mysql_protocol/clients/java/Test.java @@ -0,0 +1,76 @@ +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; + +class JavaConnectorTest { + private static final String CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS default.test1 (age Int32, name String) Engine = Memory"; + private static final String INSERT_SQL = "INSERT INTO default.test1 VALUES(33, 'jdbc'),(44, 'ck')"; + private static final String SELECT_SQL = "SELECT * FROM default.test1"; + private static final String SELECT_NUMBER_SQL = "SELECT * FROM system.numbers LIMIT 13"; + private static final String DROP_TABLE_SQL = "DROP TABLE default.test1"; + + public static void main(String[] args) { + int i = 0; + String host = "127.0.0.1"; + String port = 
"9004"; + String user = "default"; + String password = ""; + String database = "default"; + while (i < args.length) { + switch (args[i]) { + case "--host": + host = args[++i]; + break; + case "--port": + port = args[++i]; + break; + case "--user": + user = args[++i]; + break; + case "--password": + password = args[++i]; + break; + case "--database": + database = args[++i]; + break; + default: + i++; + break; + } + } + + String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?maxAllowedPacket=67108864&useSSL=false", host, port, database); + + Connection conn = null; + Statement stmt = null; + try { + conn = DriverManager.getConnection(jdbcUrl, user, password); + stmt = conn.createStatement(); + stmt.executeUpdate(CREATE_TABLE_SQL); + stmt.executeUpdate(INSERT_SQL); + + ResultSet rs = stmt.executeQuery(SELECT_SQL); + while (rs.next()) { + System.out.print(rs.getString("age")); + System.out.print(rs.getString("name")); + System.out.println(); + } + + stmt.executeUpdate(DROP_TABLE_SQL); + + rs = stmt.executeQuery(SELECT_NUMBER_SQL); + while (rs.next()) { + System.out.print(rs.getString(1)); + System.out.println(); + } + + stmt.close(); + conn.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } +} diff --git a/tests/integration/test_mysql_protocol/clients/java/docker_compose.yml b/tests/integration/test_mysql_protocol/clients/java/docker_compose.yml new file mode 100644 index 00000000000..dbe404232a0 --- /dev/null +++ b/tests/integration/test_mysql_protocol/clients/java/docker_compose.yml @@ -0,0 +1,8 @@ +version: '2.2' +services: + java1: + build: + context: ./ + network: host + # to keep container running + command: sleep infinity diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 7987076c29a..b5ee3cecec9 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -79,6 +79,13 @@ def nodejs_container(): yield docker.from_env().containers.get(cluster.project_name + '_mysqljs1_1') +@pytest.fixture(scope='module') +def java_container(): + docker_compose = os.path.join(SCRIPT_DIR, 'clients', 'java', 'docker_compose.yml') + subprocess.check_call(['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d', '--build']) + yield docker.from_env().containers.get(cluster.project_name + '_java1_1') + + def test_mysql_client(mysql_client, server_address): # type: (Container, str) -> None code, (stdout, stderr) = mysql_client.exec_run(''' @@ -266,6 +273,21 @@ def test_mysqljs_client(server_address, nodejs_container): assert code == 1 +def test_java_client(server_address, java_container): + # type: (str, Container) -> None + with open(os.path.join(SCRIPT_DIR, 'clients', 'java', '0.reference')) as fp: + reference = fp.read() + + code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user user_with_empty_password --database ' + 'abc'.format(host=server_address, port=server_port), demux=True) + assert code == 1 + + code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user user_with_empty_password --database ' + 'default'.format(host=server_address, port=server_port), demux=True) + assert code == 0 + assert stdout == reference + + def test_types(server_address): client = pymysql.connections.Connection(host=server_address, user='default', password='123', database='default', port=server_port) From 
f2fda8570848b3ea7f24e7742c97700dfbb32386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 9 Apr 2020 00:53:48 +0300 Subject: [PATCH 129/752] documentation edit --- .../dicts/external_dicts_dict_sources.md | 19 +++++++++++++++++++ .../dicts/external_dicts_dict_sources.md | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/docs/en/query_language/dicts/external_dicts_dict_sources.md b/docs/en/query_language/dicts/external_dicts_dict_sources.md index 1756936febf..9a67d4bde9d 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/en/query_language/dicts/external_dicts_dict_sources.md @@ -30,6 +30,25 @@ SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration The source is configured in the `source` section. +For source types +[Local file](#dicts-external_dicts_dict_sources-local_file), +[Executable file](#dicts-external_dicts_dict_sources-executable), +[HTTP(s)](#dicts-external_dicts_dict_sources-http), +[ClickHouse](#dicts-external_dicts_dict_sources-clickhouse) +optional format settings are available: + +``` xml + + + /opt/dictionaries/os.tsv + TabSeparated + + + 0 + + +``` + Types of sources (`source_type`): - [Local file](#dicts-external_dicts_dict_sources-local_file) diff --git a/docs/ru/query_language/dicts/external_dicts_dict_sources.md b/docs/ru/query_language/dicts/external_dicts_dict_sources.md index 8b9961ee7fc..5b13a438179 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/ru/query_language/dicts/external_dicts_dict_sources.md @@ -30,6 +30,25 @@ SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration Источник настраивается в разделе `source`. 
+Для типов источников +[Локальный файл](#dicts-external_dicts_dict_sources-local_file), +[Исполняемый файл](#dicts-external_dicts_dict_sources-executable), +[HTTP(s)](#dicts-external_dicts_dict_sources-http), +[ClickHouse](#dicts-external_dicts_dict_sources-clickhouse) +доступны дополнительные настройки форматирования: + +``` xml + + + /opt/dictionaries/os.tsv + TabSeparated + + + 0 + + +``` + Типы источников (`source_type`): - [Локальный файл](#dicts-external_dicts_dict_sources-local_file) From eea558b713cd4d8d0582dd08a504d5b2f32a017d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 9 Apr 2020 00:57:20 +0300 Subject: [PATCH 130/752] minor changes --- dbms/Dictionaries/DictionaryStructure.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/Dictionaries/DictionaryStructure.cpp b/dbms/Dictionaries/DictionaryStructure.cpp index fe4b1474e84..5528be7a2bb 100644 --- a/dbms/Dictionaries/DictionaryStructure.cpp +++ b/dbms/Dictionaries/DictionaryStructure.cpp @@ -356,4 +356,4 @@ std::vector DictionaryStructure::getAttributes( return res_attributes; } -} \ No newline at end of file +} From b666f60af87e2bbc86990cacbcbc23c760cce3f7 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev Date: Mon, 6 Apr 2020 22:27:57 +0300 Subject: [PATCH 131/752] Optional secured communication between ClickHouse and Zookeeper --- programs/server/config.xml | 2 +- src/Common/ZooKeeper/CMakeLists.txt | 4 ++ src/Common/ZooKeeper/ZooKeeper.cpp | 29 ++++++---- src/Common/ZooKeeper/ZooKeeper.h | 4 ++ src/Common/ZooKeeper/ZooKeeperImpl.cpp | 53 ++++++++++++++----- src/Common/ZooKeeper/ZooKeeperImpl.h | 13 +++-- .../tests/zkutil_test_commands_new_lib.cpp | 25 ++++++--- src/Common/ZooKeeper/tests/zookeeper_impl.cpp | 2 +- .../configs/config.xml | 2 +- tests/server-test.xml | 2 +- 10 files changed, 97 insertions(+), 39 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index f55ab02d903..fb2f9be6e24 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -37,7 +37,7 @@ true - + true true sslv2,sslv3 diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index aa6efcd3ca1..4dbf999419e 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -7,6 +7,10 @@ add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} $ target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common PRIVATE string_utils PUBLIC ${Poco_Util_LIBRARY}) target_include_directories(clickhouse_common_zookeeper PUBLIC ${DBMS_INCLUDE_DIR}) +if (USE_POCO_NETSSL) + target_link_libraries (clickhouse_common_zookeeper PRIVATE ${Poco_NetSSL_LIBRARY} ${Poco_Crypto_LIBRARY}) +endif() + if (ENABLE_TESTS) add_subdirectory (tests) endif () diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 99c3f115021..f2442f3f5c5 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -59,30 +59,36 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho if (implementation == "zookeeper") { if (hosts.empty()) - throw KeeperException("No addresses passed to ZooKeeper constructor.", Coordination::ZBADARGUMENTS); + throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::ZBADARGUMENTS); - std::vector addresses_strings; - splitInto<','>(addresses_strings, hosts); - 
Coordination::ZooKeeper::Addresses addresses; - addresses.reserve(addresses_strings.size()); + std::vector hosts_strings; + splitInto<','>(hosts_strings, hosts); + Coordination::ZooKeeper::Nodes nodes; + nodes.reserve(hosts_strings.size()); - for (const auto & address_string : addresses_strings) + for (auto & host_string : hosts_strings) { try { - addresses.emplace_back(address_string); + bool secure = bool(startsWith(host_string, "secure://")); + + if (secure) { + host_string.erase(0, strlen("secure://")); + } + + nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); } catch (const Poco::Net::DNSException & e) { - LOG_ERROR(log, "Cannot use ZooKeeper address " << address_string << ", reason: " << e.displayText()); + LOG_ERROR(log, "Cannot use ZooKeeper host " << host_string << ", reason: " << e.displayText()); } } - if (addresses.empty()) - throw KeeperException("Cannot use any of provided ZooKeeper addresses", Coordination::ZBADARGUMENTS); + if (nodes.empty()) + throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::ZBADARGUMENTS); impl = std::make_unique( - addresses, + nodes, chroot, identity_.empty() ? "" : "digest", identity_, @@ -130,6 +136,7 @@ struct ZooKeeperArgs if (startsWith(key, "node")) { hosts_strings.push_back( + (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." + key + ".port", "2181") ); diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 2d4d449b1a6..db166314a07 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -63,10 +63,14 @@ public: example1 2181 + + 1 example2 2181 + + 1 30000 10000 diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index b8700a93e35..2fba10b20e9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -11,6 +11,11 @@ #include #include +#include +#if USE_POCO_NETSSL +#include +#endif + #include @@ -44,6 +49,13 @@ namespace CurrentMetrics extern const Metric ZooKeeperWatch; } +namespace DB +{ + namespace ErrorCodes + { + extern const int SUPPORT_IS_DISABLED; + } +} /** ZooKeeper wire protocol. @@ -817,7 +829,7 @@ ZooKeeper::~ZooKeeper() ZooKeeper::ZooKeeper( - const Addresses & addresses, + const Nodes & nodes, const String & root_path_, const String & auth_scheme, const String & auth_data, @@ -851,7 +863,7 @@ ZooKeeper::ZooKeeper( default_acls.emplace_back(std::move(acl)); } - connect(addresses, connection_timeout); + connect(nodes, connection_timeout); if (!auth_scheme.empty()) sendAuth(auth_scheme, auth_data); @@ -864,11 +876,11 @@ ZooKeeper::ZooKeeper( void ZooKeeper::connect( - const Addresses & addresses, + const Nodes & nodes, Poco::Timespan connection_timeout) { - if (addresses.empty()) - throw Exception("No addresses passed to ZooKeeper constructor", ZBADARGUMENTS); + if (nodes.empty()) + throw Exception("No nodes passed to ZooKeeper constructor", ZBADARGUMENTS); static constexpr size_t num_tries = 3; bool connected = false; @@ -876,12 +888,25 @@ void ZooKeeper::connect( WriteBufferFromOwnString fail_reasons; for (size_t try_no = 0; try_no < num_tries; ++try_no) { - for (const auto & address : addresses) + for (const auto & node : nodes) { try { - socket = Poco::Net::StreamSocket(); /// Reset the state of previous attempt. 
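The host-string convention introduced above, an optional secure:// prefix on each entry of the comma-separated host list, reduces to a small parsing rule; a self-contained sketch (IPv6 literals aside):

```python
# Parse one ZooKeeper host entry; "secure://" requests an SSL connection and
# 2181 is the default port used by the config parser.
def parse_zk_host(host_string):
    secure = host_string.startswith("secure://")
    if secure:
        host_string = host_string[len("secure://"):]
    host, _, port = host_string.partition(":")
    return host, int(port) if port else 2181, secure

assert parse_zk_host("secure://example1:2281") == ("example1", 2281, True)
assert parse_zk_host("example2") == ("example2", 2181, False)
```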
- socket.connect(address, connection_timeout); + /// Reset the state of previous attempt. + if (node.secure) + { +#if USE_POCO_NETSSL + socket = Poco::Net::SecureStreamSocket(); +#else + throw Exception{"Communication with ZooKeeper over SSL is disabled because poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + } + else + { + socket = Poco::Net::StreamSocket(); + } + + socket.connect(node.address, connection_timeout); socket.setReceiveTimeout(operation_timeout); socket.setSendTimeout(operation_timeout); @@ -915,7 +940,7 @@ void ZooKeeper::connect( } catch (...) { - fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << address.toString(); + fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << node.address.toString(); } } @@ -926,15 +951,19 @@ void ZooKeeper::connect( if (!connected) { WriteBufferFromOwnString message; - message << "All connection tries failed while connecting to ZooKeeper. Addresses: "; + message << "All connection tries failed while connecting to ZooKeeper. nodes: "; bool first = true; - for (const auto & address : addresses) + for (const auto & node : nodes) { if (first) first = false; else message << ", "; - message << address.toString(); + + if (node.secure) + message << "secure://"; + + message << node.address.toString(); } message << fail_reasons.str() << "\n"; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 88e949dbd45..069df723d43 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -93,17 +93,22 @@ struct ZooKeeperRequest; class ZooKeeper : public IKeeper { public: - using Addresses = std::vector; + struct Node { + Poco::Net::SocketAddress address; + bool secure; + }; + + using Nodes = std::vector; using XID = int32_t; using OpNum = int32_t; - /** Connection to addresses is performed in order. If you want, shuffle them manually. + /** Connection to nodes is performed in order. If you want, shuffle them manually. * Operation timeout couldn't be greater than session timeout. * Operation timeout applies independently for network read, network write, waiting for events and synchronization. 
*/ ZooKeeper( - const Addresses & addresses, + const Nodes & nodes, const String & root_path, const String & auth_scheme, const String & auth_data, @@ -213,7 +218,7 @@ private: ThreadFromGlobalPool receive_thread; void connect( - const Addresses & addresses, + const Nodes & nodes, Poco::Timespan connection_timeout); void sendHandshake(); diff --git a/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp index aa348163adf..0bca8e0f561 100644 --- a/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp +++ b/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -23,15 +24,23 @@ try Poco::Logger::root().setChannel(channel); Poco::Logger::root().setLevel("trace"); - std::string addresses_arg = argv[1]; - std::vector addresses_strings; - splitInto<','>(addresses_strings, addresses_arg); - ZooKeeper::Addresses addresses; - addresses.reserve(addresses_strings.size()); - for (const auto & address_string : addresses_strings) - addresses.emplace_back(address_string); + std::string hosts_arg = argv[1]; + std::vector hosts_strings; + splitInto<','>(hosts_strings, hosts_arg); + ZooKeeper::Nodes nodes; + nodes.reserve(hosts_strings.size()); + for (auto & host_string : hosts_strings) { + bool secure = bool(startsWith(host_string, "secure://")); - ZooKeeper zk(addresses, {}, {}, {}, {5, 0}, {0, 50000}, {0, 50000}); + if (secure) { + host_string.erase(0, strlen("secure://")); + } + + nodes.emplace_back(ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); + } + + + ZooKeeper zk(nodes, {}, {}, {}, {5, 0}, {0, 50000}, {0, 50000}); Poco::Event event(true); diff --git a/src/Common/ZooKeeper/tests/zookeeper_impl.cpp b/src/Common/ZooKeeper/tests/zookeeper_impl.cpp index da609a7bc72..74ba63514f2 100644 --- a/src/Common/ZooKeeper/tests/zookeeper_impl.cpp +++ b/src/Common/ZooKeeper/tests/zookeeper_impl.cpp @@ -5,7 +5,7 @@ int main() try { - Coordination::ZooKeeper zookeeper({Poco::Net::SocketAddress{"localhost:2181"}}, "", "", "", {30, 0}, {0, 50000}, {0, 50000}); + Coordination::ZooKeeper zookeeper({Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{"localhost:2181"}, false}}, "", "", "", {30, 0}, {0, 50000}, {0, 50000}); zookeeper.create("/test", "hello", false, false, {}, [](const Coordination::CreateResponse & response) { diff --git a/tests/integration/test_config_corresponding_root/configs/config.xml b/tests/integration/test_config_corresponding_root/configs/config.xml index 154ebf6c35e..4e130afa84d 100644 --- a/tests/integration/test_config_corresponding_root/configs/config.xml +++ b/tests/integration/test_config_corresponding_root/configs/config.xml @@ -37,7 +37,7 @@ true - + true true sslv2,sslv3 diff --git a/tests/server-test.xml b/tests/server-test.xml index c2356ec1ba0..7f792479065 100644 --- a/tests/server-test.xml +++ b/tests/server-test.xml @@ -31,7 +31,7 @@ true - + true true sslv2,sslv3 From 540e9f7d0df12d772ca203cb36ed26bddf194f9a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 9 Apr 2020 01:58:15 +0300 Subject: [PATCH 132/752] Fix logged number of inserted rows into ReplicatedMergeTree --- .../MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index fda0a8eb5a8..72255081e6b 100644 ---
a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -147,11 +147,11 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block) /// That is, do not insert the same data to the same partition twice. block_id = part->info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]); - LOG_DEBUG(log, "Wrote block with ID '" << block_id << "', " << block.rows() << " rows"); + LOG_DEBUG(log, "Wrote block with ID '" << block_id << "', " << current_block.block.rows() << " rows"); } else { - LOG_DEBUG(log, "Wrote block with " << block.rows() << " rows"); + LOG_DEBUG(log, "Wrote block with " << current_block.block.rows() << " rows"); } try From e544edd72643a804fe9fe75e723f1eadc3a6bae9 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Thu, 9 Apr 2020 09:43:02 +0800 Subject: [PATCH 133/752] Fix random scramble using separator character issue during MySQL handshakes --- src/Core/MySQLProtocol.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/Core/MySQLProtocol.h b/src/Core/MySQLProtocol.h index 1fae57517c1..5adcf836c18 100644 --- a/src/Core/MySQLProtocol.h +++ b/src/Core/MySQLProtocol.h @@ -914,8 +914,16 @@ public: scramble.resize(SCRAMBLE_LENGTH + 1, 0); Poco::RandomInputStream generator; - for (size_t i = 0; i < SCRAMBLE_LENGTH; i++) + /** Generate a random string using ASCII characters but avoid the separator character, * produce pseudo-random numbers with about 7 bits worth of entropy in the range 1-127. * https://github.com/mysql/mysql-server/blob/8.0/mysys/crypt_genhash_impl.cc#L427 */ + for (size_t i = 0; i < SCRAMBLE_LENGTH; i++) { generator >> scramble[i]; + scramble[i] &= 0x7f; + if (scramble[i] == '\0' || scramble[i] == '$') + scramble[i] = scramble[i] + 1; + } } String getName() override @@ -993,8 +1001,12 @@ public: scramble.resize(SCRAMBLE_LENGTH + 1, 0); Poco::RandomInputStream generator; - for (size_t i = 0; i < SCRAMBLE_LENGTH; i++) + for (size_t i = 0; i < SCRAMBLE_LENGTH; i++) { generator >> scramble[i]; + scramble[i] &= 0x7f; + if (scramble[i] == '\0' || scramble[i] == '$') + scramble[i] = scramble[i] + 1; + } } String getName() override From 5314b277aff685629bfa8ce50a62578cc49c0771 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 9 Apr 2020 05:50:37 +0300 Subject: [PATCH 134/752] Fixed test --- .../0_stateless/01114_mysql_database_engine_segfault.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql index 371df4f8dee..af88c5af53a 100644 --- a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql +++ b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql @@ -1 +1 @@ -CREATE DATABASE conv_main ENGINE = MySQL('127.0.0.1:3456', conv_main, 'metrika', 'password'); -- { serverError 1000 } +CREATE DATABASE conv_main ENGINE = MySQL('127.0.0.1:3456', conv_main, 'metrika', 'password'); -- { serverError 501 } From 17256e0f1e02111da6df9902d7c20be231cda8d9 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Thu, 9 Apr 2020 10:53:40 +0800 Subject: [PATCH 135/752] add java client integration tests --- tests/integration/test_mysql_protocol/test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index b5ee3cecec9..f75a168d5db 100644 ---
a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -278,15 +278,29 @@ def test_java_client(server_address, java_container): with open(os.path.join(SCRIPT_DIR, 'clients', 'java', '0.reference')) as fp: reference = fp.read() + # database not exists exception. code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user user_with_empty_password --database ' 'abc'.format(host=server_address, port=server_port), demux=True) assert code == 1 + # empty password passed. code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user user_with_empty_password --database ' 'default'.format(host=server_address, port=server_port), demux=True) assert code == 0 assert stdout == reference + # non-empty password passed. + code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user default --password 123 --database ' + 'default'.format(host=server_address, port=server_port), demux=True) + assert code == 0 + assert stdout == reference + + # double-sha1 password passed. + code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user user_with_double_sha1 --password abacaba --database ' + 'default'.format(host=server_address, port=server_port), demux=True) + assert code == 0 + assert stdout == reference + def test_types(server_address): client = pymysql.connections.Connection(host=server_address, user='default', password='123', database='default', port=server_port) From eca178fd8eda9c273e49d1bb994684b1bf557b5f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 9 Apr 2020 06:03:49 +0300 Subject: [PATCH 136/752] Added results from Jack Gao --- website/benchmark_hardware.html | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index ab75e7ca063..64eb576cc66 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -2427,6 +2427,57 @@ var results = [0.011, 0.007, 0.007] ] }, + + { + "system": "AMD EPYC 7702, 256 cores, 512 GiB, NVMe SSD, version 19.16", + "time": "2020-04-09 00:00:00", + "result": + [ +[0.103, 0.038, 0.043], +[0.072, 0.042, 0.044], +[0.118, 0.051, 0.057], +[0.222, 0.054, 0.051], +[0.339, 0.193, 0.215], +[0.376, 0.189, 0.175], +[0.114, 0.040, 0.052], +[0.085, 0.055, 0.049], +[0.354, 0.180, 0.168], +[0.372, 0.172, 0.161], +[0.276, 0.105, 0.100], +[0.259, 0.110, 0.115], +[0.399, 0.222, 0.207], +[0.586, 0.261, 0.262], +[0.394, 0.251, 0.228], +[0.350, 0.194, 0.189], +[0.705, 0.468, 0.462], +[0.653, 0.368, 0.381], +[1.285, 0.826, 0.922], +[0.223, 0.032, 0.036], +[1.690, 0.186, 0.178], +[1.916, 0.231, 0.189], +[3.551, 0.602, 0.595], +[3.198, 0.607, 0.478], +[0.530, 0.143, 0.138], +[0.311, 0.079, 0.090], +[0.554, 0.137, 0.134], +[1.775, 0.305, 0.293], +[1.480, 0.257, 0.276], +[0.864, 0.838, 0.795], +[0.529, 0.183, 0.177], +[1.051, 0.226, 0.230], +[1.719, 1.074, 1.075], +[2.134, 0.856, 0.873], +[2.123, 0.829, 0.846], +[0.380, 0.285, 0.280], +[0.193, 0.187, 0.183], +[0.080, 0.080, 0.080], +[0.077, 0.066, 0.068], +[0.432, 0.405, 0.444], +[0.050, 0.038, 0.037], +[0.032, 0.028, 0.025], +[0.010, 0.010, 0.008] + ] + }, ]; @@ -2862,6 +2913,7 @@ Results for Pinebook Pro are from Aleksey R. @kITerE.
Results for AMD Ryzen are from Alexey Milovidov. Firefox was running in the background.
Results for Azure E32s are from Piotr Maśko.
Results for MacBook Pro are from Denis Glazachev. macOS Catalina version 10.15.4 (19E266). For "drop caches", the "Free Up RAM" feature of CleanMyMac is used.
+Results for AMD EPYC 7702 are from Peng Gao at sina.com.
The Xeon Gold 6230 server uses 4 x SAMSUNG datacenter-class SSDs in RAID-10.
Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because the cache was not flushed before every query.
From ccf5cb2a668499ad0fd9c275a4e63aeb02cd6d1c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 9 Apr 2020 06:24:09 +0300 Subject: [PATCH 137/752] Update MySQLProtocol.h --- src/Core/MySQLProtocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/MySQLProtocol.h b/src/Core/MySQLProtocol.h index 5adcf836c18..e73e1fddd3a 100644 --- a/src/Core/MySQLProtocol.h +++ b/src/Core/MySQLProtocol.h @@ -914,7 +914,7 @@ public: scramble.resize(SCRAMBLE_LENGTH + 1, 0); Poco::RandomInputStream generator; - /** Generate a random string using ASCII characters but avoid seperator character, + /** Generate a random string using ASCII characters but avoid separator character, * produce pseudo random numbers between with about 7 bit worth of entropty between 1-127. * https://github.com/mysql/mysql-server/blob/8.0/mysys/crypt_genhash_impl.cc#L427 */ From cb6c860d898c7e7b1c99b8f98921d51ff5146dd9 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 9 Apr 2020 06:25:20 +0300 Subject: [PATCH 138/752] Update MySQLProtocol.h --- src/Core/MySQLProtocol.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Core/MySQLProtocol.h b/src/Core/MySQLProtocol.h index e73e1fddd3a..5255c6f263e 100644 --- a/src/Core/MySQLProtocol.h +++ b/src/Core/MySQLProtocol.h @@ -918,7 +918,8 @@ public: * produce pseudo random numbers between with about 7 bit worth of entropty between 1-127. * https://github.com/mysql/mysql-server/blob/8.0/mysys/crypt_genhash_impl.cc#L427 */ - for (size_t i = 0; i < SCRAMBLE_LENGTH; i++){ + for (size_t i = 0; i < SCRAMBLE_LENGTH; ++i) + { generator >> scramble[i]; scramble[i] &= 0x7f; if (scramble[i] == '\0' || scramble[i] == '$') @@ -1001,7 +1002,8 @@ public: scramble.resize(SCRAMBLE_LENGTH + 1, 0); Poco::RandomInputStream generator; - for (size_t i = 0; i < SCRAMBLE_LENGTH; i++) { + for (size_t i = 0; i < SCRAMBLE_LENGTH; ++i) + { generator >> scramble[i]; scramble[i] &= 0x7f; if (scramble[i] == '\0' || scramble[i] == '$') From a24471233fce463ffd70bff561f7842d4d0b7bd9 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 08:04:12 +0300 Subject: [PATCH 139/752] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 84af1e30a6b..e9ae2c2d2f4 100644 --- a/README.md +++ b/README.md @@ -16,5 +16,6 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events * [ClickHouse in Avito (online in Russian)](https://avitotech.timepad.ru/event/1290051/) on April 9, 2020. +* [ClickHouse Monitoring Round Table (online in English)](https://www.eventbrite.com/e/clickhouse-april-virtual-meetup-tickets-102272923066) on April 15, 2020. * [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date. * [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date. From ae6267070eb6b63d510a78e2558b7a3402a592da Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Apr 2020 09:28:13 +0300 Subject: [PATCH 140/752] Fix style. 
--- src/Processors/QueryPipeline.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index d20086e726f..a13547568d1 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -151,10 +151,10 @@ void QueryPipeline::init(Pipes pipes) totals.emplace_back(totals_port); } - if (auto * extremes_port_ = pipe.getExtremesPort()) + if (auto * port = pipe.getExtremesPort()) { - assertBlocksHaveEqualStructure(current_header, extremes_port_->getHeader(), "QueryPipeline"); - extremes.emplace_back(extremes_port_); + assertBlocksHaveEqualStructure(current_header, port->getHeader(), "QueryPipeline"); + extremes.emplace_back(port); } streams.addStream(&pipe.getPort(), pipe.maxParallelStreams()); From e28e5b24e967ac594240d9bd8e0584214b4fcda5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Apr 2020 09:29:38 +0300 Subject: [PATCH 141/752] Update test. --- tests/queries/0_stateless/01232_extremes.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01232_extremes.sql b/tests/queries/0_stateless/01232_extremes.sql index 80bf628d669..9379dc1cd38 100644 --- a/tests/queries/0_stateless/01232_extremes.sql +++ b/tests/queries/0_stateless/01232_extremes.sql @@ -27,6 +27,8 @@ drop table if exists shard_0.num_01232; drop table if exists shard_0.num2_01232; drop table if exists shard_1.num_01232; drop table if exists shard_1.num2_01232; +drop table if exists distr; +drop table if exists distr2; create table shard_0.num_01232 (number UInt64) engine = MergeTree order by number; create table shard_1.num_01232 (number UInt64) engine = MergeTree order by number; @@ -48,4 +50,6 @@ drop table if exists shard_0.num_01232; drop table if exists shard_0.num2_01232; drop table if exists shard_1.num_01232; drop table if exists shard_1.num2_01232; +drop table if exists distr; +drop table if exists distr2; From 36a5b57ac4eaeac7f2356d6811acd1b0d1892523 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 8 Apr 2020 02:57:14 +0300 Subject: [PATCH 142/752] Use "CREATE SETTINGS PROFILE name SETTINGS INHERIT parent" instead of "CREATE SETTINGS PROFILE name SETTINGS PROFILE parent". 
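The old spelling is still parsed for backward compatibility (the INHERIT keyword is only enabled while parsing CREATE/ALTER SETTINGS PROFILE), but SHOW CREATE SETTINGS PROFILE now prints the INHERIT form. A sketch of both spellings, using the profile names from the updated tests:

    CREATE SETTINGS PROFILE s1 SETTINGS max_memory_usage = 123456789 MIN 100000000 MAX 200000000;
    -- old spelling, still accepted by the parser:
    CREATE SETTINGS PROFILE s2 SETTINGS PROFILE s1 TO u2;
    -- new canonical spelling, as printed by SHOW CREATE SETTINGS PROFILE s2:
    CREATE SETTINGS PROFILE s2 SETTINGS INHERIT s1 TO u2;
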
--- ...InterpreterShowCreateAccessEntityQuery.cpp | 3 +++ src/Parsers/ASTCreateSettingsProfileQuery.h | 2 ++ src/Parsers/ASTSettingsProfileElement.cpp | 10 +++++++++- src/Parsers/ASTSettingsProfileElement.h | 3 +++ src/Parsers/IParser.h | 2 +- .../ParserCreateSettingsProfileQuery.cpp | 2 +- .../ParserCreateSettingsProfileQuery.h | 4 ++-- src/Parsers/ParserSettingsProfileElement.cpp | 19 +++++++++++++++---- src/Parsers/ParserSettingsProfileElement.h | 8 +++++++- .../test_disk_access_storage/test.py | 4 ++-- .../integration/test_settings_profile/test.py | 11 +++++++++++ 11 files changed, 56 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index d2f435106a8..e579ade11ca 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -101,6 +102,8 @@ namespace query->settings = profile.elements.toAST(); else query->settings = profile.elements.toASTWithNames(*manager); + if (query->settings) + query->settings->setUseInheritKeyword(true); } if (!profile.to_roles.empty()) diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.h b/src/Parsers/ASTCreateSettingsProfileQuery.h index cc133397db4..eabe1ba441b 100644 --- a/src/Parsers/ASTCreateSettingsProfileQuery.h +++ b/src/Parsers/ASTCreateSettingsProfileQuery.h @@ -12,10 +12,12 @@ class ASTExtendedRoleSet; /** CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] * * ALTER SETTINGS PROFILE [IF EXISTS] name * [RENAME TO new_name] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] */ class ASTCreateSettingsProfileQuery : public IAST, public ASTQueryWithOnCluster { diff --git a/src/Parsers/ASTSettingsProfileElement.cpp b/src/Parsers/ASTSettingsProfileElement.cpp index b3f4032d14c..24f1aa60813 100644 --- a/src/Parsers/ASTSettingsProfileElement.cpp +++ b/src/Parsers/ASTSettingsProfileElement.cpp @@ -25,7 +25,8 @@ void ASTSettingsProfileElement::formatImpl(const FormatSettings & settings, Form { if (!parent_profile.empty()) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "PROFILE " << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_inherit_keyword ? "INHERIT" : "PROFILE") << " " + << (settings.hilite ? IAST::hilite_none : ""); formatProfileNameOrID(parent_profile, id_mode, settings); return; } @@ -85,4 +86,11 @@ void ASTSettingsProfileElements::formatImpl(const FormatSettings & settings, For } } + +void ASTSettingsProfileElements::setUseInheritKeyword(bool use_inherit_keyword_) +{ + for (auto & element : elements) + element->use_inherit_keyword = use_inherit_keyword_; +} + } diff --git a/src/Parsers/ASTSettingsProfileElement.h b/src/Parsers/ASTSettingsProfileElement.h index 0470b51cf85..ee1ee28c383 100644 --- a/src/Parsers/ASTSettingsProfileElement.h +++ b/src/Parsers/ASTSettingsProfileElement.h @@ -19,6 +19,7 @@ public: Field max_value; std::optional readonly; bool id_mode = false; /// If true then `parent_profile` keeps UUID, not a name. 
+ bool use_inherit_keyword = false; /// If true then this element is a part of ASTCreateSettingsProfileQuery. bool empty() const { return parent_profile.empty() && name.empty(); } @@ -41,5 +42,7 @@ public: String getID(char) const override { return "SettingsProfileElements"; } ASTPtr clone() const override { return std::make_shared(*this); } void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + + void setUseInheritKeyword(bool use_inherit_keyword_); }; } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 925140bd25e..5bfbf1ed476 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -126,7 +126,7 @@ public: return parse(pos, node, expected); } - virtual ~IParser() {} + virtual ~IParser() = default; }; using ParserPtr = std::unique_ptr; diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/ParserCreateSettingsProfileQuery.cpp index 4d3ed2f6e63..5b33fed2fa0 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ParserCreateSettingsProfileQuery.cpp @@ -33,7 +33,7 @@ namespace return false; ASTPtr new_settings_ast; - if (!ParserSettingsProfileElements{}.useIDMode(id_mode).parse(pos, new_settings_ast, expected)) + if (!ParserSettingsProfileElements{}.useIDMode(id_mode).enableInheritKeyword(true).parse(pos, new_settings_ast, expected)) return false; if (!settings) diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.h b/src/Parsers/ParserCreateSettingsProfileQuery.h index 6797fc884fa..073a8ca75ae 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.h +++ b/src/Parsers/ParserCreateSettingsProfileQuery.h @@ -7,11 +7,11 @@ namespace DB { /** Parses queries like * CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name - * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] * * ALTER SETTINGS PROFILE [IF EXISTS] name * [RENAME TO new_name] - * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] 
*/ class ParserCreateSettingsProfileQuery : public IParserBase { diff --git a/src/Parsers/ParserSettingsProfileElement.cpp b/src/Parsers/ParserSettingsProfileElement.cpp index 06fa58fde4e..31bc339f544 100644 --- a/src/Parsers/ParserSettingsProfileElement.cpp +++ b/src/Parsers/ParserSettingsProfileElement.cpp @@ -108,7 +108,8 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected Field max_value; std::optional readonly; - if (ParserKeyword{"PROFILE"}.ignore(pos, expected)) + if (ParserKeyword{"PROFILE"}.ignore(pos, expected) || + (enable_inherit_keyword && ParserKeyword{"INHERIT"}.ignore(pos, expected))) { if (!parseProfileNameOrID(pos, expected, id_mode, parent_profile)) return false; @@ -120,9 +121,15 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected return false; name = getIdentifierName(name_ast); + bool has_value_or_constraint = false; while (parseValue(pos, expected, value) || parseMinMaxValue(pos, expected, min_value, max_value) || parseReadonlyOrWritableKeyword(pos, expected, readonly)) - ; + { + has_value_or_constraint = true; + } + + if (!has_value_or_constraint) + return false; } auto result = std::make_shared(); @@ -133,6 +140,7 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected result->max_value = std::move(max_value); result->readonly = readonly; result->id_mode = id_mode; + result->use_inherit_keyword = enable_inherit_keyword; node = result; return true; } @@ -142,12 +150,15 @@ bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected { std::vector> elements; - if (!ParserKeyword{"NONE"}.ignore(pos, expected)) + if (ParserKeyword{"NONE"}.ignore(pos, expected)) + { + } + else { do { ASTPtr ast; - if (!ParserSettingsProfileElement{}.useIDMode(id_mode).parse(pos, ast, expected)) + if (!ParserSettingsProfileElement{}.useIDMode(id_mode).enableInheritKeyword(enable_inherit_keyword).parse(pos, ast, expected)) return false; auto element = typeid_cast>(ast); elements.push_back(std::move(element)); diff --git a/src/Parsers/ParserSettingsProfileElement.h b/src/Parsers/ParserSettingsProfileElement.h index ec8e1abb5b5..309c797e645 100644 --- a/src/Parsers/ParserSettingsProfileElement.h +++ b/src/Parsers/ParserSettingsProfileElement.h @@ -12,6 +12,7 @@ class ParserSettingsProfileElement : public IParserBase { public: ParserSettingsProfileElement & useIDMode(bool enable_) { id_mode = enable_; return *this; } + ParserSettingsProfileElement & enableInheritKeyword(bool enable_) { enable_inherit_keyword = enable_; return *this; } protected: const char * getName() const override { return "SettingsProfileElement"; } @@ -19,6 +20,7 @@ protected: private: bool id_mode = false; + bool enable_inherit_keyword = false; }; @@ -26,6 +28,7 @@ class ParserSettingsProfileElements : public IParserBase { public: ParserSettingsProfileElements & useIDMode(bool enable_) { id_mode = enable_; return *this; } + ParserSettingsProfileElements & enableInheritKeyword(bool enable_) { enable_inherit_keyword = enable_; return *this; } protected: const char * getName() const override { return "SettingsProfileElements"; } @@ -33,4 +36,7 @@ protected: private: bool id_mode = false; -};} + bool enable_inherit_keyword = false; +}; + +} diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index 1f6577b9dd1..019c1073205 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -47,7 
+47,7 @@ def test_create(): assert instance.query("SHOW CREATE ROLE rx") == "CREATE ROLE rx SETTINGS PROFILE s1\n" assert instance.query("SHOW GRANTS FOR rx") == "" assert instance.query("SHOW CREATE SETTINGS PROFILE s1") == "CREATE SETTINGS PROFILE s1 SETTINGS max_memory_usage = 123456789 MIN 100000000 MAX 200000000\n" - assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS PROFILE s1 TO u2\n" + assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS INHERIT s1 TO u2\n" check() instance.restart_clickhouse() # Check persistency @@ -77,7 +77,7 @@ def test_alter(): assert instance.query("SHOW GRANTS FOR rx") == "GRANT SELECT ON mydb.* TO rx WITH GRANT OPTION\n" assert instance.query("SHOW GRANTS FOR ry") == "GRANT rx TO ry WITH ADMIN OPTION\n" assert instance.query("SHOW CREATE SETTINGS PROFILE s1") == "CREATE SETTINGS PROFILE s1 SETTINGS max_memory_usage = 987654321 READONLY\n" - assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS PROFILE s1 TO u2\n" + assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS INHERIT s1 TO u2\n" check() instance.restart_clickhouse() # Check persistency diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 6866c6b3901..7ad3041b81e 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -31,22 +31,26 @@ def reset_after_test(): def test_settings_profile(): # Set settings and constraints via CREATE SETTINGS PROFILE ... TO user instance.query("CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") instance.query("ALTER SETTINGS PROFILE xyz TO NONE") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") # Set settings and constraints via CREATE USER ... 
SETTINGS PROFILE instance.query("ALTER USER robin SETTINGS PROFILE xyz") + assert instance.query("SHOW CREATE USER robin") == "CREATE USER robin SETTINGS PROFILE xyz\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") instance.query("ALTER USER robin SETTINGS NONE") + assert instance.query("SHOW CREATE USER robin") == "CREATE USER robin\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") @@ -57,6 +61,8 @@ def test_settings_profile_from_granted_role(): instance.query("CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000") instance.query("CREATE ROLE worker SETTINGS PROFILE xyz") instance.query("GRANT worker TO robin") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000\n" + assert instance.query("SHOW CREATE ROLE worker") == "CREATE ROLE worker SETTINGS PROFILE xyz\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") @@ -68,17 +74,20 @@ def test_settings_profile_from_granted_role(): instance.query("ALTER ROLE worker SETTINGS NONE") instance.query("GRANT worker TO robin") + assert instance.query("SHOW CREATE ROLE worker") == "CREATE ROLE worker\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") # Set settings and constraints via CREATE SETTINGS PROFILE ... 
TO granted role instance.query("ALTER SETTINGS PROFILE xyz TO worker") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO worker\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") instance.query("ALTER SETTINGS PROFILE xyz TO NONE") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") @@ -87,6 +96,8 @@ def test_settings_profile_from_granted_role(): def test_inheritance_of_settings_profile(): instance.query("CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000002 READONLY") instance.query("CREATE SETTINGS PROFILE alpha SETTINGS PROFILE xyz TO robin") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000002 READONLY\n" + assert instance.query("SHOW CREATE SETTINGS PROFILE alpha") == "CREATE SETTINGS PROFILE alpha SETTINGS INHERIT xyz TO robin\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000002\n" assert "Setting max_memory_usage should not be changed" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") From c97d12a19c96f3857864c6f00a75d0c0ede2c341 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 8 Apr 2020 03:50:27 +0300 Subject: [PATCH 143/752] Enable access management by default for all integration tests. 
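Each test previously shipped its own users.d/access_management.xml override; those copies are deleted, and cluster.py now copies a single helpers/0_common_instance_users.xml into every instance's users.d directory. Judging by the tag-stripped remains of the deleted copies in the diff below, the shared file presumably carries the same one-setting override:

    <yandex>
        <users>
            <default>
                <access_management>1</access_management>
            </default>
        </users>
    </yandex>
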
--- .../0_common_instance_users.xml} | 0 tests/integration/helpers/cluster.py | 1 + .../test_allowed_client_hosts/configs/users.xml | 13 ------------- .../configs/users.d/access_management.xml | 7 ------- tests/integration/test_authentication/test.py | 2 +- .../configs/users.d/access_management.xml | 7 ------- .../test_disk_access_storage/test.py | 2 +- .../configs/users.d/access_management.xml | 7 ------- tests/integration/test_grant_and_revoke/test.py | 2 +- ...access_management.xml => assign_myquota.xml} | 2 +- .../configs/users.d/drop_default_quota.xml | 5 +++++ tests/integration/test_quota/configs/users.xml | 17 ----------------- .../configs/users.d/access_management.xml | 7 ------- .../configs/{config.d => }/remote_servers.xml | 0 .../configs/users.d/access_management.xml | 7 ------- .../test.py | 6 +++--- .../configs/users.d/access_management.xml | 7 ------- tests/integration/test_settings_profile/test.py | 2 +- 18 files changed, 14 insertions(+), 80 deletions(-) rename tests/integration/{test_access_control_on_cluster/configs/users.d/access_management.xml => helpers/0_common_instance_users.xml} (100%) delete mode 100644 tests/integration/test_allowed_client_hosts/configs/users.xml delete mode 100644 tests/integration/test_authentication/configs/users.d/access_management.xml delete mode 100644 tests/integration/test_disk_access_storage/configs/users.d/access_management.xml delete mode 100644 tests/integration/test_grant_and_revoke/configs/users.d/access_management.xml rename tests/integration/test_quota/configs/users.d/{access_management.xml => assign_myquota.xml} (60%) create mode 100644 tests/integration/test_quota/configs/users.d/drop_default_quota.xml delete mode 100644 tests/integration/test_quota/configs/users.xml delete mode 100644 tests/integration/test_row_policy/configs/users.d/access_management.xml rename tests/integration/test_settings_constraints_distributed/configs/{config.d => }/remote_servers.xml (100%) delete mode 100644 tests/integration/test_settings_constraints_distributed/configs/users.d/access_management.xml delete mode 100644 tests/integration/test_settings_profile/configs/users.d/access_management.xml diff --git a/tests/integration/test_access_control_on_cluster/configs/users.d/access_management.xml b/tests/integration/helpers/0_common_instance_users.xml similarity index 100% rename from tests/integration/test_access_control_on_cluster/configs/users.d/access_management.xml rename to tests/integration/helpers/0_common_instance_users.xml diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5dc93cb338a..69f8206b2c1 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -923,6 +923,7 @@ class ClickHouseInstance: # The file is named with 0_ prefix to be processed before other configuration overloads. 
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), self.config_d_dir) + shutil.copy(p.join(HELPERS_DIR, '0_common_instance_users.xml'), users_d_dir) # Generate and write macros file macros = self.macros.copy() diff --git a/tests/integration/test_allowed_client_hosts/configs/users.xml b/tests/integration/test_allowed_client_hosts/configs/users.xml deleted file mode 100644 index 3142ec5355a..00000000000 --- a/tests/integration/test_allowed_client_hosts/configs/users.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - default - - - - diff --git a/tests/integration/test_authentication/configs/users.d/access_management.xml b/tests/integration/test_authentication/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_authentication/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_authentication/test.py b/tests/integration/test_authentication/test.py index b7ffd1ed35b..483b59813e5 100644 --- a/tests/integration/test_authentication/test.py +++ b/tests/integration/test_authentication/test.py @@ -2,7 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir="configs") +instance = cluster.add_instance('instance') @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_disk_access_storage/configs/users.d/access_management.xml b/tests/integration/test_disk_access_storage/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_disk_access_storage/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index 019c1073205..0db0e21afef 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -2,7 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir='configs', stay_alive=True) +instance = cluster.add_instance('instance', stay_alive=True) @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_grant_and_revoke/configs/users.d/access_management.xml b/tests/integration/test_grant_and_revoke/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_grant_and_revoke/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 25e0e9882de..6f4b0be5325 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster import re cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir="configs") +instance = cluster.add_instance('instance') @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_quota/configs/users.d/access_management.xml b/tests/integration/test_quota/configs/users.d/assign_myquota.xml similarity index 60% rename from tests/integration/test_quota/configs/users.d/access_management.xml rename to 
tests/integration/test_quota/configs/users.d/assign_myquota.xml index 7e799cb7b10..8b98ade8aeb 100644 --- a/tests/integration/test_quota/configs/users.d/access_management.xml +++ b/tests/integration/test_quota/configs/users.d/assign_myquota.xml @@ -1,7 +1,7 @@ - 1 + myQuota diff --git a/tests/integration/test_quota/configs/users.d/drop_default_quota.xml b/tests/integration/test_quota/configs/users.d/drop_default_quota.xml new file mode 100644 index 00000000000..5f53ecf5f49 --- /dev/null +++ b/tests/integration/test_quota/configs/users.d/drop_default_quota.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_quota/configs/users.xml b/tests/integration/test_quota/configs/users.xml deleted file mode 100644 index 4412345a731..00000000000 --- a/tests/integration/test_quota/configs/users.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - ::/0 - - default - myQuota - - - diff --git a/tests/integration/test_row_policy/configs/users.d/access_management.xml b/tests/integration/test_row_policy/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_row_policy/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml b/tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml similarity index 100% rename from tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml rename to tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml diff --git a/tests/integration/test_settings_constraints_distributed/configs/users.d/access_management.xml b/tests/integration/test_settings_constraints_distributed/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_settings_constraints_distributed/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index a58c037a2fc..51999902e7d 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -8,9 +8,9 @@ from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', config_dir="configs") -node2 = cluster.add_instance('node2', config_dir="configs") -distributed = cluster.add_instance('distributed', config_dir="configs") +node1 = cluster.add_instance('node1') +node2 = cluster.add_instance('node2') +distributed = cluster.add_instance('distributed', main_configs=["configs/remote_servers.xml"]) @pytest.fixture(scope="module") diff --git a/tests/integration/test_settings_profile/configs/users.d/access_management.xml b/tests/integration/test_settings_profile/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_settings_profile/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 7ad3041b81e..8b9d023d56f 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -2,7 +2,7 @@ import pytest from helpers.cluster import 
ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir="configs") +instance = cluster.add_instance('instance') @pytest.fixture(scope="module", autouse=True) From 23ac1ee87c87ae20152bf3593284203f01bacfdc Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 8 Apr 2020 04:35:15 +0300 Subject: [PATCH 144/752] readonly user now can execute SHOW CREATE for access entities. --- src/Access/ContextAccess.cpp | 3 ++- .../__init__.py | 0 .../configs/users.d/extra_users.xml | 13 ++++++++++ .../test_enabling_access_management/test.py | 24 +++++++++++++++++++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_enabling_access_management/__init__.py create mode 100644 tests/integration/test_enabling_access_management/configs/users.d/extra_users.xml create mode 100644 tests/integration/test_enabling_access_management/test.py diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 14775f7a4de..cf788a0a63e 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -408,9 +408,10 @@ boost::shared_ptr ContextAccess::calculateResultAccess(bool static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; + static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; if (readonly_) - merged_access->revoke(write_table_access | table_and_dictionary_ddl | AccessType::SYSTEM | AccessType::KILL_QUERY | AccessType::ACCESS_MANAGEMENT); + merged_access->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); if (readonly_ == 1) { diff --git a/tests/integration/test_enabling_access_management/__init__.py b/tests/integration/test_enabling_access_management/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_enabling_access_management/configs/users.d/extra_users.xml b/tests/integration/test_enabling_access_management/configs/users.d/extra_users.xml new file mode 100644 index 00000000000..7d87a29a915 --- /dev/null +++ b/tests/integration/test_enabling_access_management/configs/users.d/extra_users.xml @@ -0,0 +1,13 @@ + + + + + readonly + 1 + + + + default + + + diff --git a/tests/integration/test_enabling_access_management/test.py b/tests/integration/test_enabling_access_management/test.py new file mode 100644 index 00000000000..abb8cd6c07a --- /dev/null +++ b/tests/integration/test_enabling_access_management/test.py @@ -0,0 +1,24 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', config_dir="configs") + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_enabling_access_management(): + instance.query("CREATE USER Alex", user='default') + assert instance.query("SHOW CREATE USER Alex", user='default') == "CREATE USER Alex\n" + assert instance.query("SHOW CREATE USER Alex", user='readonly') == "CREATE USER Alex\n" + assert "Not enough privileges" in instance.query_and_get_error("SHOW CREATE USER Alex", user='xyz') + + assert "Cannot execute query in readonly mode" in instance.query_and_get_error("CREATE USER Robin", 
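With this change, readonly mode revokes only the mutating part of ACCESS MANAGEMENT (everything except SHOW_ACCESS), so a readonly user can inspect access entities but still cannot create or alter them. A minimal sketch of the new behaviour, using the users from the added integration test:

    -- as the 'readonly' user (defined with readonly=1):
    SHOW CREATE USER Alex;  -- allowed after this change
    CREATE USER Robin;      -- still fails: "Cannot execute query in readonly mode"
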
user='readonly') + assert "Not enough privileges" in instance.query_and_get_error("CREATE USER Robin", user='xyz') From d548c7e381e412c8f7d4e2d733bf92765699ac0a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 8 Apr 2020 06:09:40 +0300 Subject: [PATCH 145/752] Simplify DCL for creating quotas. --- .../InterpreterCreateQuotaQuery.cpp | 4 +- ...InterpreterShowCreateAccessEntityQuery.cpp | 2 +- src/Parsers/ASTCreateQuotaQuery.cpp | 25 +++++---- src/Parsers/ASTCreateQuotaQuery.h | 11 ++-- src/Parsers/ParserCreateQuotaQuery.cpp | 53 ++++++++----------- src/Parsers/ParserCreateQuotaQuery.h | 9 ++-- .../test_disk_access_storage/test.py | 4 +- tests/integration/test_quota/test.py | 22 ++++---- .../0_stateless/01033_quota_dcl.reference | 2 +- 9 files changed, 61 insertions(+), 71 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/InterpreterCreateQuotaQuery.cpp index 13e772965ff..80987993c96 100644 --- a/src/Interpreters/InterpreterCreateQuotaQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuotaQuery.cpp @@ -34,7 +34,7 @@ void updateQuotaFromQueryImpl(Quota & quota, const ASTCreateQuotaQuery & query, auto duration = query_limits.duration; auto it = boost::range::find_if(quota_all_limits, [&](const Quota::Limits & x) { return x.duration == duration; }); - if (query_limits.unset_tracking) + if (query_limits.drop) { if (it != quota_all_limits.end()) quota_all_limits.erase(it); @@ -59,6 +59,8 @@ void updateQuotaFromQueryImpl(Quota & quota, const ASTCreateQuotaQuery & query, { if (query_limits.max[resource_type]) quota_limits.max[resource_type] = *query_limits.max[resource_type]; + else + quota_limits.max[resource_type] = Quota::UNLIMITED; } } diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index e579ade11ca..4c2dcc19a88 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -136,7 +136,7 @@ namespace create_query_limits.duration = limits.duration; create_query_limits.randomize_interval = limits.randomize_interval; for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) - if (limits.max[resource_type]) + if (limits.max[resource_type] != Quota::UNLIMITED) create_query_limits.max[resource_type] = limits.max[resource_type]; query->all_limits.push_back(create_query_limits); } diff --git a/src/Parsers/ASTCreateQuotaQuery.cpp b/src/Parsers/ASTCreateQuotaQuery.cpp index 8fa0dbb0d31..cd064756fb6 100644 --- a/src/Parsers/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/ASTCreateQuotaQuery.cpp @@ -28,16 +28,17 @@ namespace } - void formatLimit(ResourceType resource_type, ResourceAmount max, const IAST::FormatSettings & settings) + void formatLimit(ResourceType resource_type, ResourceAmount max, bool first, const IAST::FormatSettings & settings) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX " << Quota::resourceTypeToKeyword(resource_type) - << (settings.hilite ? IAST::hilite_none : ""); + if (first) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX" << (settings.hilite ? IAST::hilite_none : ""); + else + settings.ostr << ","; - settings.ostr << (settings.hilite ? IAST::hilite_operator : "") << " = " << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << " " << (settings.hilite ? IAST::hilite_keyword : "") << Quota::resourceTypeToKeyword(resource_type) + << (settings.hilite ? 
IAST::hilite_none : "") << " "; - if (max == Quota::UNLIMITED) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ANY" << (settings.hilite ? IAST::hilite_none : ""); - else if (resource_type == Quota::EXECUTION_TIME) + if (resource_type == Quota::EXECUTION_TIME) settings.ostr << Quota::executionTimeToSeconds(max); else settings.ostr << max; @@ -59,9 +60,9 @@ namespace << interval_kind.toKeyword() << (settings.hilite ? IAST::hilite_none : ""); - if (limits.unset_tracking) + if (limits.drop) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " UNSET TRACKING" << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " NO LIMITS" << (settings.hilite ? IAST::hilite_none : ""); } else { @@ -70,14 +71,12 @@ namespace { if (limits.max[resource_type]) { - if (limit_found) - settings.ostr << ","; + formatLimit(resource_type, *limits.max[resource_type], !limit_found, settings); limit_found = true; - formatLimit(resource_type, *limits.max[resource_type], settings); } } if (!limit_found) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TRACKING" << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TRACKING ONLY" << (settings.hilite ? IAST::hilite_none : ""); } } diff --git a/src/Parsers/ASTCreateQuotaQuery.h b/src/Parsers/ASTCreateQuotaQuery.h index 09ceaea9825..70f8cba6de0 100644 --- a/src/Parsers/ASTCreateQuotaQuery.h +++ b/src/Parsers/ASTCreateQuotaQuery.h @@ -13,17 +13,16 @@ class ASTExtendedRoleSet; /** CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | - * [SET] TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] * * ALTER QUOTA [IF EXISTS] name * [RENAME TO new_name] * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | - * [SET] TRACKING | - * UNSET TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] 
| ALL | ALL EXCEPT role [,...]}] */ class ASTCreateQuotaQuery : public IAST, public ASTQueryWithOnCluster @@ -48,7 +47,7 @@ public: struct Limits { std::optional max[MAX_RESOURCE_TYPE]; - bool unset_tracking = false; + bool drop = false; std::chrono::seconds duration = std::chrono::seconds::zero(); bool randomize_interval = false; }; diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/ParserCreateQuotaQuery.cpp index 66e72ee4968..8bbd2127922 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/ParserCreateQuotaQuery.cpp @@ -63,12 +63,22 @@ namespace }); } - bool parseLimit(IParserBase::Pos & pos, Expected & expected, ResourceType & resource_type, ResourceAmount & max) + bool parseLimit(IParserBase::Pos & pos, Expected & expected, bool first, ResourceType & resource_type, ResourceAmount & max) { return IParserBase::wrapParseImpl(pos, [&] { - if (!ParserKeyword{"MAX"}.ignore(pos, expected)) - return false; + if (first) + { + if (!ParserKeyword{"MAX"}.ignore(pos, expected)) + return false; + } + else + { + if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) + return false; + + ParserKeyword{"MAX"}.ignore(pos, expected); + } bool resource_type_set = false; for (auto rt : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) @@ -83,9 +93,6 @@ namespace if (!resource_type_set) return false; - if (!ParserToken{TokenType::Equals}.ignore(pos, expected)) - return false; - ASTPtr max_ast; if (ParserNumber{}.parse(pos, max_ast, expected)) { @@ -95,10 +102,6 @@ namespace else max = applyVisitor(FieldVisitorConvertToNumber(), max_field); } - else if (ParserKeyword{"ANY"}.ignore(pos, expected)) - { - max = Quota::UNLIMITED; - } else return false; @@ -106,18 +109,7 @@ namespace }); } - bool parseCommaAndLimit(IParserBase::Pos & pos, Expected & expected, ResourceType & resource_type, ResourceAmount & max) - { - return IParserBase::wrapParseImpl(pos, [&] - { - if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) - return false; - - return parseLimit(pos, expected, resource_type, max); - }); - } - - bool parseLimits(IParserBase::Pos & pos, Expected & expected, bool alter, ASTCreateQuotaQuery::Limits & limits) + bool parseLimits(IParserBase::Pos & pos, Expected & expected, ASTCreateQuotaQuery::Limits & limits) { return IParserBase::wrapParseImpl(pos, [&] { @@ -142,23 +134,22 @@ namespace new_limits.duration = std::chrono::seconds(static_cast(num_intervals * interval_kind.toAvgSeconds())); - if (alter && ParserKeyword{"UNSET TRACKING"}.ignore(pos, expected)) + if (ParserKeyword{"NO LIMITS"}.ignore(pos, expected)) { - new_limits.unset_tracking = true; + new_limits.drop = true; } - else if (ParserKeyword{"SET TRACKING"}.ignore(pos, expected) || ParserKeyword{"TRACKING"}.ignore(pos, expected)) + else if (ParserKeyword{"TRACKING ONLY"}.ignore(pos, expected)) { } else { - ParserKeyword{"SET"}.ignore(pos, expected); ResourceType resource_type; ResourceAmount max; - if (!parseLimit(pos, expected, resource_type, max)) + if (!parseLimit(pos, expected, true, resource_type, max)) return false; new_limits.max[resource_type] = max; - while (parseCommaAndLimit(pos, expected, resource_type, max)) + while (parseLimit(pos, expected, false, resource_type, max)) new_limits.max[resource_type] = max; } @@ -167,7 +158,7 @@ namespace }); } - bool parseAllLimits(IParserBase::Pos & pos, Expected & expected, bool alter, std::vector & all_limits) + bool parseAllLimits(IParserBase::Pos & pos, Expected & expected, std::vector & all_limits) { return IParserBase::wrapParseImpl(pos, [&] { @@ -175,7 
+166,7 @@ namespace do { ASTCreateQuotaQuery::Limits limits; - if (!parseLimits(pos, expected, alter, limits)) + if (!parseLimits(pos, expected, limits)) { all_limits.resize(old_size); return false; @@ -257,7 +248,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!key_type && parseKeyType(pos, expected, key_type)) continue; - if (parseAllLimits(pos, expected, alter, all_limits)) + if (parseAllLimits(pos, expected, all_limits)) continue; break; diff --git a/src/Parsers/ParserCreateQuotaQuery.h b/src/Parsers/ParserCreateQuotaQuery.h index 18e6ef6f9f7..786c8292b15 100644 --- a/src/Parsers/ParserCreateQuotaQuery.h +++ b/src/Parsers/ParserCreateQuotaQuery.h @@ -9,17 +9,16 @@ namespace DB * CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | - * [SET] TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] * * ALTER QUOTA [IF EXISTS] name * [RENAME TO new_name] * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | - * [SET] TRACKING | - * UNSET TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} } [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] 
| ALL | ALL EXCEPT role [,...]}] */ class ParserCreateQuotaQuery : public IParserBase diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index 0db0e21afef..babceee7c76 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -22,7 +22,7 @@ def create_entities(): instance.query("CREATE USER u2 IDENTIFIED BY 'qwerty' HOST LOCAL DEFAULT ROLE rx") instance.query("CREATE SETTINGS PROFILE s2 SETTINGS PROFILE s1 TO u2") instance.query("CREATE ROW POLICY p ON mydb.mytable FOR SELECT USING a<1000 TO u1, u2") - instance.query("CREATE QUOTA q FOR INTERVAL 1 HOUR SET MAX QUERIES = 100 TO ALL EXCEPT rx") + instance.query("CREATE QUOTA q FOR INTERVAL 1 HOUR MAX QUERIES 100 TO ALL EXCEPT rx") @pytest.fixture(autouse=True) @@ -41,7 +41,7 @@ def test_create(): assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS PROFILE s1\n" assert instance.query("SHOW CREATE USER u2") == "CREATE USER u2 HOST LOCAL DEFAULT ROLE rx\n" assert instance.query("SHOW CREATE ROW POLICY p ON mydb.mytable") == "CREATE ROW POLICY p ON mydb.mytable FOR SELECT USING a < 1000 TO u1, u2\n" - assert instance.query("SHOW CREATE QUOTA q") == "CREATE QUOTA q KEYED BY \\'none\\' FOR INTERVAL 1 HOUR MAX QUERIES = 100 TO ALL EXCEPT rx\n" + assert instance.query("SHOW CREATE QUOTA q") == "CREATE QUOTA q KEYED BY \\'none\\' FOR INTERVAL 1 HOUR MAX QUERIES 100 TO ALL EXCEPT rx\n" assert instance.query("SHOW GRANTS FOR u1") == "" assert instance.query("SHOW GRANTS FOR u2") == "GRANT rx TO u2\n" assert instance.query("SHOW CREATE ROLE rx") == "CREATE ROLE rx SETTINGS PROFILE s1\n" diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 85d2ded16c1..ae68a34a03e 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -180,7 +180,7 @@ def test_reload_users_xml_by_timer(): def test_dcl_introspection(): assert instance.query("SHOW QUOTAS") == "myQuota\n" - assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000 TO default\n" expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=0/1000 errors=0 result_rows=0 result_bytes=0 read_rows=0/1000 read_bytes=0 execution_time=0" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE CURRENT")) @@ -193,7 +193,7 @@ def test_dcl_introspection(): # Add interval. 
copy_quota_xml('two_intervals.xml') assert instance.query("SHOW QUOTAS") == "myQuota\n" - assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000, FOR RANDOMIZED INTERVAL 2 YEAR MAX RESULT BYTES = 30000, MAX READ BYTES = 20000, MAX EXECUTION TIME = 120 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000, FOR RANDOMIZED INTERVAL 2 YEAR MAX RESULT BYTES 30000, READ BYTES 20000, EXECUTION TIME 120 TO default\n" expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=1/1000 errors=0 result_rows=50 result_bytes=200 read_rows=50/1000 read_bytes=200 execution_time=.*\n"\ "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0/30000 read_rows=0 read_bytes=0/20000 execution_time=0/120" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -201,8 +201,8 @@ def test_dcl_introspection(): # Drop interval, add quota. copy_quota_xml('two_quotas.xml') assert instance.query("SHOW QUOTAS") == "myQuota\nmyQuota2\n" - assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000 TO default\n" - assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY \\'client key or user name\\' FOR RANDOMIZED INTERVAL 1 HOUR MAX RESULT ROWS = 4000, MAX RESULT BYTES = 400000, MAX READ ROWS = 4000, MAX READ BYTES = 400000, MAX EXECUTION TIME = 60, FOR INTERVAL 1 MONTH MAX EXECUTION TIME = 1800\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY \\'client key or user name\\' FOR RANDOMIZED INTERVAL 1 HOUR MAX RESULT ROWS 4000, RESULT BYTES 400000, READ ROWS 4000, READ BYTES 400000, EXECUTION TIME 60, FOR INTERVAL 1 MONTH MAX EXECUTION TIME 1800\n" expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=1/1000 errors=0 result_rows=50 result_bytes=200 read_rows=50/1000 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -212,9 +212,9 @@ def test_dcl_management(): assert instance.query("SHOW QUOTAS") == "" assert instance.query("SHOW QUOTA USAGE") == "" - instance.query("CREATE QUOTA qA FOR INTERVAL 15 MONTH SET MAX QUERIES = 123 TO CURRENT_USER") + instance.query("CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER") assert instance.query("SHOW QUOTAS") == "qA\n" - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 5 QUARTER MAX QUERIES = 123 TO default\n" + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 5 QUARTER MAX QUERIES 123 TO default\n" expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0/123 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -222,14 +222,14 @@ def test_dcl_management(): expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=1/123 errors=0 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) 
- instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES = 321, MAX ERRORS = 10, FOR INTERVAL 0.5 HOUR MAX EXECUTION TIME = 0.5") - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 30 MINUTE MAX EXECUTION TIME = 0.5, FOR INTERVAL 5 QUARTER MAX QUERIES = 321, MAX ERRORS = 10 TO default\n" + instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 321, MAX ERRORS 10, FOR INTERVAL 0.5 HOUR MAX EXECUTION TIME 0.5") + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 30 MINUTE MAX EXECUTION TIME 0.5, FOR INTERVAL 5 QUARTER MAX QUERIES 321, ERRORS 10 TO default\n" expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*/0.5\n"\ "qA key=\\\\'\\\\' interval=\[.*\] queries=1/321 errors=0/10 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) - instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH UNSET TRACKING, FOR RANDOMIZED INTERVAL 16 MONTH SET TRACKING, FOR INTERVAL 1800 SECOND UNSET TRACKING") - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING TO default\n" + instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH NO LIMITS, FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY, FOR INTERVAL 1800 SECOND NO LIMITS") + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY TO default\n" expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -238,7 +238,7 @@ def test_dcl_management(): assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) instance.query("ALTER QUOTA qA RENAME TO qB") - assert instance.query("SHOW CREATE QUOTA qB") == "CREATE QUOTA qB KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING TO default\n" + assert instance.query("SHOW CREATE QUOTA qB") == "CREATE QUOTA qB KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY TO default\n" expected_usage = "qB key=\\\\'\\\\' interval=\[.*\] queries=1 errors=0 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) diff --git a/tests/queries/0_stateless/01033_quota_dcl.reference b/tests/queries/0_stateless/01033_quota_dcl.reference index 7f92f992dd5..7bd2d2923d2 100644 --- a/tests/queries/0_stateless/01033_quota_dcl.reference +++ b/tests/queries/0_stateless/01033_quota_dcl.reference @@ -1,2 +1,2 @@ default -CREATE QUOTA default KEYED BY \'user name\' FOR INTERVAL 1 HOUR TRACKING TO default, readonly +CREATE QUOTA default KEYED BY \'user name\' FOR INTERVAL 1 HOUR TRACKING ONLY TO default, readonly From d992e408d8432ba86289fe712096e1ac484086c3 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 8 Apr 2020 21:01:42 +0300 Subject: [PATCH 146/752] Disable creating row policies for insert, update, delete because those filters are not supported. 
--- src/Parsers/ParserCreateRowPolicyQuery.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/ParserCreateRowPolicyQuery.cpp index 8bfe54b87b2..75c21cd930a 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/ParserCreateRowPolicyQuery.cpp @@ -83,14 +83,13 @@ namespace static constexpr char delete_op[] = "DELETE"; std::vector ops; - bool keyword_for = false; if (ParserKeyword{"FOR"}.ignore(pos, expected)) { - keyword_for = true; do { if (ParserKeyword{"SELECT"}.ignore(pos, expected)) ops.push_back(select_op); +#if 0 /// INSERT, UPDATE, DELETE are not supported yet else if (ParserKeyword{"INSERT"}.ignore(pos, expected)) ops.push_back(insert_op); else if (ParserKeyword{"UPDATE"}.ignore(pos, expected)) @@ -100,6 +99,7 @@ namespace else if (ParserKeyword{"ALL"}.ignore(pos, expected)) { } +#endif else return false; } @@ -109,9 +109,11 @@ namespace if (ops.empty()) { ops.push_back(select_op); +#if 0 /// INSERT, UPDATE, DELETE are not supported yet ops.push_back(insert_op); ops.push_back(update_op); ops.push_back(delete_op); +#endif } std::optional filter; @@ -123,14 +125,15 @@ namespace if (!parseConditionalExpression(pos, expected, filter)) return false; } +#if 0 /// INSERT, UPDATE, DELETE are not supported yet if (ParserKeyword{"WITH CHECK"}.ignore(pos, expected)) { keyword_with_check = true; if (!parseConditionalExpression(pos, expected, check)) return false; } - - if (!keyword_for && !keyword_using && !keyword_with_check) +#endif + if (!keyword_using && !keyword_with_check) return false; if (filter && !check && !alter) From d992e408d8432ba86289fe712096e1ac484086c3 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 9 Apr 2020 00:10:00 +0300 Subject: [PATCH 147/752] PREWHERE can now be used by users without row filtering. 
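
The new makeASTForLogicalAnd/makeASTForLogicalOr helpers fold literal boolean
arguments, and a row-policy condition that folds to the literal "true" is
dropped entirely, so such a user is treated as having no row filter and the
PREWHERE restriction no longer applies to them. A standalone sketch of the
folding rule (hypothetical types, not the real AST; std::nullopt stands for a
non-literal subexpression that must stay in the tree):

    #include <cassert>
    #include <optional>
    #include <vector>

    using MaybeBool = std::optional<bool>;

    /// Fold an AND over literal and non-literal arguments.
    static MaybeBool foldAnd(const std::vector<MaybeBool> & args)
    {
        bool partial_result = true;
        size_t non_literals = 0;
        for (const auto & arg : args)
        {
            if (arg)
                partial_result &= *arg;  /// literals are folded away
            else
                ++non_literals;
        }
        if (!partial_result)
            return false;         /// some literal is false -> whole AND is false
        if (non_literals == 0)
            return true;          /// condition is always true -> drop the check
        return std::nullopt;      /// a real "and" AST node would remain here
    }

    int main()
    {
        assert(foldAnd({true, true}) == MaybeBool{true});    /// filter dropped
        assert(foldAnd({true, false}) == MaybeBool{false});
        assert(!foldAnd({true, std::nullopt}).has_value());  /// filter kept
    }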
--- src/Access/EnabledRowPolicies.cpp | 28 +++--- src/Access/RowPolicyCache.cpp | 90 ++----------------- src/Parsers/makeASTForLogicalFunction.cpp | 103 ++++++++++++++++++++++ src/Parsers/makeASTForLogicalFunction.h | 19 ++++ tests/integration/test_row_policy/test.py | 3 + 5 files changed, 146 insertions(+), 97 deletions(-) create mode 100644 src/Parsers/makeASTForLogicalFunction.cpp create mode 100644 src/Parsers/makeASTForLogicalFunction.h diff --git a/src/Access/EnabledRowPolicies.cpp b/src/Access/EnabledRowPolicies.cpp index a525fb65606..56c73aaf40d 100644 --- a/src/Access/EnabledRowPolicies.cpp +++ b/src/Access/EnabledRowPolicies.cpp @@ -1,7 +1,5 @@ #include -#include -#include -#include +#include #include #include @@ -35,19 +33,17 @@ ASTPtr EnabledRowPolicies::getCondition(const String & database, const String & ASTPtr EnabledRowPolicies::getCondition(const String & database, const String & table_name, ConditionType type, const ASTPtr & extra_condition) const { - ASTPtr main_condition = getCondition(database, table_name, type); - if (!main_condition) - return extra_condition; - if (!extra_condition) - return main_condition; - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "and"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children.push_back(main_condition); - exp_list->children.push_back(extra_condition); - return function; + ASTPtr condition = getCondition(database, table_name, type); + if (condition && extra_condition) + condition = makeASTForLogicalAnd({condition, extra_condition}); + else if (!condition) + condition = extra_condition; + + bool value; + if (tryGetLiteralBool(condition.get(), value) && value) + condition = nullptr; /// The condition is always true, no need to check it. + + return condition; } diff --git a/src/Access/RowPolicyCache.cpp b/src/Access/RowPolicyCache.cpp index 9509923adbf..44f2cd160d4 100644 --- a/src/Access/RowPolicyCache.cpp +++ b/src/Access/RowPolicyCache.cpp @@ -1,97 +1,19 @@ #include #include #include -#include -#include #include #include +#include #include #include #include #include -#include -#include namespace DB { namespace { - bool tryGetLiteralBool(const IAST & ast, bool & value) - { - try - { - if (const ASTLiteral * literal = ast.as()) - { - value = !literal->value.isNull() && applyVisitor(FieldVisitorConvertToNumber(), literal->value); - return true; - } - return false; - } - catch (...) 
- { - return false; - } - } - - ASTPtr applyFunctionAND(ASTs arguments) - { - bool const_arguments = true; - boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool - { - bool b; - if (!tryGetLiteralBool(*argument, b)) - return false; - const_arguments &= b; - return true; - }); - - if (!const_arguments) - return std::make_shared(Field{UInt8(0)}); - if (arguments.empty()) - return std::make_shared(Field{UInt8(1)}); - if (arguments.size() == 1) - return arguments[0]; - - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "and"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children = std::move(arguments); - return function; - } - - - ASTPtr applyFunctionOR(ASTs arguments) - { - bool const_arguments = false; - boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool - { - bool b; - if (!tryGetLiteralBool(*argument, b)) - return false; - const_arguments |= b; - return true; - }); - - if (const_arguments) - return std::make_shared(Field{UInt8(1)}); - if (arguments.empty()) - return std::make_shared(Field{UInt8(0)}); - if (arguments.size() == 1) - return arguments[0]; - - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "or"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children = std::move(arguments); - return function; - } - - using ConditionType = RowPolicy::ConditionType; constexpr size_t MAX_CONDITION_TYPE = RowPolicy::MAX_CONDITION_TYPE; @@ -111,10 +33,16 @@ namespace ASTPtr getResult() && { /// Process permissive conditions. - restrictions.push_back(applyFunctionOR(std::move(permissions))); + restrictions.push_back(makeASTForLogicalOr(std::move(permissions))); /// Process restrictive conditions. - return applyFunctionAND(std::move(restrictions)); + auto condition = makeASTForLogicalAnd(std::move(restrictions)); + + bool value; + if (tryGetLiteralBool(condition.get(), value) && value) + condition = nullptr; /// The condition is always true, no need to check it. 
+ + return condition; } private: diff --git a/src/Parsers/makeASTForLogicalFunction.cpp b/src/Parsers/makeASTForLogicalFunction.cpp new file mode 100644 index 00000000000..eaae38740aa --- /dev/null +++ b/src/Parsers/makeASTForLogicalFunction.cpp @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ +ASTPtr makeASTForLogicalNot(ASTPtr argument) +{ + bool b; + if (tryGetLiteralBool(argument.get(), b)) + return std::make_shared(Field{UInt8(!b)}); + + auto function = std::make_shared(); + auto exp_list = std::make_shared(); + function->name = "not"; + function->arguments = exp_list; + function->children.push_back(exp_list); + exp_list->children.push_back(argument); + return function; +} + + +ASTPtr makeASTForLogicalAnd(ASTs && arguments) +{ + bool partial_result = true; + boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool + { + bool b; + if (!tryGetLiteralBool(argument.get(), b)) + return false; + partial_result &= b; + return true; + }); + + if (!partial_result) + return std::make_shared(Field{UInt8(0)}); + if (arguments.empty()) + return std::make_shared(Field{UInt8(1)}); + if (arguments.size() == 1) + return arguments[0]; + + auto function = std::make_shared(); + auto exp_list = std::make_shared(); + function->name = "and"; + function->arguments = exp_list; + function->children.push_back(exp_list); + exp_list->children = std::move(arguments); + return function; +} + + +ASTPtr makeASTForLogicalOr(ASTs && arguments) +{ + bool partial_result = false; + boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool + { + bool b; + if (!tryGetLiteralBool(argument.get(), b)) + return false; + partial_result |= b; + return true; + }); + + if (partial_result) + return std::make_shared(Field{UInt8(1)}); + if (arguments.empty()) + return std::make_shared(Field{UInt8(0)}); + if (arguments.size() == 1) + return arguments[0]; + + auto function = std::make_shared(); + auto exp_list = std::make_shared(); + function->name = "or"; + function->arguments = exp_list; + function->children.push_back(exp_list); + exp_list->children = std::move(arguments); + return function; +} + + +bool tryGetLiteralBool(const IAST * ast, bool & value) +{ + if (!ast) + return false; + + try + { + if (const ASTLiteral * literal = ast->as()) + { + value = !literal->value.isNull() && applyVisitor(FieldVisitorConvertToNumber(), literal->value); + return true; + } + return false; + } + catch (...) + { + return false; + } +} +} diff --git a/src/Parsers/makeASTForLogicalFunction.h b/src/Parsers/makeASTForLogicalFunction.h new file mode 100644 index 00000000000..5c1096cab6e --- /dev/null +++ b/src/Parsers/makeASTForLogicalFunction.h @@ -0,0 +1,19 @@ +#pragma once + +#include + + +namespace DB +{ +/// Makes an AST calculating NOT argument. +ASTPtr makeASTForLogicalNot(ASTPtr argument); + +/// Makes an AST calculating argument1 AND argument2 AND ... AND argumentN. +ASTPtr makeASTForLogicalAnd(ASTs && arguments); + +/// Makes an AST calculating argument1 OR argument2 OR ... OR argumentN. +ASTPtr makeASTForLogicalOr(ASTs && arguments); + +/// Tries to extract a literal bool from AST. 
+bool tryGetLiteralBool(const IAST * ast, bool & value); +} diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 7087e6aafae..3a5b7340528 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -113,6 +113,9 @@ def test_prewhere_not_supported(): assert expected_error in instance.query_and_get_error("SELECT * FROM mydb.filtered_table2 PREWHERE 1") assert expected_error in instance.query_and_get_error("SELECT * FROM mydb.filtered_table3 PREWHERE 1") + # However PREWHERE should still work for user without filtering. + assert instance.query("SELECT * FROM mydb.filtered_table1 PREWHERE 1", user="another") == "0\t0\n0\t1\n1\t0\n1\t1\n" + def test_single_table_name(): copy_policy_xml('tag_with_table_name.xml') From f0d3547b8f19ac0747849e28cfeb6b14b1f67896 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 9 Apr 2020 02:01:41 +0300 Subject: [PATCH 148/752] Parser can parse "ON CLUSTER" in other places of SQL commands too. --- src/Parsers/ParserCreateQuotaQuery.cpp | 22 +++++++++++------ src/Parsers/ParserCreateRoleQuery.cpp | 20 ++++++++++------ src/Parsers/ParserCreateRowPolicyQuery.cpp | 22 +++++++++++------ .../ParserCreateSettingsProfileQuery.cpp | 23 ++++++++++++------ src/Parsers/ParserCreateUserQuery.cpp | 19 +++++++++------ src/Parsers/ParserGrantQuery.cpp | 24 +++++++++++++++---- 6 files changed, 90 insertions(+), 40 deletions(-) diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/ParserCreateQuotaQuery.cpp index 8bbd2127922..6007d6206ec 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/ParserCreateQuotaQuery.cpp @@ -190,6 +190,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -229,16 +237,10 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!parseIdentifierOrStringLiteral(pos, expected, name)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::optional key_type; std::vector all_limits; + String cluster; while (true) { @@ -251,12 +253,18 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (parseAllLimits(pos, expected, all_limits)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + break; } std::shared_ptr roles; parseToRoles(pos, expected, attach_mode, roles); + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + auto query = std::make_shared(); node = query; diff --git a/src/Parsers/ParserCreateRoleQuery.cpp b/src/Parsers/ParserCreateRoleQuery.cpp index 05143108480..2a6f2dd2c90 100644 --- a/src/Parsers/ParserCreateRoleQuery.cpp +++ b/src/Parsers/ParserCreateRoleQuery.cpp @@ -41,6 +41,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -80,15 +88,10 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parseRoleName(pos, expected, name)) return false; - String cluster; - 
if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::shared_ptr settings; + String cluster; + while (true) { if (alter && parseRenameTo(pos, expected, new_name)) @@ -97,6 +100,9 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (parseSettings(pos, expected, attach_mode, settings)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + break; } diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/ParserCreateRowPolicyQuery.cpp index 75c21cd930a..b6840f0ed6a 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/ParserCreateRowPolicyQuery.cpp @@ -203,6 +203,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -246,16 +254,10 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & || !parseDatabaseAndTableName(pos, expected, database, table_name)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_policy_name; std::optional is_restrictive; std::vector> conditions; + String cluster; while (true) { @@ -268,12 +270,18 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (parseMultipleConditions(pos, expected, alter, conditions)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + break; } std::shared_ptr roles; parseToRoles(pos, expected, attach_mode, roles); + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + auto query = std::make_shared(); node = query; diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/ParserCreateSettingsProfileQuery.cpp index 5b33fed2fa0..83d0f0c1d91 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ParserCreateSettingsProfileQuery.cpp @@ -57,6 +57,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -96,15 +104,10 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (!parseIdentifierOrStringLiteral(pos, expected, name)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::shared_ptr settings; + String cluster; + while (true) { if (alter && parseRenameTo(pos, expected, new_name)) @@ -113,12 +116,18 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (parseSettings(pos, expected, attach_mode, settings)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + break; } std::shared_ptr to_roles; parseToRoles(pos, expected, attach_mode, to_roles); + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + auto query = std::make_shared(); node = query; diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index 
3968c26d42e..28483cc76ec 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -250,6 +250,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -290,13 +298,6 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parseUserName(pos, expected, name, host_pattern)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::optional new_host_pattern; std::optional authentication; @@ -305,6 +306,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::optional remove_hosts; std::shared_ptr default_roles; std::shared_ptr settings; + String cluster; while (true) { @@ -320,6 +322,9 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!default_roles && parseDefaultRoles(pos, expected, attach_mode, default_roles)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + if (alter) { if (new_name.empty() && parseRenameTo(pos, expected, new_name, new_host_pattern)) diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index f6eecbe5dba..64dde8f6524 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -237,6 +237,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -260,11 +268,8 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); bool grant_option = false; bool admin_option = false; @@ -281,10 +286,16 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parseAccessRightsElements(pos, expected, elements) && !parseRoles(pos, expected, attach, roles)) return false; + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + std::shared_ptr to_roles; if (!parseToRoles(pos, expected, kind, to_roles)) return false; + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + if (kind == Kind::GRANT) { if (ParserKeyword{"WITH GRANT OPTION"}.ignore(pos, expected)) @@ -293,6 +304,9 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) admin_option = true; } + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + if (grant_option && roles) throw Exception("GRANT OPTION should be specified for access types", ErrorCodes::SYNTAX_ERROR); if (admin_option && !elements.empty()) From ed2562b3f468ecab6b0bf49ea95ec9487a6524f2 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 9 Apr 2020 02:53:41 +0300 Subject: [PATCH 149/752] Add new words to client's suggest. 
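
These are only new entries in the client's static word list; the completion
machinery itself is unchanged. As a rough illustration (not the actual Suggest
implementation), once keywords such as QUOTA, POLICY or GRANT are in a sorted
list, a typed prefix can be completed with a plain binary search:

    #include <algorithm>
    #include <cassert>
    #include <string>
    #include <vector>

    /// Return all words from the list that start with the given prefix.
    static std::vector<std::string> complete(std::vector<std::string> words, const std::string & prefix)
    {
        std::sort(words.begin(), words.end());
        std::vector<std::string> out;
        for (auto it = std::lower_bound(words.begin(), words.end(), prefix);
             it != words.end() && it->compare(0, prefix.size(), prefix) == 0; ++it)
            out.push_back(*it);
        return out;
    }

    int main()
    {
        auto matches = complete({"QUERY", "QUOTA", "QUARTER", "POLICY"}, "QU");
        assert((matches == std::vector<std::string>{"QUARTER", "QUERY", "QUOTA"}));
    }

Sorting once and scanning from lower_bound keeps completion cheap even as the
word list keeps growing.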
--- programs/client/Suggest.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index f7141449f54..8fffbec4fab 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -67,16 +67,19 @@ void Suggest::load(const ConnectionParameters & connection_parameters, size_t su Suggest::Suggest() { /// Keywords may be not up to date with ClickHouse parser. - words = {"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", - "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", - "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", - "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", - "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", - "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", - "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", - "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", - "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", - "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE"}; + words = {"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", + "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", + "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", + "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", + "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", + "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", + "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", + "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", + "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", + "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", + "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", + "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "FOR", "RANDOMIZED", + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP"}; } void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) From 12336a9ece3d9b6d2073c6d6168a976bfec65b88 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 9 Apr 2020 02:57:45 +0300 Subject: [PATCH 150/752] Use "CREATE USER HOST REGEXP" instead of "CREATE USER HOST NAME REGEXP". 
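
A side note on why the shorter form is also easier to parse (an observation,
not part of the patch): with a greedy keyword matcher, the old two-word
"NAME REGEXP" clause had to be tried before the plain "NAME" clause to win the
longest match, while "REGEXP" and "NAME" now start with different words. A toy
matcher (hypothetical, not ParserKeyword) showing the difference:

    #include <cassert>
    #include <string>

    /// True if the input begins with the keyword followed by a word boundary.
    static bool startsWithWord(const std::string & input, const std::string & kw)
    {
        return input.compare(0, kw.size(), kw) == 0
            && (input.size() == kw.size() || input[kw.size()] == ' ');
    }

    int main()
    {
        assert(startsWithWord("REGEXP '.*\\.site\\.com'", "REGEXP"));
        assert(!startsWithWord("REGEXP '.*\\.site\\.com'", "NAME"));
        assert(startsWithWord("NAME REGEXP 'x'", "NAME")); /// old form was ambiguous
    }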
--- src/Parsers/ASTCreateUserQuery.cpp | 2 +- src/Parsers/ASTCreateUserQuery.h | 4 ++-- src/Parsers/ParserCreateUserQuery.cpp | 2 +- src/Parsers/ParserCreateUserQuery.h | 4 ++-- .../0_stateless/01075_allowed_client_hosts.reference | 8 ++++---- tests/queries/0_stateless/01075_allowed_client_hosts.sql | 8 ++++---- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index d901ed8f5a1..c8e2a76dfa2 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -109,7 +109,7 @@ namespace { if (std::exchange(need_comma, true)) settings.ostr << ", "; - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NAME REGEXP " << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "REGEXP " << (settings.hilite ? IAST::hilite_none : ""); bool need_comma2 = false; for (const auto & host_regexp : name_regexps) { diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/ASTCreateUserQuery.h index 5a5cc0d9550..54dc51d783b 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/ASTCreateUserQuery.h @@ -13,14 +13,14 @@ class ASTSettingsProfileElements; /** CREATE USER [IF NOT EXISTS | OR REPLACE] name * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] - * [HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...]] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] - * [[ADD|DROP] HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] */ diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index 28483cc76ec..76a06a0282f 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -166,7 +166,7 @@ namespace { new_hosts.addLocalHost(); } - else if (ParserKeyword{"NAME REGEXP"}.ignore(pos, expected)) + else if (ParserKeyword{"REGEXP"}.ignore(pos, expected)) { ASTPtr ast; if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) diff --git a/src/Parsers/ParserCreateUserQuery.h b/src/Parsers/ParserCreateUserQuery.h index 4b2af34c003..d609894a7ec 100644 --- a/src/Parsers/ParserCreateUserQuery.h +++ b/src/Parsers/ParserCreateUserQuery.h @@ -8,13 +8,13 @@ namespace DB /** Parses queries like * CREATE USER [IF NOT EXISTS | OR REPLACE] name * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] - * [HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] 
| ANY | NONE] + * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] - * [[ADD|DROP] HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] */ class ParserCreateUserQuery : public IParserBase diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.reference b/tests/queries/0_stateless/01075_allowed_client_hosts.reference index 0082653059c..73f54c6027a 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.reference +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.reference @@ -8,10 +8,10 @@ CREATE USER test_user_01075 HOST LOCAL, IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765 CREATE USER test_user_01075 HOST LOCAL CREATE USER test_user_01075 HOST NONE CREATE USER test_user_01075 HOST LIKE \'@.somesite.com\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite.com\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite.com\', \'.*.anothersite.org\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite2.com\', \'.*.anothersite2.org\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite3.com\', \'.*.anothersite3.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite.com\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite.com\', \'.*.anothersite.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite2.com\', \'.*.anothersite2.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite3.com\', \'.*.anothersite3.org\' CREATE USER `test_user_01075_x@localhost` HOST LOCAL CREATE USER test_user_01075_x CREATE USER `test_user_01075_x@192.168.23.15` HOST LIKE \'192.168.23.15\' diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.sql b/tests/queries/0_stateless/01075_allowed_client_hosts.sql index e0b1c0f9905..2960a93f0f2 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.sql +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.sql @@ -30,16 +30,16 @@ SHOW CREATE USER test_user_01075; ALTER USER test_user_01075 HOST LIKE '@.somesite.com'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite\.com'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite\.com'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite\.com', '.*\.anothersite\.org'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite\.com', '.*\.anothersite\.org'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite2\.com', NAME REGEXP '.*\.anothersite2\.org'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite2\.com', REGEXP '.*\.anothersite2\.org'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite3\.com' HOST NAME REGEXP '.*\.anothersite3\.org'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite3\.com' HOST REGEXP '.*\.anothersite3\.org'; SHOW CREATE USER test_user_01075; DROP USER test_user_01075; From 
3149f75430161b8feee13b6759ada45d63f151c9 Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Thu, 9 Apr 2020 17:27:55 +0900 Subject: [PATCH 151/752] Replace a reference to Context with a copied bool value. --- src/Interpreters/ActionsVisitor.cpp | 4 ++-- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/Set.cpp | 7 ++++--- src/Interpreters/Set.h | 8 ++++---- src/Storages/StorageSet.cpp | 4 ++-- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index f7d64d54f27..38656c47765 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -186,7 +186,7 @@ SetPtr makeExplicitSet( else throw_unsupported_type(right_arg_type); - SetPtr set = std::make_shared(size_limits, create_ordered_set, context); + SetPtr set = std::make_shared(size_limits, create_ordered_set, context.getSettingsRef().transform_null_in); set->setHeader(block); set->insertFromBlock(block); @@ -654,7 +654,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su return subquery_for_set.set; } - SetPtr set = std::make_shared(data.set_size_limit, false, data.context); + SetPtr set = std::make_shared(data.set_size_limit, false, data.context.getSettingsRef().transform_null_in); /** The following happens for GLOBAL INs: * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d0b44b91af7..ee3ba3c8b98 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -291,7 +291,7 @@ void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, {}, query_options); BlockIO res = interpreter_subquery->execute(); - SetPtr set = std::make_shared(settings.size_limits_for_set, true, context); + SetPtr set = std::make_shared(settings.size_limits_for_set, true, context.getSettingsRef().transform_null_in); set->setHeader(res.in->getHeader()); res.in->readPrefix(); diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index e63eff37047..0504f9d9e6d 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -140,7 +140,7 @@ void Set::setHeader(const Block & header) /// We will insert to the Set only keys, where all components are not NULL. ConstNullMapPtr null_map{}; - ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map, context.getSettingsRef().transform_null_in); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map, transform_null_in); if (fill_set_elements) { @@ -230,7 +230,7 @@ static Field extractValueFromNode(const ASTPtr & node, const IDataType & type, c throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); } -void Set::createFromAST(const DataTypes & types, ASTPtr node) +void Set::createFromAST(const DataTypes & types, ASTPtr node, const Context & context) { /// Will form a block with values from the set. @@ -350,7 +350,8 @@ ColumnPtr Set::execute(const Block & block, bool negative) const /// We will check existence in Set only for keys, where all components are not NULL. 
ConstNullMapPtr null_map{}; - ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map, context.getSettingsRef().transform_null_in); + + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map, transform_null_in); executeOrdinary(key_columns, vec_res, negative, null_map); diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index da20ffc41b6..90ff6c48dec 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -30,9 +30,9 @@ public: /// (that is useful only for checking that some value is in the set and may not store the original values), /// store all set elements in explicit form. /// This is needed for subsequent use for index. - Set(const SizeLimits & limits_, bool fill_set_elements_, const Context & context_) + Set(const SizeLimits & limits_, bool fill_set_elements_, bool transform_null_in_) : log(&Logger::get("Set")), - limits(limits_), fill_set_elements(fill_set_elements_), context(context_) + limits(limits_), fill_set_elements(fill_set_elements_), transform_null_in(transform_null_in_) { } @@ -45,7 +45,7 @@ public: * 'types' - types of what are on the left hand side of IN. * 'node' - list of values: 1, 2, 3 or list of tuples: (1, 2), (3, 4), (5, 6). */ - void createFromAST(const DataTypes & types, ASTPtr node); + void createFromAST(const DataTypes & types, ASTPtr node, const Context & context); /** Create a Set from stream. * Call setHeader, then call insertFromBlock for each block. @@ -113,7 +113,7 @@ private: /// Do we need to additionally store all elements of the set in explicit form for subsequent use for index. bool fill_set_elements; - const Context & context; + bool transform_null_in; bool has_null = false; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 45e1f81b487..7d2a7ee128f 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -112,7 +112,7 @@ StorageSet::StorageSet( const ConstraintsDescription & constraints_, const Context & context_) : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_}, - set(std::make_shared(SizeLimits(), false, context_)) + set(std::make_shared(SizeLimits(), false, context_.getSettingsRef().transform_null_in)) { Block header = getSampleBlock(); header = header.sortColumns(); @@ -137,7 +137,7 @@ void StorageSet::truncate(const ASTPtr &, const Context & context, TableStructur header = header.sortColumns(); increment = 0; - set = std::make_shared(SizeLimits(), false, context); + set = std::make_shared(SizeLimits(), false, context.getSettingsRef().transform_null_in); set->setHeader(header); } From 4847914edad4a82caaba40092753fabc152a4c73 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Thu, 9 Apr 2020 13:15:54 +0300 Subject: [PATCH 152/752] CLICKHOUSEDOCS-475: Fixes. --- docs/en/operations/system_tables.md | 2 +- docs/ru/operations/system_tables.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 60d13b939fb..8905ca14569 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -153,7 +153,7 @@ Contains information about [external dictionaries](../sql_reference/dictionaries Columns: -- `database` ([String](../sql_reference/data_types/string.md)) — Database name where the dictionary is located. Only for dictionaries created by DDL query, for others is always an empty string. 
+- `database` ([String](../sql_reference/data_types/string.md)) — Name of the database containing the dictionary created by a DDL query. Empty string for other dictionaries. - `name` ([String](../sql_reference/data_types/string.md)) — [Dictionary name](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md). - `status` ([Enum8](../sql_reference/data_types/enum.md)) — Dictionary status. Possible values: - `NOT_LOADED` — Dictionary was not loaded because it was not used. diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index a70d7c97d0b..fac1e63264b 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -141,7 +141,7 @@ SELECT * FROM system.contributors WHERE name='Olga Khvostikova' Столбцы: -- `database` ([String](../data_types/string.md)) — Имя базы данных, в которой находится словарь. Только для словарей, созданных с помощью DDL-запроса, для остальных — всегда пустая строка. +- `database` ([String](../data_types/string.md)) — Имя базы данных, в которой находится словарь, созданный с помощью DDL-запроса. Пустая строка для других словарей. - `name` ([String](../data_types/string.md)) — [Имя словаря](../query_language/dicts/external_dicts_dict.md). - `status` ([Enum8](../data_types/enum.md)) — Статус словаря. Возможные значения: - `NOT_LOADED` — Словарь не загружен, потому что не использовался. From 9f5a40e7004f8aaa233fea7e64c38cd2d948450b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Apr 2020 13:35:51 +0300 Subject: [PATCH 153/752] Added comment. --- src/Processors/NullSink.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/NullSink.h b/src/Processors/NullSink.h index b3c3bc1ac60..5d304a0d68e 100644 --- a/src/Processors/NullSink.h +++ b/src/Processors/NullSink.h @@ -5,6 +5,7 @@ namespace DB { +/// Sink which closes the input port and reads nothing. class NullSink : public IProcessor { public: @@ -20,6 +21,7 @@ public: InputPort & getPort() { return inputs.front(); } }; +/// Sink which reads everything and does nothing with it. class EmptySink : public ISink { public: From 7cc0c99669bbec93fa7d57ed034199ba7db6c19c Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 9 Apr 2020 15:16:56 +0200 Subject: [PATCH 154/752] Add clickhouse-docker utility --- utils/clickhouse-docker | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100755 utils/clickhouse-docker diff --git a/utils/clickhouse-docker b/utils/clickhouse-docker new file mode 100755 index 00000000000..6f2d1197c0a --- /dev/null +++ b/utils/clickhouse-docker @@ -0,0 +1,57 @@ +#!/bin/bash + +if [ $# -lt 1 ] +then +cat << HELP + +clickhouse-docker -- open a clickhouse-client of the desired version in a docker container (automatically removed after you exit the bash shell). 
+
+EXAMPLE:
+ - start the latest version:
+ clickhouse-docker latest
+
+ - start version 20.1:
+ clickhouse-docker 20.1
+
+ - list available versions:
+ clickhouse-docker list
+HELP
+exit
+fi
+
+param="$1"
+
+if [ "${param}" = "list" ]
+then
+ # https://stackoverflow.com/a/39454426/1555175
+ wget -q https://registry.hub.docker.com/v1/repositories/yandex/clickhouse-server/tags -O - | sed -e 's/[][]//g' -e 's/"//g' -e 's/ //g' | tr '}' '\n' | awk -F: '{print $3}'
+else
+ docker pull yandex/clickhouse-server:${param}
+ tmp_dir=$(mktemp -d -t ci-XXXXXXXXXX) # older versions require the /nonexistent folder to exist to run clickhouse-client
+ chmod 777 ${tmp_dir}
+ set -e
+ containerid=`docker run -v${tmp_dir}:/nonexistent -d yandex/clickhouse-server:${param}`
+ set +e
+ while :
+ do
+ # the init-file trick starts clickhouse-client inside a bash shell (handy if you need to exit to bash, check something, and get back to clickhouse-client)
+ docker exec -it ${containerid} bash -c 'bash --init-file <(echo "clickhouse client -m")'
+
+ printf "\n\nYou exited the session. What next?\n"
+ echo " [Q]uit and remove the container."
+ echo " [R]estart clickhouse and run clickhouse-client in the shell again."
+ echo "You can also hit Ctrl+C to exit and keep the container running."
+
+ while :
+ do
+ read -p "Quit or restart [Q/R]?" choice
+ case "$choice" in
+ q|Q|exit ) break 2;;
+ r|R|restart ) echo "Restarting container ..."; docker restart ${containerid} > /dev/null; break 1;;
+ * ) echo "I don't understand. Please type Q or R" ;;
+ esac
+ done
+ done
+ docker rm -f ${containerid} > /dev/null
+ rm -rf ${tmp_dir}
+fi From a5ac19bf1a385473da57a4982c91f8557c1d216f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 9 Apr 2020 17:33:59 +0300 Subject: [PATCH 155/752] deleted debug output --- src/Dictionaries/ClickHouseDictionarySource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index a9aad0739b1..aa06e1b8660 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -230,7 +230,6 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) { Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); - std::cerr << "initialization: " << context_local_copy.getSettings().max_bytes_to_read << '\n'; /// Note that processors are not supported yet (see constructor), /// hence it is not possible to override experimental_use_processors setting return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context_local_copy); From 3c4ac2f04c66fab059bcd373b27bc80fe17bb4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 9 Apr 2020 17:38:01 +0300 Subject: [PATCH 156/752] minor changes --- src/Dictionaries/DictionaryStructure.cpp | 2 -- src/Dictionaries/DictionaryStructure.h | 7 ------- 2 files changed, 9 deletions(-) diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 5528be7a2bb..f8b8fbd6aab 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -20,7 +20,6 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int TYPE_MISMATCH; extern const int BAD_ARGUMENTS; - extern 
const int NO_ELEMENTS_IN_CONFIG; } namespace @@ -194,7 +193,6 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration } attributes = getAttributes(config, config_prefix); - if (attributes.empty()) throw Exception{"Dictionary has no attributes defined", ErrorCodes::BAD_ARGUMENTS}; } diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index cd9d41f67ee..2893dea2e4f 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -102,7 +101,6 @@ struct DictionaryStructure final std::optional id; std::optional> key; std::vector attributes; - Settings settings; std::optional range_min; std::optional range_max; bool has_expressions = false; @@ -120,11 +118,6 @@ private: const std::string & config_prefix, const bool hierarchy_allowed = true, const bool allow_null_values = true); - - void getSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Settings & dict_settings); }; } From 10aad522787942f2066e1780e10f96d8b6e4678c Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Thu, 9 Apr 2020 17:38:32 +0300 Subject: [PATCH 157/752] fix crash --- src/Interpreters/JoinedTables.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index c00704ced9c..e6843a3089e 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -247,6 +247,9 @@ void JoinedTables::rewriteDistributedInAndJoins(ASTPtr & query) std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & select_query) { + if (tables_with_columns.size() < 2) + return {}; + auto settings = context.getSettingsRef(); auto table_join = std::make_shared(settings, context.getTemporaryVolume()); From 23e757bcb79e11756e72134fdf037052a0b295b5 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Thu, 9 Apr 2020 17:10:29 +0200 Subject: [PATCH 158/752] Fix link to prev changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0ea7f62b6c..d5301de8a23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -641,4 +641,4 @@ #### Security Fix * Fixed the possibility of reading directories structure in tables with `File` table engine. This fixes [#8536](https://github.com/ClickHouse/ClickHouse/issues/8536). [#8537](https://github.com/ClickHouse/ClickHouse/pull/8537) ([alexey-milovidov](https://github.com/alexey-milovidov)) -## [Changelog for 2019](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2019.md) +## [Changelog for 2019](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats_new/changelog/2019.md) From ee36750482d6ddca2a3247679293e5fb4a622b29 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Thu, 9 Apr 2020 18:36:13 +0300 Subject: [PATCH 159/752] Set storage policy explicitly in MergeTree over S3 tests. 
--- .../configs/config.d/storage_conf.xml | 4 ++-- tests/integration/test_merge_tree_s3/test.py | 4 +++- .../configs/config.d/storage_conf.xml | 11 ++--------- .../integration/test_replicated_merge_tree_s3/test.py | 10 ++++------ 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml index 5b292446c6b..d097675ca63 100644 --- a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml @@ -13,7 +13,7 @@
- +
s3 @@ -22,7 +22,7 @@ hdd - + diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index e12e31ebff2..4beb33604be 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -67,7 +67,9 @@ def create_table(cluster, table_name, additional_settings=None): PARTITION BY dt ORDER BY (dt, id) SETTINGS - old_parts_lifetime=0, index_granularity=512 + storage_policy = 's3', + old_parts_lifetime=0, + index_granularity=512 """.format(table_name) if additional_settings: diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml index 5b292446c6b..b32770095fc 100644 --- a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml @@ -7,22 +7,15 @@ minio minio123 - - local - / - - +
s3
- - hdd -
-
+
diff --git a/tests/integration/test_replicated_merge_tree_s3/test.py b/tests/integration/test_replicated_merge_tree_s3/test.py index a8b7cf63e38..8689e7ccf5d 100644 --- a/tests/integration/test_replicated_merge_tree_s3/test.py +++ b/tests/integration/test_replicated_merge_tree_s3/test.py @@ -67,6 +67,7 @@ def create_table(cluster): ) ENGINE=ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/s3', '{instance}') PARTITION BY dt ORDER BY (dt, id) + SETTINGS storage_policy = 's3' """ for node in cluster.instances.values(): @@ -80,12 +81,9 @@ def drop_table(cluster): node.query("DROP TABLE IF EXISTS s3_test") minio = cluster.minio_client - try: - assert len(list(minio.list_objects(cluster.minio_bucket, 'data/'))) == 0 - finally: - # Remove extra objects to prevent tests cascade failing - for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')): - minio.remove_object(cluster.minio_bucket, obj.object_name) + # Remove extra objects to prevent tests cascade failing + for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')): + minio.remove_object(cluster.minio_bucket, obj.object_name) def test_insert_select_replicated(cluster): From 25eae6abe035bda78634f17d15c9f05348d97aad Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 18:51:36 +0300 Subject: [PATCH 160/752] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index e9ae2c2d2f4..3db5e08d2a9 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,6 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [ClickHouse in Avito (online in Russian)](https://avitotech.timepad.ru/event/1290051/) on April 9, 2020. * [ClickHouse Monitoring Round Table (online in English)](https://www.eventbrite.com/e/clickhouse-april-virtual-meetup-tickets-102272923066) on April 15, 2020. * [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date. * [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date. From 20d935566fc83d82a9ad1d47b00ece69b7efd01d Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 9 Apr 2020 19:12:55 +0300 Subject: [PATCH 161/752] Enable access management in stateless tests #3. 
--- tests/users.d/access_management.xml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tests/users.d/access_management.xml diff --git a/tests/users.d/access_management.xml b/tests/users.d/access_management.xml new file mode 100644 index 00000000000..7e799cb7b10 --- /dev/null +++ b/tests/users.d/access_management.xml @@ -0,0 +1,7 @@ + + + + 1 + + + From a4e2fd24b05a0dc1e3fd661b02daed720c8eae48 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 21:06:07 +0300 Subject: [PATCH 162/752] Get rid of toc_ru.yml (#10152) --- ...sampling_query_profiler_example_result.txt | 4 - docs/ru/changelog/2017.md | 265 --- docs/ru/changelog/2018.md | 1060 --------- docs/ru/changelog/2019.md | 2071 ---------------- docs/ru/changelog/index.md | 1 - docs/ru/commercial/cloud.md | 24 +- docs/ru/commercial/index.md | 7 + docs/ru/development/architecture.md | 205 +- docs/ru/development/build.md | 79 +- docs/ru/development/build_cross_arm.md | 21 +- docs/ru/development/build_cross_osx.md | 25 +- docs/ru/development/build_osx.md | 37 +- docs/ru/development/index.md | 7 +- docs/ru/development/tests.md | 239 +- .../{ => engines}/database_engines/index.md | 2 +- .../ru/{ => engines}/database_engines/lazy.md | 0 .../{ => engines}/database_engines/mysql.md | 30 +- docs/ru/engines/index.md | 6 + .../table_engines/index.md | 56 +- .../table_engines/integrations}/hdfs.md | 6 +- .../table_engines/integrations/index.md | 5 + .../table_engines/integrations}/jdbc.md | 4 +- .../table_engines/integrations}/kafka.md | 4 +- .../table_engines/integrations}/mysql.md | 8 +- .../table_engines/integrations}/odbc.md | 10 +- .../engines/table_engines/log_family/index.md | 5 + .../table_engines/log_family}/log.md | 0 .../table_engines/log_family}/log_family.md | 2 +- .../table_engines/log_family}/stripelog.md | 2 +- .../table_engines/log_family}/tinylog.md | 0 .../mergetree_family}/aggregatingmergetree.md | 4 +- .../mergetree_family}/collapsingmergetree.md | 2 +- .../custom_partitioning_key.md | 10 +- .../mergetree_family}/graphitemergetree.md | 4 +- .../table_engines/mergetree_family/index.md | 5 + .../mergetree_family}/mergetree.md | 71 +- .../mergetree_family}/replacingmergetree.md | 2 +- .../mergetree_family}/replication.md | 8 +- .../mergetree_family}/summingmergetree.md | 8 +- .../versionedcollapsingmergetree.md | 2 +- .../table_engines/special}/buffer.md | 0 .../table_engines/special}/dictionary.md | 4 +- .../table_engines/special}/distributed.md | 10 +- .../table_engines/special}/external_data.md | 0 .../table_engines/special}/file.md | 8 +- .../engines/table_engines/special/generate.md | 59 + .../ru/engines/table_engines/special/index.md | 5 + .../table_engines/special}/join.md | 22 +- .../table_engines/special/materializedview.md | 5 + .../table_engines/special}/memory.md | 0 .../table_engines/special}/merge.md | 2 +- .../table_engines/special}/null.md | 0 .../table_engines/special}/set.md | 0 .../table_engines/special}/url.md | 4 +- .../table_engines/special}/view.md | 0 docs/ru/faq/general.md | 6 +- docs/ru/faq/index.md | 6 + docs/ru/getting_started/tutorial.md | 145 +- docs/ru/guides/apply_catboost_model.md | 6 +- docs/ru/interfaces/cli.md | 2 +- docs/ru/interfaces/formats.md | 92 +- docs/ru/interfaces/http.md | 2 +- docs/ru/interfaces/mysql.md | 2 +- docs/ru/interfaces/third-party/index.md | 5 + .../ru/interfaces/third-party/integrations.md | 4 +- docs/ru/introduction/adopters.md | 147 +- docs/ru/introduction/distinctive_features.md | 2 +- docs/ru/introduction/index.md | 6 + 
docs/ru/operations/access_rights.md | 2 +- docs/ru/operations/backup.md | 8 +- docs/ru/operations/configuration_files.md | 2 +- docs/ru/operations/index.md | 2 +- docs/ru/operations/monitoring.md | 4 +- .../optimizing_performance/index.md | 5 + .../sampling_query_profiler.md | 62 + .../performance/sampling_query_profiler.md | 61 - ...sampling_query_profiler_example_result.txt | 4 - docs/ru/operations/performance_test.md | 33 +- docs/ru/operations/requirements.md | 4 +- .../index.md | 2 +- .../settings.md | 100 +- .../operations/settings/query_complexity.md | 12 +- docs/ru/operations/settings/settings.md | 88 +- docs/ru/operations/settings/settings_users.md | 2 +- docs/ru/operations/system_tables.md | 170 +- docs/ru/operations/table_engines/generate.md | 58 - .../table_engines/materializedview.md | 5 - docs/ru/operations/troubleshooting.md | 2 +- .../utilities/clickhouse-benchmark.md | 154 ++ .../{utils => utilities}/clickhouse-copier.md | 0 .../{utils => utilities}/clickhouse-local.md | 2 +- .../operations/{utils => utilities}/index.md | 0 .../operations/utils/clickhouse-benchmark.md | 153 -- docs/ru/query_language/index.md | 9 - docs/ru/roadmap.md | 16 - .../aggregate_functions}/combinators.md | 6 +- .../aggregate_functions}/index.md | 0 .../parametric_functions.md | 16 +- .../aggregate_functions}/reference.md | 84 +- .../data_types}/aggregatefunction.md | 4 +- .../{ => sql_reference}/data_types/array.md | 2 +- .../{ => sql_reference}/data_types/boolean.md | 0 .../ru/{ => sql_reference}/data_types/date.md | 0 .../data_types/datetime.md | 20 +- .../data_types/datetime64.md | 12 +- .../{ => sql_reference}/data_types/decimal.md | 0 .../sql_reference/data_types/domains/index.md | 5 + .../data_types/domains/ipv4.md | 0 .../data_types/domains/ipv6.md | 0 .../data_types/domains/overview.md | 0 .../ru/{ => sql_reference}/data_types/enum.md | 2 +- .../data_types/fixedstring.md | 2 +- .../{ => sql_reference}/data_types/float.md | 2 +- .../{ => sql_reference}/data_types/index.md | 0 .../data_types/int_uint.md | 0 .../nested_data_structures/index.md | 0 .../nested_data_structures/nested.md | 0 .../data_types/nullable.md | 2 +- .../special_data_types/expression.md | 0 .../data_types/special_data_types/index.md | 0 .../data_types/special_data_types/interval.md | 8 +- .../data_types/special_data_types/nothing.md | 4 +- .../data_types/special_data_types/set.md | 0 .../{ => sql_reference}/data_types/string.md | 0 .../{ => sql_reference}/data_types/tuple.md | 4 +- .../ru/{ => sql_reference}/data_types/uuid.md | 6 +- .../external_dictionaries}/external_dicts.md | 12 +- .../external_dicts_dict.md | 2 +- .../external_dicts_dict_hierarchical.md | 2 +- .../external_dicts_dict_layout.md | 2 +- .../external_dicts_dict_lifetime.md | 0 .../external_dicts_dict_sources.md | 14 +- .../external_dicts_dict_structure.md | 6 +- .../external_dictionaries/index.md | 5 + .../dictionaries}/index.md | 4 +- .../dictionaries}/internal_dicts.md | 0 .../functions/arithmetic_functions.md | 0 .../functions/array_functions.md | 10 +- .../functions/array_join.md | 0 .../functions/bit_functions.md | 2 +- .../functions/bitmap_functions.md | 6 +- .../functions/comparison_functions.md | 0 .../functions/conditional_functions.md | 2 +- .../functions/date_time_functions.md | 4 +- .../functions/encoding_functions.md | 4 +- .../functions/ext_dict_functions.md | 24 +- .../functions/functions_for_nulls.md | 2 +- .../functions/geo.md | 34 +- .../functions/hash_functions.md | 40 +- .../functions/higher_order_functions.md | 0 
.../functions/in_functions.md | 2 +- .../functions/index.md | 0 .../functions/introspection.md | 12 +- .../functions/ip_address_functions.md | 4 +- .../functions/json_functions.md | 0 .../functions/logical_functions.md | 0 .../functions/machine_learning_functions.md | 4 +- .../functions/math_functions.md | 0 .../functions/other_functions.md | 28 +- .../functions/random_functions.md | 0 .../functions/rounding_functions.md | 4 +- .../functions/splitting_merging_functions.md | 0 .../functions/string_functions.md | 16 +- .../functions/string_replace_functions.md | 0 .../functions/string_search_functions.md | 0 .../functions/type_conversion_functions.md | 28 +- .../functions/url_functions.md | 4 +- .../functions/uuid_functions.md | 6 +- .../functions/ym_dict_functions.md | 4 +- docs/ru/sql_reference/index.md | 9 + .../operators.md | 16 +- .../statements}/alter.md | 32 +- .../statements}/create.md | 26 +- docs/ru/sql_reference/statements/index.md | 5 + .../statements}/insert_into.md | 8 +- .../statements}/misc.md | 26 +- .../statements}/select.md | 70 +- .../statements}/show.md | 4 +- .../statements}/system.md | 4 +- .../syntax.md | 10 +- .../table_functions/file.md | 2 +- .../table_functions/generate.md | 0 .../table_functions/hdfs.md | 0 .../table_functions/index.md | 20 +- .../table_functions/input.md | 0 .../table_functions/jdbc.md | 0 .../table_functions/merge.md | 0 .../table_functions/mysql.md | 4 +- .../table_functions/numbers.md | 0 .../table_functions/odbc.md | 4 +- .../table_functions/remote.md | 0 .../table_functions/url.md | 0 docs/ru/whats_new/changelog/2017.md | 266 +++ docs/ru/whats_new/changelog/2018.md | 1061 +++++++++ docs/ru/whats_new/changelog/2019.md | 2072 +++++++++++++++++ docs/ru/whats_new/changelog/index.md | 650 ++++++ docs/ru/whats_new/index.md | 6 + docs/ru/whats_new/roadmap.md | 17 + docs/ru/{ => whats_new}/security_changelog.md | 0 docs/toc_ru.yml | 253 -- docs/tools/convert_toc.py | 2 +- 201 files changed, 5657 insertions(+), 5181 deletions(-) delete mode 100644 docs/ru/changelog/2017.md delete mode 100644 docs/ru/changelog/2018.md delete mode 100644 docs/ru/changelog/2019.md delete mode 120000 docs/ru/changelog/index.md create mode 100644 docs/ru/commercial/index.md rename docs/ru/{ => engines}/database_engines/index.md (78%) rename docs/ru/{ => engines}/database_engines/lazy.md (100%) rename docs/ru/{ => engines}/database_engines/mysql.md (68%) create mode 100644 docs/ru/engines/index.md rename docs/ru/{operations => engines}/table_engines/index.md (64%) rename docs/ru/{operations/table_engines => engines/table_engines/integrations}/hdfs.md (94%) create mode 100644 docs/ru/engines/table_engines/integrations/index.md rename docs/ru/{operations/table_engines => engines/table_engines/integrations}/jdbc.md (95%) rename docs/ru/{operations/table_engines => engines/table_engines/integrations}/kafka.md (95%) rename docs/ru/{operations/table_engines => engines/table_engines/integrations}/mysql.md (89%) rename docs/ru/{operations/table_engines => engines/table_engines/integrations}/odbc.md (90%) create mode 100644 docs/ru/engines/table_engines/log_family/index.md rename docs/ru/{operations/table_engines => engines/table_engines/log_family}/log.md (100%) rename docs/ru/{operations/table_engines => engines/table_engines/log_family}/log_family.md (97%) rename docs/ru/{operations/table_engines => engines/table_engines/log_family}/stripelog.md (98%) rename docs/ru/{operations/table_engines => engines/table_engines/log_family}/tinylog.md (100%) rename 
docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/aggregatingmergetree.md (95%) rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/collapsingmergetree.md (99%) rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/custom_partitioning_key.md (91%) rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/graphitemergetree.md (93%) create mode 100644 docs/ru/engines/table_engines/mergetree_family/index.md rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/mergetree.md (87%) rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/replacingmergetree.md (96%) rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/replication.md (97%) rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/summingmergetree.md (91%) rename docs/ru/{operations/table_engines => engines/table_engines/mergetree_family}/versionedcollapsingmergetree.md (99%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/buffer.md (100%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/dictionary.md (94%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/distributed.md (92%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/external_data.md (100%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/file.md (81%) create mode 100644 docs/ru/engines/table_engines/special/generate.md create mode 100644 docs/ru/engines/table_engines/special/index.md rename docs/ru/{operations/table_engines => engines/table_engines/special}/join.md (70%) create mode 100644 docs/ru/engines/table_engines/special/materializedview.md rename docs/ru/{operations/table_engines => engines/table_engines/special}/memory.md (100%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/merge.md (98%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/null.md (100%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/set.md (100%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/url.md (95%) rename docs/ru/{operations/table_engines => engines/table_engines/special}/view.md (100%) create mode 100644 docs/ru/faq/index.md create mode 100644 docs/ru/interfaces/third-party/index.md create mode 100644 docs/ru/introduction/index.md create mode 100644 docs/ru/operations/optimizing_performance/index.md create mode 100644 docs/ru/operations/optimizing_performance/sampling_query_profiler.md delete mode 100644 docs/ru/operations/performance/sampling_query_profiler.md rename docs/ru/operations/{server_settings => server_configuration_parameters}/index.md (93%) rename docs/ru/operations/{server_settings => server_configuration_parameters}/settings.md (84%) delete mode 100644 docs/ru/operations/table_engines/generate.md delete mode 100644 docs/ru/operations/table_engines/materializedview.md create mode 100644 docs/ru/operations/utilities/clickhouse-benchmark.md rename docs/ru/operations/{utils => utilities}/clickhouse-copier.md (100%) rename docs/ru/operations/{utils => utilities}/clickhouse-local.md (97%) rename docs/ru/operations/{utils => utilities}/index.md (100%) delete mode 100644 docs/ru/operations/utils/clickhouse-benchmark.md delete mode 100644 docs/ru/query_language/index.md delete mode 100644 docs/ru/roadmap.md rename 
docs/ru/{query_language/agg_functions => sql_reference/aggregate_functions}/combinators.md (96%) rename docs/ru/{query_language/agg_functions => sql_reference/aggregate_functions}/index.md (100%) rename docs/ru/{query_language/agg_functions => sql_reference/aggregate_functions}/parametric_functions.md (94%) rename docs/ru/{query_language/agg_functions => sql_reference/aggregate_functions}/reference.md (91%) rename docs/ru/{data_types/nested_data_structures => sql_reference/data_types}/aggregatefunction.md (87%) rename docs/ru/{ => sql_reference}/data_types/array.md (92%) rename docs/ru/{ => sql_reference}/data_types/boolean.md (100%) rename docs/ru/{ => sql_reference}/data_types/date.md (100%) rename docs/ru/{ => sql_reference}/data_types/datetime.md (86%) rename docs/ru/{ => sql_reference}/data_types/datetime64.md (92%) rename docs/ru/{ => sql_reference}/data_types/decimal.md (100%) create mode 100644 docs/ru/sql_reference/data_types/domains/index.md rename docs/ru/{ => sql_reference}/data_types/domains/ipv4.md (100%) rename docs/ru/{ => sql_reference}/data_types/domains/ipv6.md (100%) rename docs/ru/{ => sql_reference}/data_types/domains/overview.md (100%) rename docs/ru/{ => sql_reference}/data_types/enum.md (99%) rename docs/ru/{ => sql_reference}/data_types/fixedstring.md (89%) rename docs/ru/{ => sql_reference}/data_types/float.md (97%) rename docs/ru/{ => sql_reference}/data_types/index.md (100%) rename docs/ru/{ => sql_reference}/data_types/int_uint.md (100%) rename docs/ru/{ => sql_reference}/data_types/nested_data_structures/index.md (100%) rename docs/ru/{ => sql_reference}/data_types/nested_data_structures/nested.md (100%) rename docs/ru/{ => sql_reference}/data_types/nullable.md (83%) rename docs/ru/{ => sql_reference}/data_types/special_data_types/expression.md (100%) rename docs/ru/{ => sql_reference}/data_types/special_data_types/index.md (100%) rename docs/ru/{ => sql_reference}/data_types/special_data_types/interval.md (84%) rename docs/ru/{ => sql_reference}/data_types/special_data_types/nothing.md (63%) rename docs/ru/{ => sql_reference}/data_types/special_data_types/set.md (100%) rename docs/ru/{ => sql_reference}/data_types/string.md (100%) rename docs/ru/{ => sql_reference}/data_types/tuple.md (87%) rename docs/ru/{ => sql_reference}/data_types/uuid.md (82%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries/external_dictionaries}/external_dicts.md (79%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries/external_dictionaries}/external_dicts_dict.md (91%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries/external_dictionaries}/external_dicts_dict_hierarchical.md (90%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries/external_dictionaries}/external_dicts_dict_layout.md (99%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries/external_dictionaries}/external_dicts_dict_lifetime.md (100%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries/external_dictionaries}/external_dicts_dict_sources.md (96%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries/external_dictionaries}/external_dicts_dict_structure.md (90%) create mode 100644 docs/ru/sql_reference/dictionaries/external_dictionaries/index.md rename docs/ru/{query_language/dicts => sql_reference/dictionaries}/index.md (82%) rename docs/ru/{query_language/dicts => sql_reference/dictionaries}/internal_dicts.md (100%) rename docs/ru/{query_language => sql_reference}/functions/arithmetic_functions.md (100%) 
rename docs/ru/{query_language => sql_reference}/functions/array_functions.md (97%) rename docs/ru/{query_language => sql_reference}/functions/array_join.md (100%) rename docs/ru/{query_language => sql_reference}/functions/bit_functions.md (94%) rename docs/ru/{query_language => sql_reference}/functions/bitmap_functions.md (97%) rename docs/ru/{query_language => sql_reference}/functions/comparison_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/conditional_functions.md (98%) rename docs/ru/{query_language => sql_reference}/functions/date_time_functions.md (97%) rename docs/ru/{query_language => sql_reference}/functions/encoding_functions.md (91%) rename docs/ru/{query_language => sql_reference}/functions/ext_dict_functions.md (85%) rename docs/ru/{query_language => sql_reference}/functions/functions_for_nulls.md (97%) rename docs/ru/{query_language => sql_reference}/functions/geo.md (86%) rename docs/ru/{query_language => sql_reference}/functions/hash_functions.md (88%) rename docs/ru/{query_language => sql_reference}/functions/higher_order_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/in_functions.md (93%) rename docs/ru/{query_language => sql_reference}/functions/index.md (100%) rename docs/ru/{query_language => sql_reference}/functions/introspection.md (94%) rename docs/ru/{query_language => sql_reference}/functions/ip_address_functions.md (95%) rename docs/ru/{query_language => sql_reference}/functions/json_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/logical_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/machine_learning_functions.md (51%) rename docs/ru/{query_language => sql_reference}/functions/math_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/other_functions.md (97%) rename docs/ru/{query_language => sql_reference}/functions/random_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/rounding_functions.md (98%) rename docs/ru/{query_language => sql_reference}/functions/splitting_merging_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/string_functions.md (97%) rename docs/ru/{query_language => sql_reference}/functions/string_replace_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/string_search_functions.md (100%) rename docs/ru/{query_language => sql_reference}/functions/type_conversion_functions.md (90%) rename docs/ru/{query_language => sql_reference}/functions/url_functions.md (98%) rename docs/ru/{query_language => sql_reference}/functions/uuid_functions.md (92%) rename docs/ru/{query_language => sql_reference}/functions/ym_dict_functions.md (98%) create mode 100644 docs/ru/sql_reference/index.md rename docs/ru/{query_language => sql_reference}/operators.md (89%) rename docs/ru/{query_language => sql_reference/statements}/alter.md (93%) rename docs/ru/{query_language => sql_reference/statements}/create.md (94%) create mode 100644 docs/ru/sql_reference/statements/index.md rename docs/ru/{query_language => sql_reference/statements}/insert_into.md (88%) rename docs/ru/{query_language => sql_reference/statements}/misc.md (87%) rename docs/ru/{query_language => sql_reference/statements}/select.md (91%) rename docs/ru/{query_language => sql_reference/statements}/show.md (90%) rename docs/ru/{query_language => sql_reference/statements}/system.md (80%) rename docs/ru/{query_language => sql_reference}/syntax.md (97%) rename 
docs/ru/{query_language => sql_reference}/table_functions/file.md (92%) rename docs/ru/{query_language => sql_reference}/table_functions/generate.md (100%) rename docs/ru/{query_language => sql_reference}/table_functions/hdfs.md (100%) rename docs/ru/{query_language => sql_reference}/table_functions/index.md (62%) rename docs/ru/{query_language => sql_reference}/table_functions/input.md (100%) rename docs/ru/{query_language => sql_reference}/table_functions/jdbc.md (100%) rename docs/ru/{query_language => sql_reference}/table_functions/merge.md (100%) rename docs/ru/{query_language => sql_reference}/table_functions/mysql.md (93%) rename docs/ru/{query_language => sql_reference}/table_functions/numbers.md (100%) rename docs/ru/{query_language => sql_reference}/table_functions/odbc.md (95%) rename docs/ru/{query_language => sql_reference}/table_functions/remote.md (100%) rename docs/ru/{query_language => sql_reference}/table_functions/url.md (100%) create mode 100644 docs/ru/whats_new/changelog/2017.md create mode 100644 docs/ru/whats_new/changelog/2018.md create mode 100644 docs/ru/whats_new/changelog/2019.md create mode 100644 docs/ru/whats_new/changelog/index.md create mode 100644 docs/ru/whats_new/index.md create mode 100644 docs/ru/whats_new/roadmap.md rename docs/ru/{ => whats_new}/security_changelog.md (100%) delete mode 100644 docs/toc_ru.yml diff --git a/docs/fa/operations/performance/sampling_query_profiler_example_result.txt b/docs/fa/operations/performance/sampling_query_profiler_example_result.txt index a5f6d71ca95..56c2fdf9c65 100644 --- a/docs/fa/operations/performance/sampling_query_profiler_example_result.txt +++ b/docs/fa/operations/performance/sampling_query_profiler_example_result.txt @@ -1,7 +1,3 @@ ---- -en_copy: true ---- - Row 1: ────── count(): 6344 diff --git a/docs/ru/changelog/2017.md b/docs/ru/changelog/2017.md deleted file mode 100644 index 95156754100..00000000000 --- a/docs/ru/changelog/2017.md +++ /dev/null @@ -1,265 +0,0 @@ ---- -en_copy: true ---- - -### ClickHouse release 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21} - -This release contains bug fixes for the previous release 1.1.54318: - -- Fixed bug with possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows in logs in Warning messages like `Part ... from own log doesn't exist.` The issue is relevant even if you don’t see these messages in logs. - -### ClickHouse release 1.1.54318, 2017-11-30 {#clickhouse-release-1-1-54318-2017-11-30} - -This release contains bug fixes for the previous release 1.1.54310: - -- Fixed incorrect row deletions during merges in the SummingMergeTree engine -- Fixed a memory leak in unreplicated MergeTree engines -- Fixed performance degradation with frequent inserts in MergeTree engines -- Fixed an issue that was causing the replication queue to stop running -- Fixed rotation and archiving of server logs - -### ClickHouse release 1.1.54310, 2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01} - -#### New features: {#new-features} - -- Custom partitioning key for the MergeTree family of table engines. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine. -- Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. -- Added support for time zones with non-integer offsets from UTC. 
-- Added support for arithmetic operations with time intervals. -- The range of values for the Date and DateTime types is extended to the year 2105. -- Added the `CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view). -- Added the `ATTACH TABLE` query without arguments. -- The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted to the sumMap aggregate function. You can now specify such columns explicitly. -- Max size of the IP trie dictionary is increased to 128M entries. -- Added the getSizeOfEnumType function. -- Added the sumWithOverflow aggregate function. -- Added support for the Cap’n Proto input format. -- You can now customize compression level when using the zstd algorithm. - -#### Backward incompatible changes: {#backward-incompatible-changes} - -- Creation of temporary tables with an engine other than Memory is not allowed. -- Explicit creation of tables with the View or MaterializedView engine is not allowed. -- During table creation, a new check verifies that the sampling key expression is included in the primary key. - -#### Bug fixes: {#bug-fixes} - -- Fixed hangups when synchronously inserting into a Distributed table. -- Fixed nonatomic adding and removing of parts in Replicated tables. -- Data inserted into a materialized view is not subjected to unnecessary deduplication. -- Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable does not result in an error anymore. -- Users don’t need access permissions to the `default` database to create temporary tables anymore. -- Fixed crashing when specifying the Array type without arguments. -- Fixed hangups when the disk volume containing server logs is full. -- Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch. - -#### Build improvements: {#build-improvements} - -- Several third-party libraries (notably Poco) were updated and converted to git submodules. - -### ClickHouse release 1.1.54304, 2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19} - -#### New features: {#new-features-1} - -- TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml` ). - -#### Bug fixes: {#bug-fixes-1} - -- `ALTER` for replicated tables now tries to start running as soon as possible. -- Fixed crashing when reading data with the setting `preferred_block_size_bytes=0.` -- Fixed crashes of `clickhouse-client` when pressing `Page Down` -- Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL` -- `FREEZE PARTITION` always works atomically now. -- Empty POST requests now return a response with code 411. -- Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8)).` -- Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables. -- Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b` -- Users are updated correctly with invalid `users.xml` -- Correct handling when an executable dictionary returns a non-zero response code. - -### ClickHouse release 1.1.54292, 2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20} - -#### New features: {#new-features-2} - -- Added the `pointInPolygon` function for working with coordinates on a coordinate plane. -- Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`. -- Added the `trunc` function. 
Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers. -- The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting `compile = 1` , which is not used by default). -- Reduced the time needed for dynamic compilation of queries. - -#### Bug fixes: {#bug-fixes-2} - -- Fixed an error that sometimes produced `part ... intersects previous part` messages and weakened replica consistency. -- Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown. -- Removed excessive logging when restoring replicas. -- Fixed an error in the UNION ALL implementation. -- Fixed an error in the concat function that occurred if the first column in a block has the Array type. -- Progress is now displayed correctly in the system.merges table. - -### ClickHouse release 1.1.54289, 2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13} - -#### New features: {#new-features-3} - -- `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`. -- Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`. -- Added `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster. -- Added aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`). -- External dictionaries can be loaded from MySQL by specifying a socket in the filesystem. -- External dictionaries can be loaded from MySQL over SSL (`ssl_cert`, `ssl_key`, `ssl_ca` parameters). -- Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth use for queries per user. -- Support for `DROP TABLE` for temporary tables. -- Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats. -- Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes). -- FIFO locking is used during ALTER: an ALTER query isn’t blocked indefinitely for continuously running queries. -- Option to set `umask` in the config file. -- Improved performance for queries with `DISTINCT` . - -#### Bug fixes: {#bug-fixes-3} - -- Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn’t get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things. -- Fixed randomization when choosing hosts for the connection to ZooKeeper. -- Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost. -- Fixed an error where a data part in a `ReplicatedMergeTree` table could be broken after running `ALTER MODIFY` on an element in a `Nested` structure. -- Fixed an error that could cause SELECT queries to “hang”. -- Improvements to distributed DDL queries. -- Fixed the query `CREATE TABLE ... AS `. -- Resolved the deadlock in the `ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables. 
-- Fixed the invalid default value for `Enum`s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats.
-- Resolved the appearance of zombie processes when using a dictionary with an `executable` source.
-- Fixed segfault for the HEAD query.
-
-#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse}
-
-- You can use `pbuilder` to build ClickHouse.
-- You can use `libc++` instead of `libstdc++` for builds on Linux.
-- Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`.
-
-#### Please note when upgrading: {#please-note-when-upgrading}
-
-- There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 GiB to 150 GiB. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts. As a result, INSERT queries will fail with the message “Merges are processing significantly slower than inserts.” Use the `SELECT * FROM system.merges` query to monitor the situation. You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don’t need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the `<merge_tree>` section in config.xml, set `<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool>` and restart the server.
-
-### ClickHouse release 1.1.54284, 2017-08-29 {#clickhouse-release-1-1-54284-2017-08-29}
-
-- This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper.
-
-### ClickHouse release 1.1.54282, 2017-08-23 {#clickhouse-release-1-1-54282-2017-08-23}
-
-This release contains bug fixes for the previous release 1.1.54276:
-
-- Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table.
-- Fixed parsing when inserting in RowBinary format if the input data starts with ';'.
-- Fixed errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`).
-
-### ClickHouse release 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16}
-
-#### New features: {#new-features-4}
-
-- Added an optional WITH section for a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a`
-- INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert\_distributed\_sync=1.
-- Added the UUID data type for working with 16-byte identifiers.
-- Added aliases of CHAR, FLOAT and other types for compatibility with Tableau.
-- Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers.
-- You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries.
-- Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len)`.
-- Added the max\_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance.
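As a brief editorial aside (not part of the patch being quoted): the two SELECT-side features above are easy to demonstrate. The `WITH 1+1 AS a` query comes straight from the entry itself; the `hits` table and its `user_id` and `url` columns in the second query are hypothetical.

```sql
-- Optional WITH section in a SELECT query (1.1.54276).
WITH 1 + 1 AS a
SELECT a, a * a;

-- groupArray with the max_size parameter: keeps at most 3 URLs per user
-- instead of accumulating the full array (hypothetical `hits` table).
SELECT user_id, groupArray(3)(url) AS sampled_urls
FROM hits
GROUP BY user_id;
```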
-
-#### Main changes: {#main-changes}
-
-- Security improvements: all server files are created with 0640 permissions (can be changed via config parameter).
-- Improved error messages for queries with invalid syntax.
-- Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data.
-- Significantly increased the performance of data merges for the ReplacingMergeTree engine.
-- Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed\_directory\_monitor\_batch\_inserts=1.
-
-#### Backward incompatible changes: {#backward-incompatible-changes-1}
-
-- Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays.
-
-#### Complete list of changes: {#complete-list-of-changes}
-
-- Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format.
-- Optimized stream allocation when reading from a Distributed table.
-- Settings can be configured in readonly mode if the value doesn’t change.
-- Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred\_block\_size\_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns.
-- Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op constexpr`.
-- Added new settings for MergeTree engines (the merge\_tree section in config.xml):
-   - replicated\_deduplication\_window\_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables.
-   - cleanup\_delay\_period sets how often to start cleanup to remove outdated data.
-   - replicated\_can\_become\_leader can prevent a replica from becoming the leader (and assigning merges).
-- Accelerated cleanup to remove outdated data from ZooKeeper.
-- Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed\_ddl\_task\_timeout, which limits the time to wait for a response from the servers in the cluster. If a DDL request has not been performed on all hosts, the response will contain a timeout error and the request will be executed in async mode.
-- Improved display of stack traces in the server logs.
-- Added the “none” value for the compression method.
-- You can use multiple dictionaries\_config sections in config.xml.
-- It is possible to connect to MySQL through a socket in the file system.
-- The system.parts table has a new column with information about the size of marks, in bytes.
-
-#### Bug fixes: {#bug-fixes-4}
-
-- Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field.
-- Fixed a rare race condition in ReplicatedMergeTree when checking data parts.
-- Fixed possible freezing on “leader election” when starting a server.
-- The max\_replica\_delay\_for\_distributed\_queries setting was ignored when using a local replica of the data source. This has been fixed.
-- Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column.
-- Fixed an exception in the multiIf function when using empty arrays or strings.
-- Fixed excessive memory allocations when deserializing Native format.
-- Fixed incorrect auto-update of Trie dictionaries.
-- Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE.
-- Fixed a crash of GROUP BY when using distributed\_aggregation\_memory\_efficient=1.
-- Now you can specify the database.table in the right side of IN and JOIN.
-- Too many threads were used for parallel aggregation. This has been fixed.
-- Fixed how the “if” function works with FixedString arguments.
-- SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed.
-- Running `CREATE VIEW IF EXISTS` no longer causes crashes.
-- Fixed incorrect behavior when input\_format\_skip\_unknown\_fields=1 is set and there are negative numbers.
-- Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary.
-- Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables.
-- Fixed an incorrect interpretation of a SELECT query from Dictionary tables.
-- Fixed the “Cannot mremap” error when using arrays in IN and JOIN clauses with more than 2 billion elements.
-- Fixed the failover for dictionaries with MySQL as the source.
-
-#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse-1}
-
-- Builds can be assembled in Arcadia.
-- You can use gcc 7 to compile ClickHouse.
-- Parallel builds using ccache+distcc are faster now.
-
-### ClickHouse release 1.1.54245, 2017-07-04 {#clickhouse-release-1-1-54245-2017-07-04}
-
-#### New features: {#new-features-5}
-
-- Distributed DDL (for example, `CREATE TABLE ON CLUSTER`).
-- The replicated query `ALTER TABLE CLEAR COLUMN IN PARTITION`.
-- The engine for Dictionary tables (access to dictionary data in the form of a table).
-- Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries).
-- You can check for updates to the dictionary by sending a request to the source.
-- Qualified column names.
-- Quoting identifiers using double quotation marks.
-- Sessions in the HTTP interface.
-- The OPTIMIZE query for a Replicated table can run not only on the leader.
-
-#### Backward incompatible changes: {#backward-incompatible-changes-2}
-
-- Removed SET GLOBAL.
-
-#### Minor changes: {#minor-changes}
-
-- Now after an alert is triggered, the log prints the full stack trace.
-- Relaxed the verification of the number of damaged/extra data parts at startup (there were too many false positives).
-
-#### Bug fixes: {#bug-fixes-5}
-
-- Fixed a bad connection “sticking” when inserting into a Distributed table.
-- GLOBAL IN now works for a query from a Merge table that looks at a Distributed table.
-- The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed.
-- Changes in how an executable source of cached external dictionaries works.
-- Fixed the comparison of strings containing null characters.
-- Fixed the comparison of Float32 primary key fields with constants.
-- Previously, an incorrect estimate of the size of a field could lead to overly large allocations.
-- Fixed a crash when querying a Nullable column added to a table using ALTER.
-- Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT.
-- Fixed an ORDER BY subquery consisting of only constant values.
-- Previously, a Replicated table could remain in an invalid state after a failed DROP TABLE.
-- Aliases for scalar subqueries with empty results are no longer lost. -- Now a query that used compilation does not fail with an error if the .so file gets damaged. diff --git a/docs/ru/changelog/2018.md b/docs/ru/changelog/2018.md deleted file mode 100644 index 49bef18cbf3..00000000000 --- a/docs/ru/changelog/2018.md +++ /dev/null @@ -1,1060 +0,0 @@ ---- -en_copy: true ---- - -## ClickHouse release 18.16 {#clickhouse-release-18-16} - -### ClickHouse release 18.16.1, 2018-12-21 {#clickhouse-release-18-16-1-2018-12-21} - -#### Bug fixes: {#bug-fixes} - -- Fixed an error that led to problems with updating dictionaries with the ODBC source. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) -- JIT compilation of aggregate functions now works with LowCardinality columns. [\#3838](https://github.com/ClickHouse/ClickHouse/issues/3838) - -#### Improvements: {#improvements} - -- Added the `low_cardinality_allow_in_native_format` setting (enabled by default). When disabled, LowCardinality columns will be converted to ordinary columns for SELECT queries and ordinary columns will be expected for INSERT queries. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) - -#### Build improvements: {#build-improvements} - -- Fixes for builds on macOS and ARM. - -### ClickHouse release 18.16.0, 2018-12-14 {#clickhouse-release-18-16-0-2018-12-14} - -#### New features: {#new-features} - -- `DEFAULT` expressions are evaluated for missing fields when loading data in semi-structured input formats (`JSONEachRow`, `TSKV`). The feature is enabled with the `insert_sample_with_metadata` setting. [\#3555](https://github.com/ClickHouse/ClickHouse/pull/3555) -- The `ALTER TABLE` query now has the `MODIFY ORDER BY` action for changing the sorting key when adding or removing a table column. This is useful for tables in the `MergeTree` family that perform additional tasks when merging based on this sorting key, such as `SummingMergeTree`, `AggregatingMergeTree`, and so on. [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) [\#3755](https://github.com/ClickHouse/ClickHouse/pull/3755) -- For tables in the `MergeTree` family, now you can specify a different sorting key (`ORDER BY`) and index (`PRIMARY KEY`). The sorting key can be longer than the index. [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) -- Added the `hdfs` table function and the `HDFS` table engine for importing and exporting data to HDFS. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/3617) -- Added functions for working with base64: `base64Encode`, `base64Decode`, `tryBase64Decode`. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3350) -- Now you can use a parameter to configure the precision of the `uniqCombined` aggregate function (select the number of HyperLogLog cells). [\#3406](https://github.com/ClickHouse/ClickHouse/pull/3406) -- Added the `system.contributors` table that contains the names of everyone who made commits in ClickHouse. [\#3452](https://github.com/ClickHouse/ClickHouse/pull/3452) -- Added the ability to omit the partition for the `ALTER TABLE ... FREEZE` query in order to back up all partitions at once. [\#3514](https://github.com/ClickHouse/ClickHouse/pull/3514) -- Added `dictGet` and `dictGetOrDefault` functions that don’t require specifying the type of return value. The type is determined automatically from the dictionary description. 
[Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3564) -- Now you can specify comments for a column in the table description and change it using `ALTER`. [\#3377](https://github.com/ClickHouse/ClickHouse/pull/3377) -- Reading is supported for `Join` type tables with simple keys. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -- Now you can specify the options `join_use_nulls`, `max_rows_in_join`, `max_bytes_in_join`, and `join_overflow_mode` when creating a `Join` type table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -- Added the `joinGet` function that allows you to use a `Join` type table like a dictionary. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -- Added the `partition_key`, `sorting_key`, `primary_key`, and `sampling_key` columns to the `system.tables` table in order to provide information about table keys. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) -- Added the `is_in_partition_key`, `is_in_sorting_key`, `is_in_primary_key`, and `is_in_sampling_key` columns to the `system.columns` table. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) -- Added the `min_time` and `max_time` columns to the `system.parts` table. These columns are populated when the partitioning key is an expression consisting of `DateTime` columns. [Emmanuel Donin de Rosière](https://github.com/ClickHouse/ClickHouse/pull/3800) - -#### Bug fixes: {#bug-fixes-1} - -- Fixes and performance improvements for the `LowCardinality` data type. `GROUP BY` using `LowCardinality(Nullable(...))`. Getting the values of `extremes`. Processing high-order functions. `LEFT ARRAY JOIN`. Distributed `GROUP BY`. Functions that return `Array`. Execution of `ORDER BY`. Writing to `Distributed` tables (nicelulu). Backward compatibility for `INSERT` queries from old clients that implement the `Native` protocol. Support for `LowCardinality` for `JOIN`. Improved performance when working in a single stream. [\#3823](https://github.com/ClickHouse/ClickHouse/pull/3823) [\#3803](https://github.com/ClickHouse/ClickHouse/pull/3803) [\#3799](https://github.com/ClickHouse/ClickHouse/pull/3799) [\#3769](https://github.com/ClickHouse/ClickHouse/pull/3769) [\#3744](https://github.com/ClickHouse/ClickHouse/pull/3744) [\#3681](https://github.com/ClickHouse/ClickHouse/pull/3681) [\#3651](https://github.com/ClickHouse/ClickHouse/pull/3651) [\#3649](https://github.com/ClickHouse/ClickHouse/pull/3649) [\#3641](https://github.com/ClickHouse/ClickHouse/pull/3641) [\#3632](https://github.com/ClickHouse/ClickHouse/pull/3632) [\#3568](https://github.com/ClickHouse/ClickHouse/pull/3568) [\#3523](https://github.com/ClickHouse/ClickHouse/pull/3523) [\#3518](https://github.com/ClickHouse/ClickHouse/pull/3518) -- Fixed how the `select_sequential_consistency` option works. Previously, when this setting was enabled, an incomplete result was sometimes returned after beginning to write to a new partition. [\#2863](https://github.com/ClickHouse/ClickHouse/pull/2863) -- Databases are correctly specified when executing DDL `ON CLUSTER` queries and `ALTER UPDATE/DELETE`. [\#3772](https://github.com/ClickHouse/ClickHouse/pull/3772) [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) -- Databases are correctly specified for subqueries inside a VIEW. [\#3521](https://github.com/ClickHouse/ClickHouse/pull/3521) -- Fixed a bug in `PREWHERE` with `FINAL` for `VersionedCollapsingMergeTree`. 
[7167bfd7](https://github.com/ClickHouse/ClickHouse/commit/7167bfd7b365538f7a91c4307ad77e552ab4e8c1) -- Now you can use `KILL QUERY` to cancel queries that have not started yet because they are waiting for the table to be locked. [\#3517](https://github.com/ClickHouse/ClickHouse/pull/3517) -- Corrected date and time calculations if the clocks were moved back at midnight (this happens in Iran, and happened in Moscow from 1981 to 1983). Previously, this led to the time being reset a day earlier than necessary, and also caused incorrect formatting of the date and time in text format. [\#3819](https://github.com/ClickHouse/ClickHouse/pull/3819) -- Fixed bugs in some cases of `VIEW` and subqueries that omit the database. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3521) -- Fixed a race condition when simultaneously reading from a `MATERIALIZED VIEW` and deleting a `MATERIALIZED VIEW` due to not locking the internal `MATERIALIZED VIEW`. [\#3404](https://github.com/ClickHouse/ClickHouse/pull/3404) [\#3694](https://github.com/ClickHouse/ClickHouse/pull/3694) -- Fixed the error `Lock handler cannot be nullptr.` [\#3689](https://github.com/ClickHouse/ClickHouse/pull/3689) -- Fixed query processing when the `compile_expressions` option is enabled (it’s enabled by default). Nondeterministic constant expressions like the `now` function are no longer unfolded. [\#3457](https://github.com/ClickHouse/ClickHouse/pull/3457) -- Fixed a crash when specifying a non-constant scale argument in `toDecimal32/64/128` functions. -- Fixed an error when trying to insert an array with `NULL` elements in the `Values` format into a column of type `Array` without `Nullable` (if `input_format_values_interpret_expressions` = 1). [\#3487](https://github.com/ClickHouse/ClickHouse/pull/3487) [\#3503](https://github.com/ClickHouse/ClickHouse/pull/3503) -- Fixed continuous error logging in `DDLWorker` if ZooKeeper is not available. [8f50c620](https://github.com/ClickHouse/ClickHouse/commit/8f50c620334988b28018213ec0092fe6423847e2) -- Fixed the return type for `quantile*` functions from `Date` and `DateTime` types of arguments. [\#3580](https://github.com/ClickHouse/ClickHouse/pull/3580) -- Fixed the `WITH` clause if it specifies a simple alias without expressions. [\#3570](https://github.com/ClickHouse/ClickHouse/pull/3570) -- Fixed processing of queries with named sub-queries and qualified column names when `enable_optimize_predicate_expression` is enabled. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3588) -- Fixed the error `Attempt to attach to nullptr thread group` when working with materialized views. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3623) -- Fixed a crash when passing certain incorrect arguments to the `arrayReverse` function. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) -- Fixed the buffer overflow in the `extractURLParameter` function. Improved performance. Added correct processing of strings containing zero bytes. [141e9799](https://github.com/ClickHouse/ClickHouse/commit/141e9799e49201d84ea8e951d1bed4fb6d3dacb5) -- Fixed buffer overflow in the `lowerUTF8` and `upperUTF8` functions. Removed the ability to execute these functions over `FixedString` type arguments. [\#3662](https://github.com/ClickHouse/ClickHouse/pull/3662) -- Fixed a rare race condition when deleting `MergeTree` tables. 
[\#3680](https://github.com/ClickHouse/ClickHouse/pull/3680) -- Fixed a race condition when reading from `Buffer` tables and simultaneously performing `ALTER` or `DROP` on the target tables. [\#3719](https://github.com/ClickHouse/ClickHouse/pull/3719) -- Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) - -#### Improvements: {#improvements-1} - -- The server does not write the processed configuration files to the `/etc/clickhouse-server/` directory. Instead, it saves them in the `preprocessed_configs` directory inside `path`. This means that the `/etc/clickhouse-server/` directory doesn’t have write access for the `clickhouse` user, which improves security. [\#2443](https://github.com/ClickHouse/ClickHouse/pull/2443) -- The `min_merge_bytes_to_use_direct_io` option is set to 10 GiB by default. A merge that forms large parts of tables from the MergeTree family will be performed in `O_DIRECT` mode, which prevents excessive page cache eviction. [\#3504](https://github.com/ClickHouse/ClickHouse/pull/3504) -- Accelerated server start when there is a very large number of tables. [\#3398](https://github.com/ClickHouse/ClickHouse/pull/3398) -- Added a connection pool and HTTP `Keep-Alive` for connections between replicas. [\#3594](https://github.com/ClickHouse/ClickHouse/pull/3594) -- If the query syntax is invalid, the `400 Bad Request` code is returned in the `HTTP` interface (500 was returned previously). [31bc680a](https://github.com/ClickHouse/ClickHouse/commit/31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab) -- The `join_default_strictness` option is set to `ALL` by default for compatibility. [120e2cbe](https://github.com/ClickHouse/ClickHouse/commit/120e2cbe2ff4fbad626c28042d9b28781c805afe) -- Removed logging to `stderr` from the `re2` library for invalid or complex regular expressions. [\#3723](https://github.com/ClickHouse/ClickHouse/pull/3723) -- Added for the `Kafka` table engine: checks for subscriptions before beginning to read from Kafka; the kafka\_max\_block\_size setting for the table. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3396) -- The `cityHash64`, `farmHash64`, `metroHash64`, `sipHash64`, `halfMD5`, `murmurHash2_32`, `murmurHash2_64`, `murmurHash3_32`, and `murmurHash3_64` functions now work for any number of arguments and for arguments in the form of tuples. [\#3451](https://github.com/ClickHouse/ClickHouse/pull/3451) [\#3519](https://github.com/ClickHouse/ClickHouse/pull/3519) -- The `arrayReverse` function now works with any types of arrays. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) -- Added an optional parameter: the slot size for the `timeSlots` function. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3724) -- For `FULL` and `RIGHT JOIN`, the `max_block_size` setting is used for a stream of non-joined data from the right table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3699) -- Added the `--secure` command line parameter in `clickhouse-benchmark` and `clickhouse-performance-test` to enable TLS. [\#3688](https://github.com/ClickHouse/ClickHouse/pull/3688) [\#3690](https://github.com/ClickHouse/ClickHouse/pull/3690) -- Type conversion when the structure of a `Buffer` type table does not match the structure of the destination table. 
[Vitaly Baranov](https://github.com/ClickHouse/ClickHouse/pull/3603) -- Added the `tcp_keep_alive_timeout` option to enable keep-alive packets after inactivity for the specified time interval. [\#3441](https://github.com/ClickHouse/ClickHouse/pull/3441) -- Removed unnecessary quoting of values for the partition key in the `system.parts` table if it consists of a single column. [\#3652](https://github.com/ClickHouse/ClickHouse/pull/3652) -- The modulo function works for `Date` and `DateTime` data types. [\#3385](https://github.com/ClickHouse/ClickHouse/pull/3385) -- Added synonyms for the `POWER`, `LN`, `LCASE`, `UCASE`, `REPLACE`, `LOCATE`, `SUBSTR`, and `MID` functions. [\#3774](https://github.com/ClickHouse/ClickHouse/pull/3774) [\#3763](https://github.com/ClickHouse/ClickHouse/pull/3763) Some function names are case-insensitive for compatibility with the SQL standard. Added syntactic sugar `SUBSTRING(expr FROM start FOR length)` for compatibility with SQL. [\#3804](https://github.com/ClickHouse/ClickHouse/pull/3804) -- Added the ability to `mlock` memory pages corresponding to `clickhouse-server` executable code to prevent it from being forced out of memory. This feature is disabled by default. [\#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) -- Improved performance when reading from `O_DIRECT` (with the `min_bytes_to_use_direct_io` option enabled). [\#3405](https://github.com/ClickHouse/ClickHouse/pull/3405) -- Improved performance of the `dictGet...OrDefault` function for a constant key argument and a non-constant default argument. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3563) -- The `firstSignificantSubdomain` function now processes the domains `gov`, `mil`, and `edu`. [Igor Hatarist](https://github.com/ClickHouse/ClickHouse/pull/3601) Improved performance. [\#3628](https://github.com/ClickHouse/ClickHouse/pull/3628) -- Ability to specify custom environment variables for starting `clickhouse-server` using the `SYS-V init.d` script by defining `CLICKHOUSE_PROGRAM_ENV` in `/etc/default/clickhouse`. - [Pavlo Bashynskyi](https://github.com/ClickHouse/ClickHouse/pull/3612) -- Correct return code for the clickhouse-server init script. [\#3516](https://github.com/ClickHouse/ClickHouse/pull/3516) -- The `system.metrics` table now has the `VersionInteger` metric, and `system.build_options` has the added line `VERSION_INTEGER`, which contains the numeric form of the ClickHouse version, such as `18016000`. [\#3644](https://github.com/ClickHouse/ClickHouse/pull/3644) -- Removed the ability to compare the `Date` type with a number to avoid potential errors like `date = 2018-12-17`, where quotes around the date are omitted by mistake. [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) -- Fixed the behavior of stateful functions like `rowNumberInAllBlocks`. They previously output a result that was one number larger due to starting during query analysis. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3729) -- If the `force_restore_data` file can’t be deleted, an error message is displayed. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3794) - -#### Build improvements: {#build-improvements-1} - -- Updated the `jemalloc` library, which fixes a potential memory leak. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3557) -- Profiling with `jemalloc` is enabled by default in order to debug builds. 
[2cc82f5c](https://github.com/ClickHouse/ClickHouse/commit/2cc82f5cbe266421cd4c1165286c2c47e5ffcb15)
-- Added the ability to run integration tests when only `Docker` is installed on the system. [\#3650](https://github.com/ClickHouse/ClickHouse/pull/3650)
-- Added the fuzz expression test in SELECT queries. [\#3442](https://github.com/ClickHouse/ClickHouse/pull/3442)
-- Added a stress test for commits, which performs functional tests in parallel and in random order to detect more race conditions. [\#3438](https://github.com/ClickHouse/ClickHouse/pull/3438)
-- Improved the method for starting clickhouse-server in a Docker image. [Elghazal Ahmed](https://github.com/ClickHouse/ClickHouse/pull/3663)
-- For a Docker image, added support for initializing databases using files in the `/docker-entrypoint-initdb.d` directory. [Konstantin Lebedev](https://github.com/ClickHouse/ClickHouse/pull/3695)
-- Fixes for builds on ARM. [\#3709](https://github.com/ClickHouse/ClickHouse/pull/3709)
-
-#### Backward incompatible changes: {#backward-incompatible-changes}
-
-- Removed the ability to compare the `Date` type with a number. Instead of `toDate('2018-12-18') = 17883`, you must use the explicit type conversion `= toDate(17883)`. [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687)
-
-## ClickHouse release 18.14 {#clickhouse-release-18-14}
-
-### ClickHouse release 18.14.19, 2018-12-19 {#clickhouse-release-18-14-19-2018-12-19}
-
-#### Bug fixes: {#bug-fixes-2}
-
-- Fixed an error that led to problems with updating dictionaries with the ODBC source. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829)
-- Databases are correctly specified when executing DDL `ON CLUSTER` queries. [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460)
-- Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788)
-
-#### Build improvements: {#build-improvements-2}
-
-- Fixes for builds on ARM.
-
-### ClickHouse release 18.14.18, 2018-12-04 {#clickhouse-release-18-14-18-2018-12-04}
-
-#### Bug fixes: {#bug-fixes-3}
-
-- Fixed an error in the `dictGet...` function for dictionaries of type `range`, if one of the arguments is constant and the other is not. [\#3751](https://github.com/ClickHouse/ClickHouse/pull/3751)
-- Fixed an error that caused `netlink: '...': attribute type 1 has an invalid length` messages to be printed in the Linux kernel log; this happened only on sufficiently recent Linux kernel versions. [\#3749](https://github.com/ClickHouse/ClickHouse/pull/3749)
-- Fixed a segfault in the `empty` function for arguments of `FixedString` type. [Daniel, Dao Quang Minh](https://github.com/ClickHouse/ClickHouse/pull/3703)
-- Fixed excessive memory allocation when using a large value of the `max_query_size` setting (a memory chunk of `max_query_size` bytes was preallocated at once). [\#3720](https://github.com/ClickHouse/ClickHouse/pull/3720)
-
-#### Build changes: {#build-changes}
-
-- Fixed build with LLVM/Clang libraries of version 7 from the OS packages (these libraries are used for runtime query compilation). [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582)
-
-### ClickHouse release 18.14.17, 2018-11-30 {#clickhouse-release-18-14-17-2018-11-30}
-
-#### Bug fixes: {#bug-fixes-4}
-
-- Fixed cases when the ODBC bridge process did not terminate with the main server process. 
- Fixed synchronous insertion into the `Distributed` table with a columns list that differs from the column list of the remote table. [\#3673](https://github.com/ClickHouse/ClickHouse/pull/3673)
- Fixed a rare race condition that could lead to a crash when dropping a MergeTree table. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643)
- Fixed a query deadlock in the case when query thread creation fails with the `Resource temporarily unavailable` error. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643)
- Fixed parsing of the `ENGINE` clause when the `CREATE AS table` syntax was used and the `ENGINE` clause was specified before the `AS table` (the error resulted in ignoring the specified engine). [\#3692](https://github.com/ClickHouse/ClickHouse/pull/3692)

### ClickHouse release 18.14.15, 2018-11-21 {#clickhouse-release-18-14-15-2018-11-21}

#### Bug fixes: {#bug-fixes-5}

- The size of a memory chunk was overestimated while deserializing a column of type `Array(String)`, which led to “Memory limit exceeded” errors. The issue appeared in version 18.12.13. [\#3589](https://github.com/ClickHouse/ClickHouse/issues/3589)

### ClickHouse release 18.14.14, 2018-11-20 {#clickhouse-release-18-14-14-2018-11-20}

#### Bug fixes: {#bug-fixes-6}

- Fixed `ON CLUSTER` queries when the cluster is configured as secure (the `<secure>` flag). [\#3599](https://github.com/ClickHouse/ClickHouse/pull/3599)

#### Build changes: {#build-changes-1}

- Fixed build problems (llvm-7 from the system, macOS). [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582)

### ClickHouse release 18.14.13, 2018-11-08 {#clickhouse-release-18-14-13-2018-11-08}

#### Bug fixes: {#bug-fixes-7}

- Fixed the `Block structure mismatch in MergingSorted stream` error. [\#3162](https://github.com/ClickHouse/ClickHouse/issues/3162)
- Fixed `ON CLUSTER` queries in case secure connections were turned on in the cluster config (the `<secure>` flag). [\#3465](https://github.com/ClickHouse/ClickHouse/pull/3465)
- Fixed an error in queries that used `SAMPLE`, `PREWHERE` and alias columns. [\#3543](https://github.com/ClickHouse/ClickHouse/pull/3543)
- Fixed a rare `unknown compression method` error when the `min_bytes_to_use_direct_io` setting was enabled. [\#3544](https://github.com/ClickHouse/ClickHouse/pull/3544)

#### Performance improvements: {#performance-improvements}

- Fixed a performance regression of queries with `GROUP BY` of columns of UInt16 or Date type when executing on AMD EPYC processors. [Igor Lapko](https://github.com/ClickHouse/ClickHouse/pull/3512)
- Fixed a performance regression of queries that process long strings. [\#3530](https://github.com/ClickHouse/ClickHouse/pull/3530)

#### Build improvements: {#build-improvements-3}

- Improvements for simplifying the Arcadia build. [\#3475](https://github.com/ClickHouse/ClickHouse/pull/3475), [\#3535](https://github.com/ClickHouse/ClickHouse/pull/3535)

### ClickHouse release 18.14.12, 2018-11-02 {#clickhouse-release-18-14-12-2018-11-02}

#### Bug fixes: {#bug-fixes-8}

- Fixed a crash on joining two unnamed subqueries. [\#3505](https://github.com/ClickHouse/ClickHouse/pull/3505)
- Fixed generating incorrect queries (with an empty `WHERE` clause) when querying external databases. [hotid](https://github.com/ClickHouse/ClickHouse/pull/3477)
- Fixed using an incorrect timeout value in ODBC dictionaries. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3511)

### ClickHouse release 18.14.11, 2018-10-29 {#clickhouse-release-18-14-11-2018-10-29}

#### Bug fixes: {#bug-fixes-9}

- Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [\#2156](https://github.com/ClickHouse/ClickHouse/issues/2156)
- Fixed errors when merging data in tables containing arrays inside Nested structures. [\#3397](https://github.com/ClickHouse/ClickHouse/pull/3397)
- Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [\#3429](https://github.com/ClickHouse/ClickHouse/pull/3429)
- Fixed an error on inserts to a Distributed table in Native format. [\#3411](https://github.com/ClickHouse/ClickHouse/issues/3411)

### ClickHouse release 18.14.10, 2018-10-23 {#clickhouse-release-18-14-10-2018-10-23}

- The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [\#3410](https://github.com/ClickHouse/ClickHouse/pull/3410)
- The `enable_optimize_predicate_expression` setting is disabled by default.

### ClickHouse release 18.14.9, 2018-10-16 {#clickhouse-release-18-14-9-2018-10-16}

#### New features: {#new-features-1}

- The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available; see the example after this list). [\#3172](https://github.com/ClickHouse/ClickHouse/pull/3172)
- Added the `formatDateTime` function. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2770)
- Added the `JDBC` table engine and `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210)
- Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [\#3146](https://github.com/ClickHouse/ClickHouse/pull/3146)
- Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362)
- Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/ClickHouse/ClickHouse/pull/3144)
- Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3208)
- Added the `SYSTEM FLUSH LOGS` query (forced log flushes to system tables such as `query_log`). [\#3321](https://github.com/ClickHouse/ClickHouse/pull/3321)
- Now you can use predefined `database` and `table` macros when declaring `Replicated` tables. [\#3251](https://github.com/ClickHouse/ClickHouse/pull/3251)
- Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). [\#3153](https://github.com/ClickHouse/ClickHouse/pull/3153)
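
A minimal sketch of the new modifier; the `hits` table and its columns are hypothetical:

```sql
-- Aggregates for every combination of the GROUP BY keys,
-- including subtotals and a grand total:
SELECT site, device, sum(views) AS total
FROM hits
GROUP BY site, device WITH CUBE;

-- Equivalent alternative syntax: GROUP BY CUBE(site, device)
```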

#### Experimental features: {#experimental-features}

- Optimization of the GROUP BY clause for `LowCardinality` data types. [\#3138](https://github.com/ClickHouse/ClickHouse/pull/3138)
- Optimized calculation of expressions for `LowCardinality` data types. [\#3200](https://github.com/ClickHouse/ClickHouse/pull/3200)

#### Improvements: {#improvements-2}

- Significantly reduced memory consumption for queries with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205)
- In the absence of an explicit `JOIN` type (`LEFT`, `INNER`, …), `INNER JOIN` is assumed (see the example after this list). [\#3147](https://github.com/ClickHouse/ClickHouse/pull/3147)
- Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3202)
- The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of a remote database. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210)
- The `compile_expressions` setting (JIT compilation of expressions) is enabled by default.
- Fixed behavior for simultaneous DROP DATABASE/TABLE IF EXISTS and CREATE DATABASE/TABLE IF NOT EXISTS. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message “File … already exists”, and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [\#3101](https://github.com/ClickHouse/ClickHouse/pull/3101)
- LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182)
- Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182)
- Correct calculation of row width in the terminal for `Pretty` formats, including strings with full-width (e.g. CJK) characters. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3257)
- `ON CLUSTER` can be specified for `ALTER UPDATE` queries.
- Improved performance for reading data in `JSONEachRow` format. [\#3332](https://github.com/ClickHouse/ClickHouse/pull/3332)
- Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive. [\#3306](https://github.com/ClickHouse/ClickHouse/pull/3306)
- Added the `TIMESTAMP` synonym for the `DateTime` type. [\#3390](https://github.com/ClickHouse/ClickHouse/pull/3390)
- There is always space reserved for `query_id` in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools.
- Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205)
- Added a compatibility mode for the case when a client library that uses the Native protocol sends fewer columns by mistake than the server expects for an INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [\#3171](https://github.com/ClickHouse/ClickHouse/pull/3171)
- In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). This is useful if the partitioning scheme changes during copying, but only changes slightly. [\#3166](https://github.com/ClickHouse/ClickHouse/pull/3166)
- The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215)
- Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto` format. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3216)
- The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/ClickHouse/ClickHouse/pull/3219)
- In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [\#3330](https://github.com/ClickHouse/ClickHouse/pull/3330)
- Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/ClickHouse/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264)
- Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [\#3281](https://github.com/ClickHouse/ClickHouse/pull/3281)
- Support for the `Decimal` data type in external dictionaries. [\#3324](https://github.com/ClickHouse/ClickHouse/pull/3324)
- Support for the `Decimal` data type in `SummingMergeTree` tables. [\#3348](https://github.com/ClickHouse/ClickHouse/pull/3348)
- Added specializations for `UUID` in `if`. [\#3366](https://github.com/ClickHouse/ClickHouse/pull/3366)
- Reduced the number of `open` and `close` system calls when reading from a `MergeTree` table. [\#3283](https://github.com/ClickHouse/ClickHouse/pull/3283)
- A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3375)
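
A small sketch of the implicit join type; `t1` and `t2` are hypothetical tables sharing an `id` column:

```sql
-- With no join type specified, this is now parsed as an INNER JOIN:
SELECT t1.id, t2.value
FROM t1
JOIN t2 USING (id);
```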

#### Bug fixes: {#bug-fixes-10}

- Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [\#1702](https://github.com/ClickHouse/ClickHouse/pull/1702)
- Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362)
- Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [\#3331](https://github.com/ClickHouse/ClickHouse/pull/3331) [\#3341](https://github.com/ClickHouse/ClickHouse/pull/3341)
- If after merging data parts the checksum for the resulting part differs from the result of the same merge in another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn’t be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [\#3194](https://github.com/ClickHouse/ClickHouse/pull/3194)
- Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3344)
- Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...`. This error occurred in version 18.12.13. [\#3247](https://github.com/ClickHouse/ClickHouse/pull/3247)
- Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the query if the `JOIN` is only performed on remote servers. [\#3340](https://github.com/ClickHouse/ClickHouse/pull/3340)
- Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215)
- For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/ClickHouse/ClickHouse/pull/3150)
- If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse tries to connect to another server. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn’t start. [8218cf3a](https://github.com/ClickHouse/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9)
- If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/ClickHouse/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29)
- Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numerical value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results (see the example after this list). [\#3229](https://github.com/ClickHouse/ClickHouse/pull/3229)
- Corrected type conversion between `Decimal` and integer numbers. [\#3211](https://github.com/ClickHouse/ClickHouse/pull/3211)
- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3231)
- Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;`. [\#3155](https://github.com/ClickHouse/ClickHouse/pull/3155)
- Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/3163)
- Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [\#3165](https://github.com/ClickHouse/ClickHouse/pull/3165)
- Bug fixes in the `ALTER UPDATE` query.
- Fixed bugs in the `odbc` table function that appeared in version 18.12. [\#3197](https://github.com/ClickHouse/ClickHouse/pull/3197)
- Fixed the operation of aggregate functions with `StateArray` combinators. [\#3188](https://github.com/ClickHouse/ClickHouse/pull/3188)
- Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/ClickHouse/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179)
- Fixed output of types for operations using `Decimal` and integer arguments. [\#3224](https://github.com/ClickHouse/ClickHouse/pull/3224)
- Fixed the segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/ClickHouse/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a)
- The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [\#3241](https://github.com/ClickHouse/ClickHouse/pull/3241)
- Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/ClickHouse/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664)
- Fixed a compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [\#3254](https://github.com/ClickHouse/ClickHouse/pull/3254)
- Fixed handling of substitutions in `clickhouse-performance-test` if the query contains only part of the substitutions declared in the test. [\#3263](https://github.com/ClickHouse/ClickHouse/pull/3263)
- Fixed an error when using `FINAL` with `PREWHERE`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
- Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
- Added a check for the absence of `arrayJoin` in `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [\#3337](https://github.com/ClickHouse/ClickHouse/pull/3337)
- Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [\#3357](https://github.com/ClickHouse/ClickHouse/pull/3357)
- Fixed a segfault that could occur in rare cases after an optimization that replaced AND chains from equality evaluations with the corresponding IN expression. [liuyimin-bytedance](https://github.com/ClickHouse/ClickHouse/pull/3339)
- Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of queries executed is calculated more accurately when shutting down and for limiting the number of iterations. [\#3351](https://github.com/ClickHouse/ClickHouse/pull/3351) [\#3352](https://github.com/ClickHouse/ClickHouse/pull/3352)

#### Backward incompatible changes: {#backward-incompatible-changes-1}

- Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available by default. [\#3329](https://github.com/ClickHouse/ClickHouse/pull/3329)

## ClickHouse release 18.12 {#clickhouse-release-18-12}

### ClickHouse release 18.12.17, 2018-09-16 {#clickhouse-release-18-12-17-2018-09-16}

#### New features: {#new-features-2}

- `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [\#3126](https://github.com/ClickHouse/ClickHouse/pull/3126)
- Added the ability to use `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/ClickHouse/ClickHouse/pull/3123)
- The `Decimal` type now supports `var*` and `stddev*` aggregate functions (see the example after this list). [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
- The `Decimal` type now supports mathematical functions (`exp`, `sin`, and so on). [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
- The `system.part_log` table now has the `partition_id` column. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)
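
A brief sketch of the newly supported functions on `Decimal` values; the numbers are arbitrary:

```sql
-- var*/stddev* aggregate functions over Decimal values:
SELECT stddevPop(toDecimal64(x, 4))
FROM (SELECT arrayJoin([1.5, 2.5, 3.5]) AS x);

-- Mathematical functions now accept Decimal arguments:
SELECT exp(toDecimal32(1, 3)), sin(toDecimal32(0.5, 3));
```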

#### Bug fixes: {#bug-fixes-11}

- `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3159)
- Fixed incompatibility (an unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [\#3130](https://github.com/ClickHouse/ClickHouse/pull/3130)
- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)
- Fixed a minor issue with backwards compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn’t happen). [\#3122](https://github.com/ClickHouse/ClickHouse/pull/3122)

#### Backward incompatible changes: {#backward-incompatible-changes-2}

- The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for the column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)

### ClickHouse release 18.12.14, 2018-09-13 {#clickhouse-release-18-12-14-2018-09-13}

#### New features: {#new-features-3}

- Added support for `ALTER UPDATE` queries (see the example after this list). [\#3035](https://github.com/ClickHouse/ClickHouse/pull/3035)
- Added the `allow_ddl` option, which restricts the user’s access to DDL queries. [\#3104](https://github.com/ClickHouse/ClickHouse/pull/3104)
- Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using `O_DIRECT`). [\#3117](https://github.com/ClickHouse/ClickHouse/pull/3117)
- The `system.merges` system table now contains the `partition_id` column. [\#3099](https://github.com/ClickHouse/ClickHouse/pull/3099)
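
A minimal sketch of the new mutation syntax; the table and predicate are hypothetical:

```sql
-- Rewrites only the affected rows as a background mutation:
ALTER TABLE visits UPDATE is_bot = 1 WHERE user_agent LIKE '%crawler%';
```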

#### Improvements: {#improvements-3}

- If a data part remains unchanged during a mutation, it isn’t downloaded by replicas. [\#3103](https://github.com/ClickHouse/ClickHouse/pull/3103)
- Autocomplete is available for names of settings when working with `clickhouse-client`. [\#3106](https://github.com/ClickHouse/ClickHouse/pull/3106)

#### Bug fixes: {#bug-fixes-12}

- Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [\#3118](https://github.com/ClickHouse/ClickHouse/pull/3118)
- Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13.
- Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3098)
- Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3097)

### ClickHouse release 18.12.13, 2018-09-10 {#clickhouse-release-18-12-13-2018-09-10}

#### New features: {#new-features-4}

- Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the setting `allow_experimental_decimal_type` (see the example after this list). [\#2846](https://github.com/ClickHouse/ClickHouse/pull/2846) [\#2970](https://github.com/ClickHouse/ClickHouse/pull/2970) [\#3008](https://github.com/ClickHouse/ClickHouse/pull/3008) [\#3047](https://github.com/ClickHouse/ClickHouse/pull/3047)
- New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`). [\#2948](https://github.com/ClickHouse/ClickHouse/pull/2948)
- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 at the user configuration level. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2787)
- Added support for JOIN with table functions. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
- Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/ClickHouse/ClickHouse/pull/2447)
- Ctrl+C in clickhouse-client clears a query that was entered. [\#2877](https://github.com/ClickHouse/ClickHouse/pull/2877)
- Added the `join_default_strictness` setting (values: `''`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [\#2982](https://github.com/ClickHouse/ClickHouse/pull/2982)
- Each line of the server log related to query processing shows the query ID. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added `ProfileEvents` counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and recorded, both with and without page cache). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added the `system.query_thread_log` table, which contains information about each query execution thread. Added the `log_query_threads` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.metrics` and `system.events` tables now have built-in documentation. [\#3016](https://github.com/ClickHouse/ClickHouse/pull/3016)
- Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2975)
- Added the `arrayCumSumNonNegative` and `arrayDifference` functions. [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/2942)
- Added the `retention` aggregate function. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2887)
- Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [\#3062](https://github.com/ClickHouse/ClickHouse/pull/3062) [\#3034](https://github.com/ClickHouse/ClickHouse/pull/3034)
- Tables in the MergeTree family now have the virtual column `_partition_id`. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)
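
A short sketch of enabling and using the type in 18.12.13 (the gating setting was removed in a later release when `Decimal` became generally available); the table is hypothetical:

```sql
SET allow_experimental_decimal_type = 1;  -- required in 18.12.13 only

CREATE TABLE prices (item String, price Decimal64(2)) ENGINE = Memory;
INSERT INTO prices VALUES ('widget', 42.42);
SELECT sum(price) FROM prices;  -- exact decimal arithmetic, no float rounding
```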

#### Experimental features: {#experimental-features-1}

- Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary (see the sketch after this list). [\#2830](https://github.com/ClickHouse/ClickHouse/pull/2830)
- Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT compile expressions, enable the `compile_expressions` setting. [\#2990](https://github.com/ClickHouse/ClickHouse/pull/2990) [\#3077](https://github.com/ClickHouse/ClickHouse/pull/3077)
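
A minimal sketch of the experimental type; the table is hypothetical, and in this release the type may additionally require enabling the corresponding experimental setting:

```sql
-- `status` has few distinct values, so dictionary encoding pays off:
CREATE TABLE events_lc (
    id UInt64,
    status LowCardinality(String)
) ENGINE = MergeTree ORDER BY id;
```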

#### Improvements: {#improvements-4}

- Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag.
- Improved performance of `GROUP BY` with multiple aggregation fields when one of them is a string and the others are fixed length.
- Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`.
- Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2977) [\#2980](https://github.com/ClickHouse/ClickHouse/pull/2980)
- Improved performance of reading strings and arrays in binary formats. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2955)
- Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [\#2953](https://github.com/ClickHouse/ClickHouse/pull/2953)
- Fixed a performance problem in the case of a large stream of queries that result in an error (the `_dl_addr` function is visible in `perf top`, but the server isn’t using much CPU). [\#2938](https://github.com/ClickHouse/ClickHouse/pull/2938)
- Conditions are pushed down into the view (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
- Improvements to the functionality for the `UUID` data type. [\#3074](https://github.com/ClickHouse/ClickHouse/pull/3074) [\#2985](https://github.com/ClickHouse/ClickHouse/pull/2985)
- The `UUID` data type is supported in external dictionaries. [\#2822](https://github.com/ClickHouse/ClickHouse/pull/2822)
- The `visitParamExtractRaw` function works correctly with nested structures. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2974)
- When the `input_format_skip_unknown_fields` setting is enabled, object fields in `JSONEachRow` format are skipped correctly. [BlahGeek](https://github.com/ClickHouse/ClickHouse/pull/2958)
- For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [\#2920](https://github.com/ClickHouse/ClickHouse/pull/2920)
- The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/ClickHouse/ClickHouse/pull/2971)
- You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m`. [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
- You can use the `SELECT TOP n` syntax as an alternative for `LIMIT` (see the examples after this list). [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
- Increased the size of the queue to write to system tables, so the `SystemLog queue is full` error doesn’t happen as often.
- The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2801)
- Duplicate columns can be used in a `USING` clause for `JOIN`. [\#3006](https://github.com/ClickHouse/ClickHouse/pull/3006)
- `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [\#3003](https://github.com/ClickHouse/ClickHouse/pull/3003)
- The `odbc` table function now allows you to specify the database/schema name. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2885)
- Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/ClickHouse/ClickHouse/pull/2909)
- The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`.
- `ALTER DELETE` queries work for materialized views.
- Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there are a very large number of `ReplicatedMergeTree` tables.
- Support for `ATTACH TABLE ... ON CLUSTER` queries. [\#3025](https://github.com/ClickHouse/ClickHouse/pull/3025)
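
Both new spellings in one sketch:

```sql
-- Equivalent to LIMIT 10, 5:
SELECT number FROM system.numbers LIMIT 5 OFFSET 10;

-- T-SQL-style alternative to LIMIT 5:
SELECT TOP 5 number FROM system.numbers;
```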

#### Bug fixes: {#bug-fixes-13}

- Fixed an issue with `Dictionary` tables (throws the `Size of offsets doesn't match size of column` or `Unknown compression method` exception). This bug appeared in version 18.10.3. [\#2913](https://github.com/ClickHouse/ClickHouse/issues/2913)
- Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [\#3049](https://github.com/ClickHouse/ClickHouse/pull/3049)
- Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [\#3038](https://github.com/ClickHouse/ClickHouse/pull/3038)
- Fixed the possibility of data loss when inserting in `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [\#2939](https://github.com/ClickHouse/ClickHouse/pull/2939) [\#2949](https://github.com/ClickHouse/ClickHouse/pull/2949) [\#2964](https://github.com/ClickHouse/ClickHouse/pull/2964)
- Fixed a segfault during `JOIN ... ON`. [\#3000](https://github.com/ClickHouse/ClickHouse/pull/3000)
- Fixed the error searching column names when the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [\#2994](https://github.com/ClickHouse/ClickHouse/pull/2994)
- Fixed the “Not found column” error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. [\#3087](https://github.com/ClickHouse/ClickHouse/pull/3087)
- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and optimization of the move to `PREWHERE` is triggered. [\#2226](https://github.com/ClickHouse/ClickHouse/pull/2226) [\#3037](https://github.com/ClickHouse/ClickHouse/pull/3037) [\#3055](https://github.com/ClickHouse/ClickHouse/pull/3055) [\#3065](https://github.com/ClickHouse/ClickHouse/pull/3065) [\#3073](https://github.com/ClickHouse/ClickHouse/pull/3073) [\#3090](https://github.com/ClickHouse/ClickHouse/pull/3090) [\#3093](https://github.com/ClickHouse/ClickHouse/pull/3093)
- Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [\#2910](https://github.com/ClickHouse/ClickHouse/pull/2910)
- Fixed the incorrect result when comparing `nan` with integers. [\#3024](https://github.com/ClickHouse/ClickHouse/pull/3024)
- Fixed an error in the `zlib-ng` library that could lead to segfault in rare cases. [\#2854](https://github.com/ClickHouse/ClickHouse/pull/2854)
- Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insertion request results in multiple small blocks. [\#3084](https://github.com/ClickHouse/ClickHouse/pull/3084)
- Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously.
- Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [\#2989](https://github.com/ClickHouse/ClickHouse/pull/2989)
- Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2960)
- Fixed an error in the `arrayDistinct` function for `Nullable` array elements. [\#2845](https://github.com/ClickHouse/ClickHouse/pull/2845) [\#2937](https://github.com/ClickHouse/ClickHouse/pull/2937)
- The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2929)
- Fixed the segfault when re-initializing the ZooKeeper session. [\#2917](https://github.com/ClickHouse/ClickHouse/pull/2917)
- Fixed potential blocking when working with ZooKeeper.
- Fixed incorrect code for adding nested data structures in a `SummingMergeTree`.
- When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2808)

#### Security fix: {#security-fix}

- Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. [\#2828](https://github.com/ClickHouse/ClickHouse/pull/2828) [\#2879](https://github.com/ClickHouse/ClickHouse/pull/2879) [\#2886](https://github.com/ClickHouse/ClickHouse/pull/2886) [\#2893](https://github.com/ClickHouse/ClickHouse/pull/2893) [\#2921](https://github.com/ClickHouse/ClickHouse/pull/2921)
- Fixed incorrect validation of the file path in the `catBoostPool` table function. [\#2894](https://github.com/ClickHouse/ClickHouse/pull/2894)
- The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user’s configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2856)

#### Backward incompatible changes: {#backward-incompatible-changes-3}

- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 at the user configuration level.

#### Build changes: {#build-changes-2}

- Most integration tests can now be run by commit.
- Code style checks can also be run by commit.
- The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/ClickHouse/ClickHouse/pull/2912)
- When using clang to build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall -Wextra -Werror`. [\#2957](https://github.com/ClickHouse/ClickHouse/pull/2957)
- Debug builds use the `jemalloc` debug option.
- The interface of the library for interacting with ZooKeeper is declared abstract. [\#2950](https://github.com/ClickHouse/ClickHouse/pull/2950)

## ClickHouse release 18.10 {#clickhouse-release-18-10}

### ClickHouse release 18.10.3, 2018-08-13 {#clickhouse-release-18-10-3-2018-08-13}

#### New features: {#new-features-5}

- HTTPS can be used for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
- Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32` (see the example after this list). [\#2791](https://github.com/ClickHouse/ClickHouse/pull/2791)
- Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [\#2834](https://github.com/ClickHouse/ClickHouse/pull/2834)
- Support for `UUID` in the key columns.
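
A quick sketch of the expanded hash family; the string argument is arbitrary:

```sql
SELECT
    murmurHash2_32('ClickHouse'),
    murmurHash2_64('ClickHouse'),
    murmurHash3_32('ClickHouse'),
    murmurHash3_64('ClickHouse'),
    hex(murmurHash3_128('ClickHouse'));  -- 128-bit result, shown as hex
```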

#### Improvements: {#improvements-5}

- Clusters can be removed without restarting the server when they are deleted from the config files. [\#2777](https://github.com/ClickHouse/ClickHouse/pull/2777)
- External dictionaries can be removed without restarting the server when they are removed from config files. [\#2779](https://github.com/ClickHouse/ClickHouse/pull/2779)
- Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/ClickHouse/ClickHouse/pull/2781)
- Improvements for the `UUID` data type (not yet complete). [\#2618](https://github.com/ClickHouse/ClickHouse/pull/2618)
- Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [\#2815](https://github.com/ClickHouse/ClickHouse/pull/2815)
- Old records of completed mutations are deleted (`ALTER DELETE`). [\#2784](https://github.com/ClickHouse/ClickHouse/pull/2784)
- Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/2841)
- The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2851)
- Added the `max_partition_size_to_drop` config option. [\#2782](https://github.com/ClickHouse/ClickHouse/pull/2782)
- Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2812)
- Added the `max_fetch_partition_retries_count` setting. [\#2831](https://github.com/ClickHouse/ClickHouse/pull/2831)
- Added the `prefer_localhost_replica` setting. When it is enabled (the default), queries use the local replica without inter-process interaction; set it to 0 to disable this preference. [\#2832](https://github.com/ClickHouse/ClickHouse/pull/2832)
- The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2855)

#### Bug fixes: {#bug-fixes-14}

- Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0.
- Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [\#2814](https://github.com/ClickHouse/ClickHouse/pull/2814)
- Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [\#2930](https://github.com/ClickHouse/ClickHouse/pull/2930)
- Fixed an error for concurrent `Set` or `Join`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2823)
- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a subquery if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2094)
- Fixed a memory leak if an exception occurred when connecting to a MySQL server.
- Fixed an incorrect clickhouse-client response code in case of a query error.
- Fixed incorrect behavior of materialized views containing DISTINCT. [\#2795](https://github.com/ClickHouse/ClickHouse/issues/2795)

#### Backward incompatible changes: {#backward-incompatible-changes-4}

- Removed support for CHECK TABLE queries for Distributed tables.

#### Build changes: {#build-changes-3}

- The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed up to 20%. However, there are queries that have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. Under highly concurrent loads, CPU usage in userspace and in the kernel shows just a slight increase. [\#2773](https://github.com/ClickHouse/ClickHouse/pull/2773)
- Use of libressl from a submodule. [\#1983](https://github.com/ClickHouse/ClickHouse/pull/1983) [\#2807](https://github.com/ClickHouse/ClickHouse/pull/2807)
- Use of unixodbc from a submodule. [\#2789](https://github.com/ClickHouse/ClickHouse/pull/2789)
- Use of mariadb-connector-c from a submodule. [\#2785](https://github.com/ClickHouse/ClickHouse/pull/2785)
- Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself).

## ClickHouse release 18.6 {#clickhouse-release-18-6}

### ClickHouse release 18.6.0, 2018-08-02 {#clickhouse-release-18-6-0-2018-08-02}

#### New features: {#new-features-6}

- Added support for ON expressions for the JOIN ON syntax (see the sketch after this list):
  `JOIN ON Expr([table.]column ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]`
  The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [\#2742](https://github.com/ClickHouse/ClickHouse/pull/2742)
- HTTPS can be enabled for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
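
A small sketch of the syntax; the tables and columns are hypothetical:

```sql
-- An AND chain of equalities, with an expression on each side:
SELECT a.id, b.amount
FROM orders AS a
JOIN payments AS b
    ON a.id = b.order_id AND lower(a.region) = lower(b.region);
```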

#### Improvements: {#improvements-6}

- The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [\#2646](https://github.com/ClickHouse/ClickHouse/pull/2646)

## ClickHouse release 18.5 {#clickhouse-release-18-5}

### ClickHouse release 18.5.1, 2018-07-31 {#clickhouse-release-18-5-1-2018-07-31}

#### New features: {#new-features-7}

- Added the hash function `murmurHash2_32`. [\#2756](https://github.com/ClickHouse/ClickHouse/pull/2756)

#### Improvements: {#improvements-7}

- Now you can use the `from_env` attribute to set values in config files from environment variables. [\#2741](https://github.com/ClickHouse/ClickHouse/pull/2741)
- Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions. [\#2752](https://github.com/ClickHouse/ClickHouse/pull/2752)

#### Bug fixes: {#bug-fixes-15}

- Fixed a possible bug when starting a replica. [\#2759](https://github.com/ClickHouse/ClickHouse/pull/2759)

## ClickHouse release 18.4 {#clickhouse-release-18-4}

### ClickHouse release 18.4.0, 2018-07-28 {#clickhouse-release-18-4-0-2018-07-28}

#### New features: {#new-features-8}

- Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations`. [\#2721](https://github.com/ClickHouse/ClickHouse/pull/2721)
- Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function. [\#2708](https://github.com/ClickHouse/ClickHouse/pull/2708)
- Support for `HTTP Basic` authentication in the replication protocol. [\#2727](https://github.com/ClickHouse/ClickHouse/pull/2727)
- The `has` function now allows searching for a numeric value in an array of `Enum` values. [Maxim Khrisanfov](https://github.com/ClickHouse/ClickHouse/pull/2699)
- Support for adding arbitrary message separators when reading from `Kafka`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2701)

#### Improvements: {#improvements-8}

- The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition. [\#2694](https://github.com/ClickHouse/ClickHouse/pull/2694)
- The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed.
- Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2689)

#### Bug fixes: {#bug-fixes-16}

- Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/ClickHouse/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2))
- Fixed a bug in the `windowFunnel` aggregate function. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2735)
- Fixed a bug in the `anyHeavy` aggregate function. ([a2101df2](https://github.com/ClickHouse/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee))
- Fixed a server crash when using the `countArray()` aggregate function.

#### Backward incompatible changes: {#backward-incompatible-changes-5}

- Parameters for the `Kafka` engine were changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use the `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add the `kafka_row_delimiter` parameter with the `''` value (see the sketch after this list).
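
A sketch of the old and new parameter order; the broker, topic, group, and schema values are hypothetical:

```sql
-- Old (before this change):
--   ENGINE = Kafka('localhost:9092', 'topic1', 'group1', 'CapnProto', 'schema.capnp:Message', 2)
-- New: kafka_row_delimiter ('' here) is inserted before kafka_schema:
CREATE TABLE queue (payload String)
    ENGINE = Kafka('localhost:9092', 'topic1', 'group1', 'CapnProto', '', 'schema.capnp:Message', 2);
```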

## ClickHouse release 18.1 {#clickhouse-release-18-1}

### ClickHouse release 18.1.0, 2018-07-23 {#clickhouse-release-18-1-0-2018-07-23}

#### New features: {#new-features-9}

- Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([\#2634](https://github.com/ClickHouse/ClickHouse/pull/2634)).
- Support for arbitrary types for the `uniq*` family of aggregate functions ([\#2010](https://github.com/ClickHouse/ClickHouse/issues/2010)).
- Support for arbitrary types in comparison operators ([\#2026](https://github.com/ClickHouse/ClickHouse/issues/2026)).
- The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([\#2637](https://github.com/ClickHouse/ClickHouse/pull/2637)).
- Added the `arrayDistinct` function ([\#2670](https://github.com/ClickHouse/ClickHouse/pull/2670)).
- The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/ClickHouse/ClickHouse/pull/2566)).

#### Improvements: {#improvements-9}

- Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow timezone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backward compatible, unless otherwise stated in the changelog.
- Faster conversions of floating-point numbers to a string ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2664)).
- If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log ([Leonardo Cecchi](https://github.com/ClickHouse/ClickHouse/pull/2669)).

#### Bug fixes: {#bug-fixes-17}

- Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2624)).
- Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/ClickHouse/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)).
- Fixed an error during a CAST to Nullable types ([\#1322](https://github.com/ClickHouse/ClickHouse/issues/1322)).
- Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2657)).
- Fixed incorrect transformation of the OR expression chain in a function argument ([chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2663)).
- Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery ([\#2571](https://github.com/ClickHouse/ClickHouse/issues/2571)).
- Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn’t in uppercase letters ([fe8c4d6](https://github.com/ClickHouse/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)).
- Added missing quoting of identifiers for queries to an external DBMS ([\#2635](https://github.com/ClickHouse/ClickHouse/issues/2635)).

#### Backward incompatible changes: {#backward-incompatible-changes-6}

- Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`.

## ClickHouse release 1.1 {#clickhouse-release-1-1}

### ClickHouse release 1.1.54394, 2018-07-12 {#clickhouse-release-1-1-54394-2018-07-12}

#### New features: {#new-features-10}

- Added the `histogram` aggregate function (see the sketch after this list) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2521)).
- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2600)).
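
A minimal sketch; the replicated table name is hypothetical:

```sql
-- Build an adaptive histogram with 5 bins over a numeric sample:
SELECT histogram(5)(number)
FROM (SELECT number FROM system.numbers LIMIT 100);

-- OPTIMIZE ... FINAL without naming a partition:
OPTIMIZE TABLE visits_replicated FINAL;
```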

#### Bug fixes: {#bug-fixes-18}

- Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388.
- Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table.
- The `has` function now works correctly for an array with Nullable elements ([\#2115](https://github.com/ClickHouse/ClickHouse/issues/2115)).
- The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were queried from the table.
- Fixed how an empty `TinyLog` table works after inserting an empty data block ([\#2563](https://github.com/ClickHouse/ClickHouse/issues/2563)).
- The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL.

### ClickHouse release 1.1.54390, 2018-07-06 {#clickhouse-release-1-1-54390-2018-07-06}

#### New features: {#new-features-11}

- Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Hvostikova](https://github.com/ClickHouse/ClickHouse/pull/2490)).
- Added the ability to enable or disable processing single or double quotes when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2574)).
- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2599)).

#### Improvements: {#improvements-10}

- Improved performance, reduced memory consumption, and correct memory consumption tracking with use of the IN operator when a table index could be used ([\#2584](https://github.com/ClickHouse/ClickHouse/pull/2584)).
- Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2.
- Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function ([\#2573](https://github.com/ClickHouse/ClickHouse/pull/2573)).
- Added `Nullable` support for the `runningDifference` function ([\#2594](https://github.com/ClickHouse/ClickHouse/pull/2594)).
- Improved query analysis performance when there is a very large number of expressions ([\#2572](https://github.com/ClickHouse/ClickHouse/pull/2572)).
- Faster selection of data parts for merging in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session ([\#2597](https://github.com/ClickHouse/ClickHouse/pull/2597)).
- The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/ClickHouse/ClickHouse/pull/2593)).

#### Bug fixes: {#bug-fixes-19}

- Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and read-only states of tables before restarting the server.
- Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted.
- Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2553)).
- Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size ([\#2603](https://github.com/ClickHouse/ClickHouse/pull/2603)).
- Fixed a segfault if `macros` are used but they aren’t in the config file ([\#2570](https://github.com/ClickHouse/ClickHouse/pull/2570)).
- Fixed switching to the default database when reconnecting the client ([\#2583](https://github.com/ClickHouse/ClickHouse/pull/2583)).
- Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled.

#### Security fix: {#security-fix-1}

- Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`).

### ClickHouse release 1.1.54388, 2018-06-28 {#clickhouse-release-1-1-54388-2018-06-28}

#### New features: {#new-features-12}

- Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track the progress of this type of query.
- Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables.
-- Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2260)) -- Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`). -- Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2294)). -- Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2501)). -- Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2352)). -- New `startsWith` and `endsWith` functions for strings ([Vadim Plakhtinsky](https://github.com/ClickHouse/ClickHouse/pull/2429)). -- The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2535)). -- The password to `clickhouse-client` can be entered interactively. -- Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2459)). -- Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2472)). -- Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/ClickHouse/ClickHouse/pull/2263)) -- Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats. -- Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests. - -#### Experimental features: {#experimental-features-2} - -- Added the ability to calculate `and` arguments only where they are needed ([Anastasia Tsarkova](https://github.com/ClickHouse/ClickHouse/pull/2272)) -- JIT compilation to native code is now available for some expressions ([pyos](https://github.com/ClickHouse/ClickHouse/pull/2277)). - -#### Bug fixes: {#bug-fixes-20} - -- Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`. -- Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result. -- Fixed an error when reading an array column from a Nested structure ([\#2066](https://github.com/ClickHouse/ClickHouse/issues/2066)). -- Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`. -- Fixed an error when analyzing queries with recursive aliases. -- Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([\#2525](https://github.com/ClickHouse/ClickHouse/issues/2525)). -- User profile settings were not applied when using sessions in the HTTP interface. -- Fixed how settings are applied from the command line parameters in clickhouse-local. -- The ZooKeeper client library now uses the session timeout received from the server. -- Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout. -- Fixed pruning of parts for queries with conditions on partition key columns ([\#2342](https://github.com/ClickHouse/ClickHouse/issues/2342)). -- Merges are now possible after `CLEAR COLUMN IN PARTITION` ([\#2315](https://github.com/ClickHouse/ClickHouse/issues/2315)). -- Type mapping in the ODBC table function has been fixed ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2268)). 
-- Type comparisons have been fixed for `DateTime` with and without the time zone ([Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2400)). -- Fixed syntactic parsing and formatting of the `CAST` operator. -- Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2411)). -- Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/ClickHouse/ClickHouse/pull/2448)). -- Fixed SSRF in the remote() table function. -- Fixed exit behavior of `clickhouse-client` in multiline mode ([\#2510](https://github.com/ClickHouse/ClickHouse/issues/2510)). - -#### Improvements: {#improvements-11} - -- Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/ClickHouse/ClickHouse/pull/1722)). -- Improved LZ4 compression performance. -- Faster analysis for queries with a large number of JOINs and subqueries. -- The DNS cache is now updated automatically when there are too many network errors. -- Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts. -- Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`. -- Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match. -- A server with replicated tables can start even if you haven’t configured ZooKeeper. -- When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/ClickHouse/ClickHouse/pull/2325)). -- Added chown for config directories in the systemd config file ([Mikhail Shiryaev](https://github.com/ClickHouse/ClickHouse/pull/2421)). - -#### Build changes: {#build-changes-4} - -- The gcc8 compiler can be used for builds. -- Added the ability to build llvm from submodule. -- The version of the librdkafka library has been updated to v0.11.4. -- Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0. -- Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2274)). -- CMake now generates files for ninja by default (like when using `-G Ninja`). -- Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/ClickHouse/ClickHouse/pull/2519)). -- Fixed a header file conflict in Fedora Rawhide ([\#2520](https://github.com/ClickHouse/ClickHouse/issues/2520)). - -#### Backward incompatible changes: {#backward-incompatible-changes-7} - -- Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format. -- If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn’t have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: Update the server on the entire cluster. - -### ClickHouse release 1.1.54385, 2018-06-01 {#clickhouse-release-1-1-54385-2018-06-01} - -#### Bug fixes: {#bug-fixes-21} - -- Fixed an error that in some cases caused ZooKeeper operations to block.
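The `cast` compatibility note in the 1.1.54388 section above is easiest to see side by side. A minimal sketch of the two spellings (the error text on mixed-version clusters is quoted from that entry):

```sql
-- Both forms are equivalent on a single server.
SELECT cast(0, 'UInt8');    -- lowercase without AS: may fail during a rolling upgrade
                            -- with `Not found column cast(0, 'UInt8') in block`
SELECT CAST(0 AS UInt8);    -- uppercase with AS: unaffected spelling
```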
- -### ClickHouse release 1.1.54383, 2018-05-22 {#clickhouse-release-1-1-54383-2018-05-22} - -#### Bug fixes: {#bug-fixes-22} - -- Fixed a slowdown of the replication queue if a table has many replicas. - -### ClickHouse release 1.1.54381, 2018-05-14 {#clickhouse-release-1-1-54381-2018-05-14} - -#### Bug fixes: {#bug-fixes-23} - -- Fixed a leak of nodes in ZooKeeper when ClickHouse loses the connection to the ZooKeeper server. - -### ClickHouse release 1.1.54380, 2018-04-21 {#clickhouse-release-1-1-54380-2018-04-21} - -#### New features: {#new-features-13} - -- Added the table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`: ``` ln -s /dev/urandom /var/lib/clickhouse/user_files/random && clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10" ```. - -#### Improvements: {#improvements-12} - -- Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`. - -- Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit. - -#### Bug fixes: {#bug-fixes-24} - -- Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`. - -- Fixed incorrect filtering by partition index in expressions like `partition_key_column IN (...)`. -- Fixed inability to execute the `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table. -- Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica. -- Fixed freezing of `KILL QUERY`. -- Fixed an error in the ZooKeeper client library which led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration. - -#### Backward incompatible changes: {#backward-incompatible-changes-8} - -- Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors. - -### ClickHouse release 1.1.54378, 2018-04-16 {#clickhouse-release-1-1-54378-2018-04-16} - -#### New features: {#new-features-14} - -- Logging level can be changed without restarting the server. -- Added the `SHOW CREATE DATABASE` query. -- The `query_id` can be passed to `clickhouse-client` (elBroom). -- New setting: `max_network_bandwidth_for_all_users`. -- Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW`. -- Added information about the size of data parts in uncompressed form in the system table. -- Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`). -- Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in ZooKeeper: `use_minimalistic_checksums_in_zookeeper = 1`. -- Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server’s display name can be changed. It’s also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov). -- Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson). -- When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was canceled` exception instead of an incomplete result. - -#### Improvements: {#improvements-13} - -- `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue. -- `SELECT ...
FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part. -- A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov). -- The `lengthUTF8` function runs faster (zhang2014). -- Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards. -- The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket’s `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa). -- More robust crash recovery for asynchronous insertion into `Distributed` tables. -- The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊). - -#### Bug fixes: {#bug-fixes-25} - -- Fixed an error with `IN` when the left side of the expression is `Nullable`. -- Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index. -- The `max_execution_time` limit now works correctly with distributed queries. -- Fixed errors when calculating the size of composite columns in the `system.columns` table. -- Fixed an error when creating a temporary table `CREATE TEMPORARY TABLE IF NOT EXISTS`. -- Fixed errors in `StorageKafka` (\#2075) -- Fixed server crashes from invalid arguments of certain aggregate functions. -- Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables. -- `Too many parts` state is less likely to happen when inserting into aggregated materialized views (\#2084). -- Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level. -- Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`. -- `SummingMergeTree` now works correctly for summation of nested data structures with a composite key. -- Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables. - -#### Build changes: {#build-changes-5} - -- The build supports `ninja` instead of `make` and uses `ninja` by default for building releases. -- Renamed packages: `clickhouse-server-base` to `clickhouse-common-static`; `clickhouse-server-common` to `clickhouse-server`; `clickhouse-common-dbg` to `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility. - -#### Backward incompatible changes: {#backward-incompatible-changes-9} - -- Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as “at least one `arr` element belongs to the `set`”. To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`. -- Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `<listen_reuse_port>1</listen_reuse_port>` in the config.
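To make the `arr IN (set)` change described above concrete, here is a minimal before/after sketch; the array and set values are arbitrary examples:

```sql
-- Before 1.1.54378, this meant "at least one element of arr belongs to the set":
-- SELECT [0, 1, 3] IN (1, 2);   -- special interpretation removed in this release

-- Equivalent expression in the new version (returns 1 here, since 1 is in the set):
SELECT arrayExists(x -> x IN (1, 2), [0, 1, 3]);
```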
- -### ClickHouse release 1.1.54370, 2018-03-16 {#clickhouse-release-1-1-54370-2018-03-16} - -#### New features: {#new-features-15} - -- Added the `system.macros` table and auto updating of macros when the config file is changed. -- Added the `SYSTEM RELOAD CONFIG` query. -- Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the “maximum” interval. ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2012)). - -#### Improvements: {#improvements-14} - -- When inserting data in a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log). -- Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`. - -#### Bug fixes: {#bug-fixes-26} - -- Fixed the `Illegal PREWHERE` error when reading from Merge tables for `Distributed` tables. -- Added fixes that allow you to start clickhouse-server in IPv4-only Docker containers. -- Fixed a race condition when reading from the `system.parts_columns` table. -- Removed double buffering during a synchronous insert to a `Distributed` table, which could have caused the connection to time out. -- Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query. -- Fixed incorrect dates in the `system.parts` table. -- Fixed a bug that made it impossible to insert data in a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster. -- Fixed the vertical merging algorithm for an empty `ORDER BY` table. -- Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requestor server. This functionality was lost in release 1.1.54362. -- Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of the `IN` should use a remote `default.table` instead of a local one. This behavior was broken in version 1.1.54358. -- Removed extraneous error-level logging of `Not found column ... in block`. - -### Clickhouse Release 1.1.54362, 2018-03-11 {#clickhouse-release-1-1-54362-2018-03-11} - -#### New features: {#new-features-16} - -- Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard. To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1. -- Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard. -- Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`. -- An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova). -- Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta). -- Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`.
They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings. -- Added functions: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`. -- Added the `arrayCumSum` function (Javi Santana). -- Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read the DateTime from a string containing text in a wide variety of possible formats. -- Data can be partially reloaded from external dictionaries during updating (load just the records in which the value of the specified field is greater than in the previous download) (Arsen Hakobyan). -- Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier. -- The `remote` and `cluster` table functions can be used in `INSERT` queries. -- Added the `create_table_query` and `engine_full` virtual columns to the `system.tables` table. The `metadata_modification_time` column is virtual. -- Added the `data_path` and `metadata_path` columns to the `system.tables` and `system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables. -- Added additional information about merges in the `system.part_log` table. -- An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov). -- The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014). -- Added `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014). -- Support for `SHOW CREATE TABLE` for temporary tables (zhang2014). -- Added the `system_profile` configuration parameter for the settings used by internal processes. -- Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko). -- Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko). -- Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes. -- Failover is supported in `remote` table functions for cases when some of the replicas are missing the requested table. -- Configuration settings can be overridden in the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`. -- Implemented the `empty` function from a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014). -- Added the `listen_try` configuration parameter for listening to at least one of the listen addresses without quitting, if some of the addresses can’t be listened to (useful for systems with disabled support for IPv4 or IPv6). -- Added the `VersionedCollapsingMergeTree` table engine. -- Support for rows and arbitrary numeric types for the `library` dictionary source. -- `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`). -- A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`. -- `RENAME TABLE` can be performed for `VIEW`. -- Added the `throwIf` function. -- Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024). -- The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns.
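A few of the 1.1.54362 features above, sketched as queries. This is illustrative only: `my_cluster` is a placeholder that must exist in the server's `remote_servers` configuration, and the `CREATE TABLE` uses the extended storage definition syntax:

```sql
-- Flexible DateTime parsing added in this release:
SELECT parseDateTimeBestEffort('2018/03/11 10:00:00');

-- The cluster() table function, mirroring the cluster(cluster_name, db, table) example above:
SELECT count() FROM cluster(my_cluster, system, one);

-- MergeTree without a primary key requires an explicit empty sort key:
CREATE TABLE unordered (x UInt32) ENGINE = MergeTree ORDER BY tuple();
```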
- -#### Improvements: {#improvements-15} - -- Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries. -- Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts. -- Added the `allow_distributed_ddl` option. -- Nondeterministic functions are not allowed in expressions for `MergeTree` table keys. -- Files with substitutions from `config.d` directories are loaded in alphabetical order. -- Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`. -- The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks). -- When running a query, table valued functions run once. Previously, `remote` and `mysql` table valued functions performed the same query twice to retrieve the table structure from a remote server. -- The `MkDocs` documentation generator is used. -- When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014). -- Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342. -- `Enum` values can be used in `min`, `max`, `sum` and some other functions. In these cases, it uses the corresponding numeric values. This feature was previously available but was lost in the release 1.1.54337. -- Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases. - -#### Bug fixes: {#bug-fixes-27} - -- Fixed cases when unnecessary columns were removed from subqueries in error, or not removed from subqueries containing `UNION ALL`. -- Fixed a bug in merges for `ReplacingMergeTree` tables. -- Fixed synchronous insertions in `Distributed` tables (`insert_distributed_sync = 1`). -- Fixed segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries. -- Fixed segfault for certain uses of `replace_running_query` and `KILL QUERY`. -- Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table. -- Fixed a bug when the `DROP DATABASE` query did not delete the file with metadata. -- Fixed the `DROP DATABASE` query for `Dictionary` databases. -- Fixed the low precision of `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alex Bocharov). -- Fixed the calculation of implicit default values when necessary to simultaneously calculate default explicit expressions in `INSERT` queries (zhang2014). -- Fixed a rare case when a query to a `MergeTree` table couldn’t finish (chenxing-xc). -- Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc). -- Fixed a slight performance regression with functions that use regular expressions. -- Fixed a performance regression when creating multidimensional arrays from complex expressions. -- Fixed a bug that could cause an extra `FORMAT` section to appear in an `.sql` file with metadata. -- Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table. 
-- Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand). -- Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but that are empty for the old partitions, when the conditions for these columns moved to `PREWHERE`. -- Fixed a bug when filtering tables by virtual `_table` columns in queries to `Merge` tables. -- Fixed a bug when using `ALIAS` columns in `Distributed` tables. -- Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family. -- Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries. -- Fixed a crash when passing arrays of different sizes to an `arrayReduce` function when using aggregate functions from multiple arguments. -- Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`. -- Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled). - -#### Backward incompatible changes: {#backward-incompatible-changes-10} - -- Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default. -- Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`. -- Removed the `UnsortedMergeTree` engine. - -### Clickhouse Release 1.1.54343, 2018-02-05 {#clickhouse-release-1-1-54343-2018-02-05} - -- Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`. -- Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index. -- Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue. - -### Clickhouse Release 1.1.54342, 2018-01-22 {#clickhouse-release-1-1-54342-2018-01-22} - -This release contains bug fixes for the previous release 1.1.54337: - -- Fixed a regression in 1.1.54337: if the default user has readonly access, then the server refuses to start up with the message `Cannot create database in readonly mode`. -- Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d. -- Fixed a regression in 1.1.54337: wrong default configuration in the Docker image. -- Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`). -- Fixed a bug that may lead to inconsistent merges after OPTIMIZE query to Replicated tables (you may see it in log messages `Part ... intersects the previous part`). -- Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014). -- Fixed a bug in implementation of NULL. - -### Clickhouse Release 1.1.54337, 2018-01-18 {#clickhouse-release-1-1-54337-2018-01-18} - -#### New features: {#new-features-17} - -- Added support for storage of multi-dimensional arrays and tuples (`Tuple` data type) in tables. -- Support for table functions for `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. 
Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`. -- Improved support for time zones. The `DateTime` data type can be annotated with the timezone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When timezones are specified in functions for `DateTime` arguments, the return type will track the timezone, and the value will be displayed as expected. -- Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive. -- Added the `toStartOfFifteenMinutes` function (Kirill Shvakov). -- Added the `clickhouse format` tool for formatting queries. -- Added the `format_schema_path` configuration parameter (Marek Vavruša). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory. -- Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin). -- Added a column with documentation for the `system.settings` table (Kirill Shvakov). -- Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables. -- Added the `system.models` table with information about loaded `CatBoost` machine learning models. -- Added the `mysql` and `odbc` table functions and corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage. -- Added the possibility to pass an argument of type `AggregateFunction` for the `groupArray` aggregate function (so you can create an array of states of some aggregate function). -- Removed restrictions on various combinations of aggregate function combinators. For example, you can use `avgForEachIf` as well as `avgIfForEach` aggregate functions, which have different behaviors. -- The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments. -- Added support for aggregate functions of `Nullable` arguments even for cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Example: `groupArray`, `groupUniqArray`, `topK`. -- Added the `max_client_network_bandwidth` setting for `clickhouse-client` (Kirill Shvakov). -- Users with the `readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT…) (Kirill Shvakov). -- Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša). -- Added the `intExp3` and `intExp4` functions. -- Added the `sumKahan` aggregate function. -- Added the `to*Number*OrNull` functions, where `*Number*` is a numeric type. -- Added support for `WITH` clauses for an `INSERT SELECT` query (author: zhang2014). -- Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded. -- Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova). -- The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory. -- Added the `--silent` option for the `clickhouse-local` tool.
It suppresses printing query execution info in stderr. -- Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird). - -#### Performance optimizations: {#performance-optimizations} - -- Improved performance of aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, `argMax` for string arguments. -- Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`. -- Improved performance of parsing and formatting `Date` and `DateTime` type values in text format. -- Improved performance and precision of parsing floating point numbers. -- Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING`. -- Improved performance of aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr` at the cost of reduced computational stability. The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable`. - -#### Bug fixes: {#bug-fixes-28} - -- Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again was not working because inserted blocks were considered duplicates. -- Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for `CREATE MATERIALIZED VIEW` queries with `POPULATE`. -- Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration. -- Fixed unexpected results of passing the `Date` argument to `toStartOfDay`. -- Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for `INTERVAL n MONTH` in cases when the result has the previous year. -- Added missing support for the `UUID` data type for `DISTINCT`, `JOIN`, and `uniq` aggregate functions and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete. -- Fixed `SummingMergeTree` behavior in cases when the rows summed to zero. -- Various fixes for the `Kafka` engine (Marek Vavruša). -- Fixed incorrect behavior of the `Join` table engine (Amos Bird). -- Fixed incorrect allocator behavior under FreeBSD and OS X. -- The `extractAll` function now supports empty matches. -- Fixed an error that blocked usage of `libressl` instead of `openssl`. -- Fixed the `CREATE TABLE AS SELECT` query from temporary tables. -- Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts. -- Fixed possible overflow in `gcd`, `lcm` and `modulo` (`%` operator) (Maks Skorokhod). -- `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config). -- Fixed a bug in the background check of parts (`MergeTreePartChecker`) when using a custom partition key. -- Fixed parsing of tuples (values of the `Tuple` data type) in text formats. -- Improved error messages about incompatible types passed to `multiIf`, `array` and some other functions. -- Redesigned support for `Nullable` types. Fixed bugs that may lead to a server crash. Fixed almost all other bugs related to `NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, `join_use_nulls` mode, Nullable types as arguments of `OR` operator, etc.
-- Fixed various bugs related to internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree`; alignment of `Enum` types in `Pretty` formats, etc. -- Stricter checks for allowed combinations of composite columns. -- Fixed the overflow when specifying a very large parameter for the `FixedString` data type. -- Fixed a bug in the `topK` aggregate function in a generic case. -- Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with an `-Array` combinator. -- Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322). -- Fixed the precision of the `exp10` function. -- Fixed the behavior of the `visitParamExtract` function for better compliance with documentation. -- Fixed the crash when incorrect data types are specified. -- Fixed the behavior of `DISTINCT` in the case when all columns are constants. -- Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index. -- Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries. -- Fixed a bug that led to excessive rows in the result of `FULL` and `RIGHT JOIN` (Amos Bird). -- Fixed a server crash when creating and removing temporary files in `config.d` directories during config reload. -- Fixed the `SYSTEM DROP DNS CACHE` query: the cache was flushed but addresses of cluster nodes were not updated. -- Fixed the behavior of `MATERIALIZED VIEW` after executing `DETACH TABLE` for the table under the view (Marek Vavruša). - -#### Build improvements: {#build-improvements-4} - -- The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment. -- A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems. -- Added the `clickhouse-test` package. It can be used to run functional tests. -- The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub. -- Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run. -- Added support for `Cap'n Proto` in the default build. -- Changed the format of documentation sources from `reStructuredText` to `Markdown`. -- Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually. -- For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as `clickhouse clang` and `clickhouse lld`. -- Removed usage of GNU extensions from the code. Enabled the `-Wextra` option. When building with `clang` the default is `libc++` instead of `libstdc++`. -- Extracted `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools. - -#### Backward incompatible changes: {#backward-incompatible-changes-11} - -- The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory. If your table doesn’t have `Nullable` columns or if the type of your table is not `Log`, then you don’t need to do anything.
-- Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default. -- The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstValue` to avoid confusion. -- Removed the `FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird). -- Removed the `BlockTabSeparated` format that was used solely for demonstration purposes. -- Changed the state format for aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with corresponding states), please write to clickhouse-feedback@yandex-team.com. -- In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you can still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost. Although it was undocumented, we plan to support it again in future releases. -- Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release. - -#### Please note when upgrading: {#please-note-when-upgrading} - -- When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message `unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated. -- If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes. - -## [Changelog for 2017](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2017.md) diff --git a/docs/ru/changelog/2019.md b/docs/ru/changelog/2019.md deleted file mode 100644 index 01a0756af14..00000000000 --- a/docs/ru/changelog/2019.md +++ /dev/null @@ -1,2071 +0,0 @@ ---- -en_copy: true ---- - -## ClickHouse release v19.17 {#clickhouse-release-v19-17} - -### ClickHouse release v19.17.6.36, 2019-12-27 {#clickhouse-release-v19-17-6-36-2019-12-27} - -#### Bug Fix {#bug-fix} - -- Fixed potential buffer overflow in decompress. A malicious user can pass fabricated compressed data that could cause a read beyond the buffer. This issue was found by Eldar Zaitov from the Yandex information security team. [\#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed possible server crash (`std::terminate`) when the server cannot send or write data in JSON or XML format with values of String data type (that require UTF-8 validation) or when compressing result data with Brotli algorithm or in some other rare cases. [\#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed dictionaries with a source from a ClickHouse `VIEW`, now reading such dictionaries doesn’t cause the error `There is no query`. [\#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Fixed checking if a client host is allowed by host\_regexp specified in users.xml.
[\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241), [\#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar)) -- `RENAME TABLE` for a distributed table now renames the folder containing inserted data before sending to shards. This fixes an issue with successive renames `tableA->tableB`, `tableC->tableA`. [\#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix)) -- `range_hashed` external dictionaries created by DDL queries now allow ranges of arbitrary numeric types. [\#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin)) -- Fixed `INSERT INTO table SELECT ... FROM mysql(...)` table function. [\#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix)) -- Fixed segfault in `INSERT INTO TABLE FUNCTION file()` while inserting into a file which doesn’t exist. Now in this case the file is created and then the insert is processed. [\#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia)) -- Fixed bitmapAnd error when intersecting an aggregated bitmap and a scalar bitmap. [\#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432)) -- Fixed segfault when `EXISTS` query was used without `TABLE` or `DICTIONARY` qualifier, just like `EXISTS t`. [\#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed return type for functions `rand` and `randConstant` in case of a nullable argument. Now functions always return `UInt32` and never `Nullable(UInt32)`. [\#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Fixed `DROP DICTIONARY IF EXISTS db.dict`, now it doesn’t throw an exception if `db` doesn’t exist. [\#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar)) -- If a table wasn’t completely dropped because of a server crash, the server will try to restore and load it [\#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix)) -- Fixed a trivial count query for a distributed table if there are more than two shards of a local table. [\#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu)) -- Fixed a bug that led to a data race in DB::BlockStreamProfileInfo::calculateRowsBeforeLimit() [\#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz)) -- Fixed `ALTER table MOVE part` executed immediately after merging the specified part, which could cause moving a part which the specified part merged into. Now it correctly moves the specified part. [\#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Expressions for dictionaries can be specified as strings now. This is useful for calculation of attributes while extracting data from non-ClickHouse sources because it allows using non-ClickHouse syntax for those expressions. [\#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin)) -- Fixed a very rare race in `clickhouse-copier` because of an overflow in ZXid.
[\#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009)) -- Fixed the bug when after the query failed (due to “Too many simultaneous queries” for example) it would not read external tables info, and the next request would interpret this info as the beginning of the next query causing an error like `Unknown packet from client`. [\#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat)) -- Avoid null dereference after “Unknown packet X from server” [\#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat)) -- Restore support of all ICU locales, add the ability to apply collations for constant expressions and add language name to system.collations table. [\#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin)) -- The number of streams for reading from `StorageFile` and `StorageHDFS` is now limited, to avoid exceeding the memory limit. [\#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin)) -- Fixed `CHECK TABLE` query for `*MergeTree` tables without a key. [\#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin)) -- Removed the mutation number from a part name in case there were no mutations. This removal improved compatibility with older versions. [\#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin)) -- Fixed the bug that mutations are skipped for some attached parts because their data\_version is larger than the table mutation version. [\#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang)) -- Allow starting the server with redundant copies of parts after moving them to another device. [\#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Fixed the error “Sizes of columns doesn’t match” that might appear when using aggregate function columns. [\#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea)) -- Now an exception will be thrown in case of using WITH TIES alongside LIMIT BY. And now it’s possible to use TOP with LIMIT BY. [\#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) -- Fix dictionary reload if it has `invalidate_query`, which stopped updates after an exception on previous update attempts. [\#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin))
[\#7360](https://github.com/ClickHouse/ClickHouse/pull/7360) ([alesapin](https://github.com/alesapin)) -- Make `bloom_filter` type of index supporting `LowCardinality` and `Nullable` [\#7363](https://github.com/ClickHouse/ClickHouse/issues/7363) [\#7561](https://github.com/ClickHouse/ClickHouse/pull/7561) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Add function `isValidJSON` to check that passed string is a valid json. [\#5910](https://github.com/ClickHouse/ClickHouse/issues/5910) [\#7293](https://github.com/ClickHouse/ClickHouse/pull/7293) ([Vdimir](https://github.com/Vdimir)) -- Implement `arrayCompact` function [\#7328](https://github.com/ClickHouse/ClickHouse/pull/7328) ([Memo](https://github.com/Joeywzr)) -- Created function `hex` for Decimal numbers. It works like `hex(reinterpretAsString())`, but doesn’t delete last zero bytes. [\#7355](https://github.com/ClickHouse/ClickHouse/pull/7355) ([Mikhail Korotov](https://github.com/millb)) -- Add `arrayFill` and `arrayReverseFill` functions, which replace elements by other elements in front/back of them in the array. [\#7380](https://github.com/ClickHouse/ClickHouse/pull/7380) ([hcz](https://github.com/hczhcz)) -- Add `CRC32IEEE()`/`CRC64()` support [\#7480](https://github.com/ClickHouse/ClickHouse/pull/7480) ([Azat Khuzhin](https://github.com/azat)) -- Implement `char` function similar to one in [mysql](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char) [\#7486](https://github.com/ClickHouse/ClickHouse/pull/7486) ([sundyli](https://github.com/sundy-li)) -- Add `bitmapTransform` function. It transforms an array of values in a bitmap to another array of values, the result is a new bitmap [\#7598](https://github.com/ClickHouse/ClickHouse/pull/7598) ([Zhichang Yu](https://github.com/yuzhichang)) -- Implemented `javaHashUTF16LE()` function [\#7651](https://github.com/ClickHouse/ClickHouse/pull/7651) ([achimbab](https://github.com/achimbab)) -- Add `_shard_num` virtual column for the Distributed engine [\#7624](https://github.com/ClickHouse/ClickHouse/pull/7624) ([Azat Khuzhin](https://github.com/azat)) - -#### Experimental Feature {#experimental-feature} - -- Support for processors (new query execution pipeline) in `MergeTree`. [\#7181](https://github.com/ClickHouse/ClickHouse/pull/7181) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -#### Bug Fix {#bug-fix-1} - -- Fix incorrect float parsing in `Values` [\#7817](https://github.com/ClickHouse/ClickHouse/issues/7817) [\#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix)) -- Fix rare deadlock which can happen when trace\_log is enabled. [\#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov)) -- Prevent message duplication when producing Kafka table has any MVs selecting from it [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7)) -- Support for `Array(LowCardinality(Nullable(String)))` in `IN`. Resolves [\#7364](https://github.com/ClickHouse/ClickHouse/issues/7364) [\#7366](https://github.com/ClickHouse/ClickHouse/pull/7366) ([achimbab](https://github.com/achimbab)) -- Add handling of `SQL_TINYINT` and `SQL_BIGINT`, and fix handling of `SQL_FLOAT` data source types in ODBC Bridge. 
[\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon)) -- Fix aggregation (`avg` and quantiles) over empty decimal columns [\#7431](https://github.com/ClickHouse/ClickHouse/pull/7431) ([Andrey Konyaev](https://github.com/akonyaev90)) -- Fix `INSERT` into Distributed with `MATERIALIZED` columns [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat)) -- Make `MOVE PARTITION` work if some parts of partition are already on destination disk or volume [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Fixed bug with hardlinks failing to be created during mutations in `ReplicatedMergeTree` in multi-disk configurations. [\#7558](https://github.com/ClickHouse/ClickHouse/pull/7558) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Fixed a bug with a mutation on a MergeTree when whole part remains unchanged and best space is being found on another disk [\#7602](https://github.com/ClickHouse/ClickHouse/pull/7602) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Fixed bug with `keep_free_space_ratio` not being read from disks configuration [\#7645](https://github.com/ClickHouse/ClickHouse/pull/7645) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Fix bug with table contains only `Tuple` columns or columns with complex paths. Fixes [7541](https://github.com/ClickHouse/ClickHouse/issues/7541). [\#7545](https://github.com/ClickHouse/ClickHouse/pull/7545) ([alesapin](https://github.com/alesapin)) -- Do not account memory for Buffer engine in max\_memory\_usage limit [\#7552](https://github.com/ClickHouse/ClickHouse/pull/7552) ([Azat Khuzhin](https://github.com/azat)) -- Fix final mark usage in `MergeTree` tables ordered by `tuple()`. In rare cases it could lead to `Can't adjust last granule` error while select. [\#7639](https://github.com/ClickHouse/ClickHouse/pull/7639) ([Anton Popov](https://github.com/CurtizJ)) -- Fix bug in mutations that have predicate with actions that require context (for example functions for json), which may lead to crashes or strange exceptions. [\#7664](https://github.com/ClickHouse/ClickHouse/pull/7664) ([alesapin](https://github.com/alesapin)) -- Fix mismatch of database and table names escaping in `data/` and `shadow/` directories [\#7575](https://github.com/ClickHouse/ClickHouse/pull/7575) ([Alexander Burmak](https://github.com/Alex-Burmak)) -- Support duplicated keys in RIGHT\|FULL JOINs, e.g. `ON t.x = u.x AND t.x = u.y`. Fix crash in this case. [\#7586](https://github.com/ClickHouse/ClickHouse/pull/7586) ([Artem Zuikov](https://github.com/4ertus2)) -- Fix `Not found column in block` when joining on expression with RIGHT or FULL JOIN. [\#7641](https://github.com/ClickHouse/ClickHouse/pull/7641) ([Artem Zuikov](https://github.com/4ertus2)) -- One more attempt to fix infinite loop in `PrettySpace` format [\#7591](https://github.com/ClickHouse/ClickHouse/pull/7591) ([Olga Khvostikova](https://github.com/stavrolia)) -- Fix bug in `concat` function when all arguments were `FixedString` of the same size. [\#7635](https://github.com/ClickHouse/ClickHouse/pull/7635) ([alesapin](https://github.com/alesapin)) -- Fixed exception in case of using 1 argument while defining S3, URL and HDFS storages. 
[\#7618](https://github.com/ClickHouse/ClickHouse/pull/7618) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Fix scope of the InterpreterSelectQuery for views with query [\#7601](https://github.com/ClickHouse/ClickHouse/pull/7601) ([Azat Khuzhin](https://github.com/azat)) - -#### Improvement {#improvement} - -- `Nullable` columns recognized and NULL-values handled correctly by ODBC-bridge [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk)) -- Write current batch for distributed send atomically [\#7600](https://github.com/ClickHouse/ClickHouse/pull/7600) ([Azat Khuzhin](https://github.com/azat)) -- Throw an exception if we cannot detect table for column name in query. [\#7358](https://github.com/ClickHouse/ClickHouse/pull/7358) ([Artem Zuikov](https://github.com/4ertus2)) -- Add `merge_max_block_size` setting to `MergeTreeSettings` [\#7412](https://github.com/ClickHouse/ClickHouse/pull/7412) ([Artem Zuikov](https://github.com/4ertus2)) -- Queries with `HAVING` and without `GROUP BY` assume group by constant. So, `SELECT 1 HAVING 1` now returns a result. [\#7496](https://github.com/ClickHouse/ClickHouse/pull/7496) ([Amos Bird](https://github.com/amosbird)) -- Support parsing `(X,)` as tuple similar to python. [\#7501](https://github.com/ClickHouse/ClickHouse/pull/7501), [\#7562](https://github.com/ClickHouse/ClickHouse/pull/7562) ([Amos Bird](https://github.com/amosbird)) -- Make `range` function behaviors almost like pythonic one. [\#7518](https://github.com/ClickHouse/ClickHouse/pull/7518) ([sundyli](https://github.com/sundy-li)) -- Add `constraints` columns to table `system.settings` [\#7553](https://github.com/ClickHouse/ClickHouse/pull/7553) ([Vitaly Baranov](https://github.com/vitlibar)) -- Better Null format for tcp handler, so that it’s possible to use `select ignore() from table format Null` for perf measure via clickhouse-client [\#7606](https://github.com/ClickHouse/ClickHouse/pull/7606) ([Amos Bird](https://github.com/amosbird)) -- Queries like `CREATE TABLE ... AS (SELECT (1, 2))` are parsed correctly [\#7542](https://github.com/ClickHouse/ClickHouse/pull/7542) ([hcz](https://github.com/hczhcz)) - -#### Performance Improvement {#performance-improvement} - -- The performance of aggregation over short string keys is improved. [\#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird)) -- Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded. [\#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([Amos Bird](https://github.com/amosbird)) -- Use storage meta info to evaluate trivial `SELECT count() FROM table;` [\#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([Amos Bird](https://github.com/amosbird), [alexey-milovidov](https://github.com/alexey-milovidov)) -- Vectorize processing `arrayReduce` similar to Aggregator `addBatch`. [\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird)) -- Minor improvements in performance of `Kafka` consumption [\#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([Ivan](https://github.com/abyss7)) - -#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement} - -- Add support for cross-compiling to the CPU architecture AARCH64. Refactor packager script. 
#### Performance Improvement {#performance-improvement}

- The performance of aggregation over short string keys is improved. [\#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird))
- Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded. [\#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([Amos Bird](https://github.com/amosbird))
- Use storage meta info to evaluate trivial `SELECT count() FROM table;` [\#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([Amos Bird](https://github.com/amosbird), [alexey-milovidov](https://github.com/alexey-milovidov))
- Vectorize processing `arrayReduce` similar to Aggregator `addBatch` (see the sketch after this list). [\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird))
- Minor improvements in performance of `Kafka` consumption [\#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([Ivan](https://github.com/abyss7))
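For reference, `arrayReduce` applies an aggregate function to array elements; this is the code path the vectorization above targets (values are illustrative):

```sql
SELECT arrayReduce('sum', [1, 2, 3, 4]) AS s;  -- 10
```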
#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement}

- Add support for cross-compiling to the CPU architecture AARCH64. Refactor packager script. [\#7370](https://github.com/ClickHouse/ClickHouse/pull/7370) [\#7539](https://github.com/ClickHouse/ClickHouse/pull/7539) ([Ivan](https://github.com/abyss7))
- Unpack darwin-x86\_64 and linux-aarch64 toolchains into mounted Docker volume when building packages [\#7534](https://github.com/ClickHouse/ClickHouse/pull/7534) ([Ivan](https://github.com/abyss7))
- Update Docker Image for Binary Packager [\#7474](https://github.com/ClickHouse/ClickHouse/pull/7474) ([Ivan](https://github.com/abyss7))
- Fixed compile errors on MacOS Catalina [\#7585](https://github.com/ClickHouse/ClickHouse/pull/7585) ([Ernest Poletaev](https://github.com/ernestp))
- Some refactoring in query analysis logic: split complex class into several simple ones. [\#7454](https://github.com/ClickHouse/ClickHouse/pull/7454) ([Artem Zuikov](https://github.com/4ertus2))
- Fix build without submodules [\#7295](https://github.com/ClickHouse/ClickHouse/pull/7295) ([proller](https://github.com/proller))
- Better `add_globs` in CMake files [\#7418](https://github.com/ClickHouse/ClickHouse/pull/7418) ([Amos Bird](https://github.com/amosbird))
- Remove hardcoded paths in `unwind` target [\#7460](https://github.com/ClickHouse/ClickHouse/pull/7460) ([Konstantin Podshumok](https://github.com/podshumok))
- Allow using MySQL format without SSL [\#7524](https://github.com/ClickHouse/ClickHouse/pull/7524) ([proller](https://github.com/proller))

#### Other {#other}

- Added ANTLR4 grammar for ClickHouse SQL dialect [\#7595](https://github.com/ClickHouse/ClickHouse/issues/7595) [\#7596](https://github.com/ClickHouse/ClickHouse/pull/7596) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse release v19.16 {#clickhouse-release-v19-16}

#### ClickHouse release v19.16.14.65, 2020-03-25

- Fixed a bug in batched calculations of ternary logical ops on multiple arguments (more than 10). [#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([Alexander Kazakov](https://github.com/Akazz)) This bugfix was backported to version 19.16 by a special request from Altinity.

#### ClickHouse release v19.16.14.65, 2020-03-05 {#clickhouse-release-v19-16-14-65-2020-03-05}

- Fix distributed subqueries incompatibility with older CH versions. Fixes [\#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) [(tabplubix)](https://github.com/tavplubix)
- When executing `CREATE` query, fold constant expressions in storage engine arguments. Replace empty database name with current database. Fixes [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix check for local address in `ClickHouseDictionarySource`. [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tabplubix)](https://github.com/tavplubix)
- Now background merges in `*MergeTree` table engines family preserve storage policy volume order more accurately. [\#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon))
- Prevent losing data in `Kafka` in rare cases when exception happens after reading suffix but before commit. Fixes [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [\#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov)
- Fix bug leading to server termination when trying to use / drop a `Kafka` table created with wrong parameters. Fixes [\#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [\#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov)
- Allow using `MaterializedView` with subqueries above `Kafka` tables. [\#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))

#### New Feature {#new-feature-1}

- Add `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views (see the sketch after this list). This new feature was added to the bugfix release by a special request from Altinity. [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)
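A minimal sketch of enabling the new option for a session; `dst` is a hypothetical table with dependent materialized views:

```sql
SET deduplicate_blocks_in_dependent_materialized_views = 1;
-- A retried INSERT of the same block is now deduplicated both in the target
-- table and in its dependent materialized views.
INSERT INTO dst VALUES (1, 'a');
```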
### ClickHouse release v19.16.2.2, 2019-10-30 {#clickhouse-release-v19-16-2-2-2019-10-30}

#### Backward Incompatible Change {#backward-incompatible-change-1}

- Add missing arity validation for count/countIf. [\#7095](https://github.com/ClickHouse/ClickHouse/issues/7095) [\#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir))
- Remove legacy `asterisk_left_columns_only` setting (it was disabled by default). [\#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([Artem Zuikov](https://github.com/4ertus2))
- Format strings for Template data format are now specified in files. [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) ([tavplubix](https://github.com/tavplubix))

#### New Feature {#new-feature-2}

- Introduce uniqCombined64() to calculate cardinality greater than UINT\_MAX (see the examples after this list). [\#7213](https://github.com/ClickHouse/ClickHouse/pull/7213), [\#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat Khuzhin](https://github.com/azat))
- Support Bloom filter indexes on Array columns. [\#6984](https://github.com/ClickHouse/ClickHouse/pull/6984) ([achimbab](https://github.com/achimbab))
- Add a function `getMacro(name)` that returns String with the value of corresponding `` from server configuration. [\#7240](https://github.com/ClickHouse/ClickHouse/pull/7240) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Set two configuration options for a dictionary based on an HTTP source: `credentials` and `http-headers`. [\#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add a new ProfileEvent `Merge` that counts the number of launched background merges. [\#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([Mikhail Korotov](https://github.com/millb))
- Add fullHostName function that returns a fully qualified domain name. [\#7263](https://github.com/ClickHouse/ClickHouse/issues/7263) [\#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundyli](https://github.com/sundy-li))
- Add functions `arraySplit` and `arrayReverseSplit`, which split an array by “cut off” conditions. They are useful in time sequence handling. [\#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz))
- Add new functions that return the Array of all matched indices in multiMatch family of functions. [\#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila Kutenin](https://github.com/danlark1))
- Add a new database engine `Lazy` that is optimized for storing a large number of small -Log tables. [\#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([Nikita Vasilev](https://github.com/nikvas0))
- Add aggregate functions groupBitmapAnd, -Or, -Xor for bitmap columns. [\#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([Zhichang Yu](https://github.com/yuzhichang))
- Add aggregate function combinators -OrNull and -OrDefault, which return null or default values when there is nothing to aggregate. [\#7331](https://github.com/ClickHouse/ClickHouse/pull/7331) ([hcz](https://github.com/hczhcz))
- Introduce CustomSeparated data format that supports custom escaping and delimiter rules. [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) ([tavplubix](https://github.com/tavplubix))
- Support Redis as source of external dictionary. [\#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [\#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [Anton Popov](https://github.com/CurtizJ))
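Illustrative queries for some of the new functions and combinators above (commented results are sketches, not guaranteed output):

```sql
SELECT uniqCombined64(number) FROM numbers(1000000);  -- 64-bit variant for cardinalities above UINT_MAX
SELECT sumOrNull(number) FROM numbers(0);             -- NULL instead of 0 when there is nothing to aggregate
SELECT fullHostName();                                -- fully qualified domain name of the server
```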
#### Bug Fix {#bug-fix-2}

- Fix wrong query result if it has `WHERE IN (SELECT ...)` section and `optimize_read_in_order` is used. [\#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([Anton Popov](https://github.com/CurtizJ))
- Disabled MariaDB authentication plugin, which depends on files outside of project. [\#7140](https://github.com/ClickHouse/ClickHouse/pull/7140) ([Yuriy Baranov](https://github.com/yurriy))
- Fix exception `Cannot convert column ... because it is constant but values of constants are different in source and result` which could rarely happen when functions `now()`, `today()`, `yesterday()`, `randConstant()` are used. [\#7156](https://github.com/ClickHouse/ClickHouse/pull/7156) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed usage of HTTP keep-alive timeout instead of TCP keep-alive timeout. [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
- Fixed a segmentation fault in groupBitmapOr (issue [\#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)). [\#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Zhichang Yu](https://github.com/yuzhichang))
- For materialized views the commit for Kafka is called after all data were written. [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
- Fixed wrong `duration_ms` value in `system.part_log` table. It was ten times off. [\#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Vladimir Chebotarev](https://github.com/excitoon))
- A quick fix to resolve crash in LIVE VIEW table and re-enabling all LIVE VIEW tests. [\#7201](https://github.com/ClickHouse/ClickHouse/pull/7201) ([vzakaznikov](https://github.com/vzakaznikov))
- Serialize NULL values correctly in min/max indexes of MergeTree parts. [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Don’t put virtual columns to .sql metadata when table is created as `CREATE TABLE AS`. [\#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Ivan](https://github.com/abyss7))
- Fix segmentation fault in `ATTACH PART` query. [\#7185](https://github.com/ClickHouse/ClickHouse/pull/7185) ([alesapin](https://github.com/alesapin))
- Fix wrong result for some queries caused by the optimization of empty IN subqueries and empty INNER/RIGHT JOIN. [\#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix AddressSanitizer error in the LIVE VIEW getHeader() method. [\#7271](https://github.com/ClickHouse/ClickHouse/pull/7271) ([vzakaznikov](https://github.com/vzakaznikov))

#### Improvement {#improvement-1}

- Add a message in case queue\_wait\_max\_ms wait takes place. [\#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Azat Khuzhin](https://github.com/azat))
- Made setting `s3_min_upload_part_size` table-level. [\#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Vladimir Chebotarev](https://github.com/excitoon))
- Check TTL in StorageFactory. [\#7304](https://github.com/ClickHouse/ClickHouse/pull/7304) ([sundyli](https://github.com/sundy-li))
- Squash left-hand blocks in partial merge join (optimization). [\#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Artem Zuikov](https://github.com/4ertus2))
- Do not allow non-deterministic functions in mutations of Replicated table engines, because this can introduce inconsistencies between replicas. [\#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Alexander Kazakov](https://github.com/Akazz))
- Disable memory tracker while converting exception stack trace to string. It can prevent the loss of error messages of type `Memory limit exceeded` on the server, which caused the `Attempt to read after eof` exception on the client. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Miscellaneous format improvements. Resolves [\#6033](https://github.com/ClickHouse/ClickHouse/issues/6033), [\#2633](https://github.com/ClickHouse/ClickHouse/issues/2633), [\#6611](https://github.com/ClickHouse/ClickHouse/issues/6611), [\#6742](https://github.com/ClickHouse/ClickHouse/issues/6742) [\#7215](https://github.com/ClickHouse/ClickHouse/pull/7215) ([tavplubix](https://github.com/tavplubix))
- ClickHouse ignores values on the right side of IN operator that are not convertible to the left side type. Make it work properly for compound types – Array and Tuple. [\#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Support missing inequalities for ASOF JOIN. It’s possible to join the less-or-equal variant and the strict greater and less variants for the ASOF column in ON syntax (see the sketch after this list). [\#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem Zuikov](https://github.com/4ertus2))
- Optimize partial merge join. [\#7070](https://github.com/ClickHouse/ClickHouse/pull/7070) ([Artem Zuikov](https://github.com/4ertus2))
- Do not use more than 98K of memory in uniqCombined functions. [\#7236](https://github.com/ClickHouse/ClickHouse/pull/7236), [\#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat Khuzhin](https://github.com/azat))
- Flush parts of right-hand joining table on disk in PartialMergeJoin (if there is not enough memory). Load data back when needed. [\#7186](https://github.com/ClickHouse/ClickHouse/pull/7186) ([Artem Zuikov](https://github.com/4ertus2))
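A sketch of the extended `ASOF JOIN` inequality support; `events` and `quotes` are hypothetical tables with an ordered `time` column:

```sql
SELECT e.id, e.time, q.price
FROM events AS e
ASOF JOIN quotes AS q
    ON e.id = q.id AND e.time >= q.time;  -- <=, < and > variants are now accepted as well
```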
#### Performance Improvement {#performance-improvement-1}

- Speed up joinGet with const arguments by avoiding data duplication (see the sketch after this list). [\#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos Bird](https://github.com/amosbird))
- Return early if the subquery is empty. [\#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu))
- Optimize parsing of SQL expression in Values. [\#6781](https://github.com/ClickHouse/ClickHouse/pull/6781) ([tavplubix](https://github.com/tavplubix))
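`joinGet` reads a value from a table with the `Join` engine; a minimal sketch of the constant-argument call that the speedup above targets (`db.join_tbl` and its columns are hypothetical):

```sql
-- Assumes: CREATE TABLE db.join_tbl (id UInt32, value String) ENGINE = Join(ANY, LEFT, id);
SELECT joinGet('db.join_tbl', 'value', toUInt32(42));
```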
#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-1}

- Disable some contribs for cross-compilation to Mac OS. [\#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7))
- Add missing linking with PocoXML for clickhouse\_common\_io. [\#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat Khuzhin](https://github.com/azat))
- Accept multiple test filter arguments in clickhouse-test. [\#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Enable musl and jemalloc for ARM. [\#7300](https://github.com/ClickHouse/ClickHouse/pull/7300) ([Amos Bird](https://github.com/amosbird))
- Added `--client-option` parameter to `clickhouse-test` to pass additional parameters to client. [\#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Preserve existing configs on rpm package upgrade. [\#7103](https://github.com/ClickHouse/ClickHouse/pull/7103) ([filimonov](https://github.com/filimonov))
- Fix errors detected by PVS. [\#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem Zuikov](https://github.com/4ertus2))
- Fix build for Darwin. [\#7149](https://github.com/ClickHouse/ClickHouse/pull/7149) ([Ivan](https://github.com/abyss7))
- glibc 2.29 compatibility. [\#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos Bird](https://github.com/amosbird))
- Make sure dh\_clean does not touch potential source files. [\#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos Bird](https://github.com/amosbird))
- Attempt to avoid conflict when updating from altinity rpm - it has config file packaged separately in clickhouse-server-common. [\#7073](https://github.com/ClickHouse/ClickHouse/pull/7073) ([filimonov](https://github.com/filimonov))
- Optimize some header files for faster rebuilds. [\#7212](https://github.com/ClickHouse/ClickHouse/pull/7212), [\#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Add performance tests for Date and DateTime. [\#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily Nemkov](https://github.com/Enmk))
- Fix some tests that contained non-deterministic mutations. [\#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander Kazakov](https://github.com/Akazz))
- Add build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Avoid use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
- Fix some issues in Fields found by MemorySanitizer. [\#7135](https://github.com/ClickHouse/ClickHouse/pull/7135), [\#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander Kuzmenkov](https://github.com/akuzm)), [\#7376](https://github.com/ClickHouse/ClickHouse/pull/7376) ([Amos Bird](https://github.com/amosbird))
- Fix undefined behavior in murmurhash32. [\#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos Bird](https://github.com/amosbird))
- Fix undefined behavior in StoragesInfoStream. [\#7384](https://github.com/ClickHouse/ClickHouse/pull/7384) ([tavplubix](https://github.com/tavplubix))
- Fixed constant expressions folding for external database engines (MySQL, ODBC, JDBC). In previous versions it wasn’t working for multiple constant expressions and was not working at all for Date, DateTime and UUID. This fixes [\#7245](https://github.com/ClickHouse/ClickHouse/issues/7245) [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix ThreadSanitizer data race error in the LIVE VIEW when accessing no\_users\_thread variable. [\#7353](https://github.com/ClickHouse/ClickHouse/pull/7353) ([vzakaznikov](https://github.com/vzakaznikov))
- Get rid of malloc symbols in libcommon [\#7134](https://github.com/ClickHouse/ClickHouse/pull/7134), [\#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos Bird](https://github.com/amosbird))
- Add global flag ENABLE\_LIBRARIES for disabling all libraries. [\#7063](https://github.com/ClickHouse/ClickHouse/pull/7063) ([proller](https://github.com/proller))
#### Code cleanup {#code-cleanup}

- Generalize configuration repository to prepare for DDL for Dictionaries. [\#7155](https://github.com/ClickHouse/ClickHouse/pull/7155) ([alesapin](https://github.com/alesapin))
- Parser for dictionaries DDL without any semantics. [\#7209](https://github.com/ClickHouse/ClickHouse/pull/7209) ([alesapin](https://github.com/alesapin))
- Split ParserCreateQuery into different smaller parsers. [\#7253](https://github.com/ClickHouse/ClickHouse/pull/7253) ([alesapin](https://github.com/alesapin))
- Small refactoring and renaming near external dictionaries. [\#7111](https://github.com/ClickHouse/ClickHouse/pull/7111) ([alesapin](https://github.com/alesapin))
- Refactor some code to prepare for role-based access control. [\#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly Baranov](https://github.com/vitlibar))
- Some improvements in DatabaseOrdinary code. [\#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita Vasilev](https://github.com/nikvas0))
- Do not use iterators in find() and emplace() methods of hash tables. [\#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Fix getMultipleValuesFromConfig in case when parameter root is not empty. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
- Remove some copy-paste (TemporaryFile and TemporaryFileStream) [\#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem Zuikov](https://github.com/4ertus2))
- Improved code readability a little bit (`MergeTreeData::getActiveContainingPart`). [\#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir Chebotarev](https://github.com/excitoon))
- Wait for all scheduled jobs, which are using local objects, if `ThreadPool::schedule(...)` throws an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and fix comments to make obvious that it may throw. [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))

## ClickHouse release 19.15 {#clickhouse-release-19-15}

### ClickHouse release 19.15.4.10, 2019-10-31 {#clickhouse-release-19-15-4-10-2019-10-31}

#### Bug Fix {#bug-fix-3}

- Added handling of SQL\_TINYINT and SQL\_BIGINT, and fixed handling of SQL\_FLOAT data source types in ODBC Bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
- Allowed to have some parts on destination disk or volume in MOVE PARTITION. [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed NULL-values in nullable columns through ODBC-bridge. [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
- Fixed INSERT into a Distributed non-local node with MATERIALIZED columns. [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
- Fixed function getMultipleValuesFromConfig. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
- Fixed usage of HTTP keep-alive timeout instead of TCP keep-alive timeout. [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
- Wait for all jobs to finish on exception (fixes rare segfaults). [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
- Don’t push to MVs when inserting into Kafka table. [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
- Disable memory tracker for exception stack. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed bad code in transforming query for external database. [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
- Added example config with macros for tests ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.15.3.6, 2019-10-09 {#clickhouse-release-19-15-3-6-2019-10-09}

#### Bug Fix {#bug-fix-4}

- Fixed bad\_variant in hashed dictionary. ([alesapin](https://github.com/alesapin))
- Fixed bug with segmentation fault in `ATTACH PART` query. ([alesapin](https://github.com/alesapin))
- Fixed time calculation in `MergeTreeData`. ([Vladimir Chebotarev](https://github.com/excitoon))
- Commit to Kafka explicitly after the writing is finalized. [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
- Serialize NULL values correctly in min/max indexes of MergeTree parts. [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))
### ClickHouse release 19.15.2.2, 2019-10-01 {#clickhouse-release-19-15-2-2-2019-10-01}

#### New Feature {#new-feature-3}

- Tiered storage: support to use multiple storage volumes for tables with MergeTree engine. It’s possible to store fresh data on SSD and automatically move old data to HDD. ([example](https://clickhouse.github.io/clickhouse-presentations/meetup30/new_features/#12)). [\#4918](https://github.com/ClickHouse/ClickHouse/pull/4918) ([Igr](https://github.com/ObjatieGroba)) [\#6489](https://github.com/ClickHouse/ClickHouse/pull/6489) ([alesapin](https://github.com/alesapin))
- Add table function `input` for reading incoming data in `INSERT SELECT` query (see the sketch after this list). [\#5450](https://github.com/ClickHouse/ClickHouse/pull/5450) ([palasonic1](https://github.com/palasonic1)) [\#6832](https://github.com/ClickHouse/ClickHouse/pull/6832) ([Anton Popov](https://github.com/CurtizJ))
- Add a `sparse_hashed` dictionary layout that is functionally equivalent to the `hashed` layout but is more memory efficient. It uses about half as much memory at the cost of slower value retrieval. [\#6894](https://github.com/ClickHouse/ClickHouse/pull/6894) ([Azat Khuzhin](https://github.com/azat))
- Implement ability to define list of users for access to dictionaries. Only the currently connected database is used. [\#6907](https://github.com/ClickHouse/ClickHouse/pull/6907) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add `LIMIT` option to `SHOW` query. [\#6944](https://github.com/ClickHouse/ClickHouse/pull/6944) ([Philipp Malkovsky](https://github.com/malkfilipp))
- Add `bitmapSubsetLimit(bitmap, range_start, limit)` function that returns a subset of the smallest `limit` values in a set that is no smaller than `range_start`. [\#6957](https://github.com/ClickHouse/ClickHouse/pull/6957) ([Zhichang Yu](https://github.com/yuzhichang))
- Add `bitmapMin` and `bitmapMax` functions. [\#6970](https://github.com/ClickHouse/ClickHouse/pull/6970) ([Zhichang Yu](https://github.com/yuzhichang))
- Add function `repeat` related to [issue-6648](https://github.com/ClickHouse/ClickHouse/issues/6648) [\#6999](https://github.com/ClickHouse/ClickHouse/pull/6999) ([flynn](https://github.com/ucasFL))
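Illustrative usage of some of the features above (`t` is a hypothetical target table; commented results are sketches):

```sql
-- input(): transform incoming data on the fly during INSERT
-- (the raw data follows the query via the client or the HTTP body):
INSERT INTO t SELECT lower(s), n * 2 FROM input('s String, n UInt32') FORMAT CSV
-- bitmap helpers and repeat():
SELECT bitmapMin(bitmapBuild([5, 2, 9])) AS mn,  -- 2
       bitmapMax(bitmapBuild([5, 2, 9])) AS mx,  -- 9
       repeat('ab', 3) AS s;                     -- 'ababab'
```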
#### Experimental Feature {#experimental-feature-1}

- Implement (in memory) Merge Join variant that does not change current pipeline. Result is partially sorted by merge key. Set `partial_merge_join = 1` to use this feature. The Merge Join is still in development. [\#6940](https://github.com/ClickHouse/ClickHouse/pull/6940) ([Artem Zuikov](https://github.com/4ertus2))
- Add `S3` engine and table function. It is still in development (no authentication support yet). [\#5596](https://github.com/ClickHouse/ClickHouse/pull/5596) ([Vladimir Chebotarev](https://github.com/excitoon))

#### Improvement {#improvement-2}

- Every message read from Kafka is inserted atomically. This resolves almost all known issues with Kafka engine. [\#6950](https://github.com/ClickHouse/ClickHouse/pull/6950) ([Ivan](https://github.com/abyss7))
- Improvements for failover of Distributed queries. Shorten recovery time; it is now configurable and can be seen in `system.clusters`. [\#6399](https://github.com/ClickHouse/ClickHouse/pull/6399) ([Vasily Nemkov](https://github.com/Enmk))
- Support numeric values for Enums directly in `IN` section. \#6766 [\#6941](https://github.com/ClickHouse/ClickHouse/pull/6941) ([dimarub2000](https://github.com/dimarub2000))
- Support (optional, disabled by default) redirects on URL storage. [\#6914](https://github.com/ClickHouse/ClickHouse/pull/6914) ([maqroll](https://github.com/maqroll))
- Add information message when client with an older version connects to a server. [\#6893](https://github.com/ClickHouse/ClickHouse/pull/6893) ([Philipp Malkovsky](https://github.com/malkfilipp))
- Remove maximum backoff sleep time limit for sending data in Distributed tables [\#6895](https://github.com/ClickHouse/ClickHouse/pull/6895) ([Azat Khuzhin](https://github.com/azat))
- Add ability to send profile events (counters) with cumulative values to graphite. It can be enabled under `` in server `config.xml`. [\#6969](https://github.com/ClickHouse/ClickHouse/pull/6969) ([Azat Khuzhin](https://github.com/azat))
- Automatically cast type `T` to `LowCardinality(T)` while inserting data in column of type `LowCardinality(T)` in Native format via HTTP. [\#6891](https://github.com/ClickHouse/ClickHouse/pull/6891) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Add ability to use function `hex` without using `reinterpretAsString` for `Float32`, `Float64` (see the sketch after this list). [\#7024](https://github.com/ClickHouse/ClickHouse/pull/7024) ([Mikhail Korotov](https://github.com/millb))
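For example, `hex` now accepts floating-point arguments directly (the result is the hex of the binary representation; shown only as a sketch):

```sql
SELECT hex(toFloat32(1.5));  -- previously required reinterpretAsString(...) first
```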
#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-2}

- Add gdb-index to clickhouse binary with debug info. It will speed up startup time of `gdb`. [\#6947](https://github.com/ClickHouse/ClickHouse/pull/6947) ([alesapin](https://github.com/alesapin))
- Speed up deb packaging with patched dpkg-deb which uses `pigz`. [\#6960](https://github.com/ClickHouse/ClickHouse/pull/6960) ([alesapin](https://github.com/alesapin))
- Set `enable_fuzzing = 1` to enable libfuzzer instrumentation of all the project code. [\#7042](https://github.com/ClickHouse/ClickHouse/pull/7042) ([kyprizel](https://github.com/kyprizel))
- Add split build smoke test in CI. [\#7061](https://github.com/ClickHouse/ClickHouse/pull/7061) ([alesapin](https://github.com/alesapin))
- Add build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Replace `libsparsehash` with `sparsehash-c11` [\#6965](https://github.com/ClickHouse/ClickHouse/pull/6965) ([Azat Khuzhin](https://github.com/azat))

#### Bug Fix {#bug-fix-5}

- Fixed performance degradation of index analysis on complex keys on large tables. This fixes \#6924. [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix logical error causing segfaults when selecting from Kafka empty topic. [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
- Fix too early MySQL connection close in `MySQLBlockInputStream.cpp`. [\#6882](https://github.com/ClickHouse/ClickHouse/pull/6882) ([Clément Rodriguez](https://github.com/clemrodriguez))
- Returned support for very old Linux kernels (fix [\#6841](https://github.com/ClickHouse/ClickHouse/issues/6841)) [\#6853](https://github.com/ClickHouse/ClickHouse/pull/6853) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix possible data loss in `insert select` query in case of empty block in input stream. \#6834 \#6862 [\#6911](https://github.com/ClickHouse/ClickHouse/pull/6911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix for function `arrayEnumerateUniqRanked` with empty arrays in params [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
- Fix complex queries with array joins and global subqueries. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
- Fix `Unknown identifier` error in ORDER BY and GROUP BY with multiple JOINs [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed `MSan` warning while executing function with `LowCardinality` argument. [\#7062](https://github.com/ClickHouse/ClickHouse/pull/7062) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Backward Incompatible Change {#backward-incompatible-change-2}

- Changed serialization format of bitmap\* aggregate function states to improve performance. Serialized states of bitmap\* from previous versions cannot be read. [\#6908](https://github.com/ClickHouse/ClickHouse/pull/6908) ([Zhichang Yu](https://github.com/yuzhichang))

## ClickHouse release 19.14 {#clickhouse-release-19-14}

### ClickHouse release 19.14.7.15, 2019-10-02 {#clickhouse-release-19-14-7-15-2019-10-02}

#### Bug Fix {#bug-fix-6}

- This release also contains all bug fixes from 19.11.12.69.
- Fixed compatibility for distributed queries between 19.14 and earlier versions. This fixes [\#7068](https://github.com/ClickHouse/ClickHouse/issues/7068). [\#7069](https://github.com/ClickHouse/ClickHouse/pull/7069) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.14.6.12, 2019-09-19 {#clickhouse-release-19-14-6-12-2019-09-19}

#### Bug Fix {#bug-fix-7}

- Fix for function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
- Fixed subquery name in queries with `ARRAY JOIN` and `GLOBAL IN subquery` with alias. Use subquery alias for external table name if it is specified. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-3}

- Fix [flapping](https://clickhouse-test-reports.s3.yandex.net/6944/aab95fd5175a513413c7395a73a82044bdafb906/functional_stateless_tests_(debug).html) test `00715_fetch_merged_or_mutated_part_zookeeper` by rewriting it to a shell script because it needs to wait for mutations to apply. [\#6977](https://github.com/ClickHouse/ClickHouse/pull/6977) ([Alexander Kazakov](https://github.com/Akazz))
- Fixed UBSan and MemSan failure in function `groupUniqArray` with empty array argument. It was caused by placing of empty `PaddedPODArray` into hash table zero cell because constructor for zero cell value was not called. [\#6937](https://github.com/ClickHouse/ClickHouse/pull/6937) ([Amos Bird](https://github.com/amosbird))
### ClickHouse release 19.14.3.3, 2019-09-10 {#clickhouse-release-19-14-3-3-2019-09-10}

#### New Feature {#new-feature-4}

- `WITH FILL` modifier for `ORDER BY` (see the examples after this list). (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
- `WITH TIES` modifier for `LIMIT`. (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
- Parse unquoted `NULL` literal as NULL (if setting `format_csv_unquoted_null_literal_as_null=1`). Initialize null fields with default values if data type of this field is not nullable (if setting `input_format_null_as_default=1`). [\#5990](https://github.com/ClickHouse/ClickHouse/issues/5990) [\#6055](https://github.com/ClickHouse/ClickHouse/pull/6055) ([tavplubix](https://github.com/tavplubix))
- Support for wildcards in paths of table functions `file` and `hdfs`. If the path contains wildcards, the table will be readonly. Example of usage: `select * from hdfs('hdfs://hdfs1:9000/some_dir/another_dir/*/file{0..9}{0..9}')` and `select * from file('some_dir/{some_file,another_file,yet_another}.tsv', 'TSV', 'value UInt32')`. [\#6092](https://github.com/ClickHouse/ClickHouse/pull/6092) ([Olga Khvostikova](https://github.com/stavrolia))
- New `system.metric_log` table which stores values of `system.events` and `system.metrics` with specified time interval. [\#6363](https://github.com/ClickHouse/ClickHouse/issues/6363) [\#6467](https://github.com/ClickHouse/ClickHouse/pull/6467) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6530](https://github.com/ClickHouse/ClickHouse/pull/6530) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow to write ClickHouse text logs to `system.text_log` table. [\#6037](https://github.com/ClickHouse/ClickHouse/issues/6037) [\#6103](https://github.com/ClickHouse/ClickHouse/pull/6103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6164](https://github.com/ClickHouse/ClickHouse/pull/6164) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Show private symbols in stack traces (this is done via parsing symbol tables of ELF files). Added information about file and line number in stack traces if debug info is present. Speedup symbol name lookup with indexing symbols present in program. Added new SQL functions for introspection: `demangle` and `addressToLine`. Renamed function `symbolizeAddress` to `addressToSymbol` for consistency. Function `addressToSymbol` will return mangled name for performance reasons and you have to apply `demangle`. Added setting `allow_introspection_functions` which is turned off by default. [\#6201](https://github.com/ClickHouse/ClickHouse/pull/6201) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Table function `values` (the name is case-insensitive). It allows to read from `VALUES` list proposed in [\#5984](https://github.com/ClickHouse/ClickHouse/issues/5984). Example: `SELECT * FROM VALUES('a UInt64, s String', (1, 'one'), (2, 'two'), (3, 'three'))`. [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))
- Added an ability to alter storage settings. Syntax: `ALTER TABLE <table> MODIFY SETTING <setting> = <value>`. [\#6366](https://github.com/ClickHouse/ClickHouse/pull/6366) [\#6669](https://github.com/ClickHouse/ClickHouse/pull/6669) [\#6685](https://github.com/ClickHouse/ClickHouse/pull/6685) ([alesapin](https://github.com/alesapin))
- Support for removing of detached parts. Syntax: `ALTER TABLE <table> DROP DETACHED PART '<part_id>'`. [\#6158](https://github.com/ClickHouse/ClickHouse/pull/6158) ([tavplubix](https://github.com/tavplubix))
- Table constraints. Allows to add constraint to table definition which will be checked at insert. [\#5273](https://github.com/ClickHouse/ClickHouse/pull/5273) ([Gleb Novikov](https://github.com/NanoBjorn)) [\#6652](https://github.com/ClickHouse/ClickHouse/pull/6652) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support for cascaded materialized views. [\#6324](https://github.com/ClickHouse/ClickHouse/pull/6324) ([Amos Bird](https://github.com/amosbird))
- Turn on query profiler by default to sample every query execution thread once a second. [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Input format `ORC`. [\#6454](https://github.com/ClickHouse/ClickHouse/pull/6454) [\#6703](https://github.com/ClickHouse/ClickHouse/pull/6703) ([akonyaev90](https://github.com/akonyaev90))
- Added two new functions: `sigmoid` and `tanh` (that are useful for machine learning applications). [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Function `hasToken(haystack, token)`, `hasTokenCaseInsensitive(haystack, token)` to check if given token is in haystack (see the examples after this list). Token is a maximal-length substring between two non-alphanumeric ASCII characters (or boundaries of haystack). Token must be a constant string. Supported by tokenbf\_v1 index specialization. [\#6596](https://github.com/ClickHouse/ClickHouse/pull/6596), [\#6662](https://github.com/ClickHouse/ClickHouse/pull/6662) ([Vasily Nemkov](https://github.com/Enmk))
- New function `neighbor(value, offset[, default_value])`. Allows to reach prev/next value within column in a block of data. [\#5925](https://github.com/ClickHouse/ClickHouse/pull/5925) ([Alex Krash](https://github.com/alex-krash)) [6685365ab8c5b74f9650492c88a012596eb1b0c6](https://github.com/ClickHouse/ClickHouse/commit/6685365ab8c5b74f9650492c88a012596eb1b0c6) [341e2e4587a18065c2da1ca888c73389f48ce36c](https://github.com/ClickHouse/ClickHouse/commit/341e2e4587a18065c2da1ca888c73389f48ce36c) [Alexey Milovidov](https://github.com/alexey-milovidov)
- Created a function `currentUser()`, returning login of authorized user. Added alias `user()` for compatibility with MySQL. [\#6470](https://github.com/ClickHouse/ClickHouse/pull/6470) ([Alex Krash](https://github.com/alex-krash))
- New aggregate functions `quantilesExactInclusive` and `quantilesExactExclusive` which were proposed in [\#5885](https://github.com/ClickHouse/ClickHouse/issues/5885). [\#6477](https://github.com/ClickHouse/ClickHouse/pull/6477) ([dimarub2000](https://github.com/dimarub2000))
- Function `bitmapRange(bitmap, range_begin, range_end)` which returns new set with specified range (not including `range_end`). [\#6314](https://github.com/ClickHouse/ClickHouse/pull/6314) ([Zhichang Yu](https://github.com/yuzhichang))
- Function `geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)` which creates array of precision-long strings of geohash-boxes covering provided area. [\#6127](https://github.com/ClickHouse/ClickHouse/pull/6127) ([Vasily Nemkov](https://github.com/Enmk))
- Implement support for INSERT query with `Kafka` tables. [\#6012](https://github.com/ClickHouse/ClickHouse/pull/6012) ([Ivan](https://github.com/abyss7))
- Added support for `_partition` and `_timestamp` virtual columns to Kafka engine. [\#6400](https://github.com/ClickHouse/ClickHouse/pull/6400) ([Ivan](https://github.com/abyss7))
- Possibility to remove sensitive data from `query_log`, server logs, process list with regexp-based rules. [\#5710](https://github.com/ClickHouse/ClickHouse/pull/5710) ([filimonov](https://github.com/filimonov))
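A few illustrative queries for the new functions and modifiers above (commented results are sketches; exact output depends on the server version):

```sql
SELECT number FROM numbers(3) ORDER BY number WITH FILL FROM 0 TO 6;  -- fills missing rows 3, 4, 5
SELECT hasToken('Hello, world!', 'world');                            -- 1
SELECT number, neighbor(number, 1) FROM numbers(3);                   -- next value in the block, 0 past the end
SELECT currentUser();                                                 -- login of the authorized user
```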
#### Experimental Feature {#experimental-feature-2}

- Input and output data format `Template`. It allows to specify custom format string for input and output. [\#4354](https://github.com/ClickHouse/ClickHouse/issues/4354) [\#6727](https://github.com/ClickHouse/ClickHouse/pull/6727) ([tavplubix](https://github.com/tavplubix))
- Implementation of `LIVE VIEW` tables that were originally proposed in [\#2898](https://github.com/ClickHouse/ClickHouse/pull/2898), prepared in [\#3925](https://github.com/ClickHouse/ClickHouse/issues/3925), and then updated in [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541). See [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) for detailed description. [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) ([vzakaznikov](https://github.com/vzakaznikov)) [\#6425](https://github.com/ClickHouse/ClickHouse/pull/6425) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#6656](https://github.com/ClickHouse/ClickHouse/pull/6656) ([vzakaznikov](https://github.com/vzakaznikov)) Note that `LIVE VIEW` feature may be removed in next versions.

#### Bug Fix {#bug-fix-8}

- This release also contains all bug fixes from 19.13 and 19.11.
- Fix segmentation fault when the table has skip indices and vertical merge happens. [\#6723](https://github.com/ClickHouse/ClickHouse/pull/6723) ([alesapin](https://github.com/alesapin))
- Fix per-column TTL with non-trivial column defaults. Previously in case of force TTL merge with `OPTIMIZE ... FINAL` query, expired values were replaced by type defaults instead of user-specified column defaults. [\#6796](https://github.com/ClickHouse/ClickHouse/pull/6796) ([Anton Popov](https://github.com/CurtizJ))
- Fix Kafka messages duplication problem on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
- Fixed infinite loop when reading Kafka messages. Do not pause/resume consumer on subscription at all - otherwise it may get paused indefinitely in some scenarios. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([Ivan](https://github.com/abyss7))
- Fix `Key expression contains comparison between inconvertible types` exception in `bitmapContains` function. [\#6136](https://github.com/ClickHouse/ClickHouse/issues/6136) [\#6146](https://github.com/ClickHouse/ClickHouse/issues/6146) [\#6156](https://github.com/ClickHouse/ClickHouse/pull/6156) ([dimarub2000](https://github.com/dimarub2000))
- Fix segfault with enabled `optimize_skip_unused_shards` and missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([Anton Popov](https://github.com/CurtizJ))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed extra verbose logging in MySQL interface [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Return the ability to parse boolean settings from ‘true’ and ‘false’ in the configuration file. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
- Fix crash in `quantile` and `median` function over `Nullable(Decimal128)`. [\#6378](https://github.com/ClickHouse/ClickHouse/pull/6378) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed possible incomplete result returned by `SELECT` query with `WHERE` condition on primary key that contained conversion to Float type. It was caused by incorrect checking of monotonicity in `toFloat` function. [\#6248](https://github.com/ClickHouse/ClickHouse/issues/6248) [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
- Check `max_expanded_ast_elements` setting for mutations. Clear mutations after `TRUNCATE TABLE`. [\#6205](https://github.com/ClickHouse/ClickHouse/pull/6205) ([Winter Zhang](https://github.com/zhang2014))
- Fix JOIN results for key columns when used with `join_use_nulls`. Attach NULLs instead of column defaults. [\#6249](https://github.com/ClickHouse/ClickHouse/pull/6249) ([Artem Zuikov](https://github.com/4ertus2))
- Fix for skip indices with vertical merge and alter. Fix for `Bad size of marks file` exception. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594) [\#6713](https://github.com/ClickHouse/ClickHouse/pull/6713) ([alesapin](https://github.com/alesapin))
- Fix rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of merged/altered parts is empty (0 rows) [\#6746](https://github.com/ClickHouse/ClickHouse/issues/6746) [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
- Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix wrong behavior and possible segfaults in `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([Anton Popov](https://github.com/CurtizJ))
- Fixed unsafe code around `getIdentifier` function. [\#6401](https://github.com/ClickHouse/ClickHouse/issues/6401) [\#6409](https://github.com/ClickHouse/ClickHouse/pull/6409) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed bug in MySQL wire protocol (used while connecting to ClickHouse from MySQL client). Caused by heap buffer overflow in `PacketPayloadWriteBuffer`. [\#6212](https://github.com/ClickHouse/ClickHouse/pull/6212) ([Yuriy Baranov](https://github.com/yurriy))
- Fixed memory leak in `bitmapSubsetInRange` function. [\#6819](https://github.com/ClickHouse/ClickHouse/pull/6819) ([Zhichang Yu](https://github.com/yuzhichang))
- Fix rare bug when mutation executed after granularity change. [\#6816](https://github.com/ClickHouse/ClickHouse/pull/6816) ([alesapin](https://github.com/alesapin))
- Allow protobuf message with all fields by default. [\#6132](https://github.com/ClickHouse/ClickHouse/pull/6132) ([Vitaly Baranov](https://github.com/vitlibar))
- Resolve a bug with `nullIf` function when we send a `NULL` argument on the second argument. [\#6446](https://github.com/ClickHouse/ClickHouse/pull/6446) ([Guillaume Tassery](https://github.com/YiuRULE))
- Fix rare bug with wrong memory allocation/deallocation in complex key cache dictionaries with string fields which leads to infinite memory consumption (looks like memory leak). Bug reproduces when string size was a power of two starting from eight (8, 16, 32, etc). [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
- Fixed Gorilla encoding on small sequences which caused exception `Cannot write after end of buffer`. [\#6398](https://github.com/ClickHouse/ClickHouse/issues/6398) [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Vasily Nemkov](https://github.com/Enmk))
- Allow to use not nullable types in JOINs with `join_use_nulls` enabled. [\#6705](https://github.com/ClickHouse/ClickHouse/pull/6705) ([Artem Zuikov](https://github.com/4ertus2))
- Disable `Poco::AbstractConfiguration` substitutions in query in `clickhouse-client`. [\#6706](https://github.com/ClickHouse/ClickHouse/pull/6706) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid deadlock in `REPLACE PARTITION`. [\#6677](https://github.com/ClickHouse/ClickHouse/pull/6677) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Using `arrayReduce` for constant arguments may lead to segfault. [\#6242](https://github.com/ClickHouse/ClickHouse/issues/6242) [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix inconsistent parts which can appear if replica was restored after `DROP PARTITION`. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed hang in `JSONExtractRaw` function. [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix bug with incorrect skip indices serialization and aggregation with adaptive granularity. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594). [\#6748](https://github.com/ClickHouse/ClickHouse/pull/6748) ([alesapin](https://github.com/alesapin))
- Fix `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
- Fix bug with writing secondary indices marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
- Fix initialization order while server startup. Since `StorageMergeTree::background_task_handle` is initialized in `startup()` the `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))
- Clearing the data buffer from the previous read operation that was completed with an error. [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa))
- Fix bug with enabling adaptive granularity when creating a new replica for Replicated\*MergeTree table. [\#6394](https://github.com/ClickHouse/ClickHouse/issues/6394) [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
- Fixed possible crash during server startup in case an exception happened in `libunwind` during access to an uninitialized `ThreadStatus` structure. [\#6456](https://github.com/ClickHouse/ClickHouse/pull/6456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Fix crash in `yandexConsistentHash` function. Found by fuzz test. [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304) [\#6305](https://github.com/ClickHouse/ClickHouse/pull/6305) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the possibility of hanging queries when server is overloaded and global thread pool becomes near full. This has a higher chance to happen on clusters with a large number of shards (hundreds), because distributed queries allocate a thread per connection to each shard. For example, this issue may reproduce if a cluster of 330 shards is processing 30 concurrent distributed queries. This issue affects all versions starting from 19.2. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed logic of `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix segfault when decoding symbol table. [\#6603](https://github.com/ClickHouse/ClickHouse/pull/6603) ([Amos Bird](https://github.com/amosbird))
- Fixed irrelevant exception in cast of `LowCardinality(Nullable)` to not-Nullable column in case it doesn’t contain NULLs (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Removed extra quoting of description in `system.settings` table. [\#6696](https://github.com/ClickHouse/ClickHouse/issues/6696) [\#6699](https://github.com/ClickHouse/ClickHouse/pull/6699) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid possible deadlock in `TRUNCATE` of Replicated table. [\#6695](https://github.com/ClickHouse/ClickHouse/pull/6695) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix reading in order of sorting key. [\#6189](https://github.com/ClickHouse/ClickHouse/pull/6189) ([Anton Popov](https://github.com/CurtizJ))
- Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
- Fix bug opened by [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) (since 19.4.0). Reproduces in queries to Distributed tables over MergeTree tables when we don’t query any columns (`SELECT 1`). [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
- Fixed overflow in integer division of signed type to unsigned type. The behaviour was exactly as in C or C++ language (integer promotion rules) that may be surprising. Please note that the overflow is still possible when dividing a large signed number by a large unsigned number or vice versa (but that case is less usual). The issue existed in all server versions. [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214) [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Limit maximum sleep time for throttling when `max_execution_speed` or `max_execution_speed_bytes` is set. Fixed false errors like `Estimated query execution time (inf seconds) is too long`. [\#5547](https://github.com/ClickHouse/ClickHouse/issues/5547) [\#6232](https://github.com/ClickHouse/ClickHouse/pull/6232) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed issues about using `MATERIALIZED` columns and aliases in `MaterializedView`. [\#448](https://github.com/ClickHouse/ClickHouse/issues/448) [\#3484](https://github.com/ClickHouse/ClickHouse/issues/3484) [\#3450](https://github.com/ClickHouse/ClickHouse/issues/3450) [\#2878](https://github.com/ClickHouse/ClickHouse/issues/2878) [\#2285](https://github.com/ClickHouse/ClickHouse/issues/2285) [\#3796](https://github.com/ClickHouse/ClickHouse/pull/3796) ([Amos Bird](https://github.com/amosbird)) [\#6316](https://github.com/ClickHouse/ClickHouse/pull/6316) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix `FormatFactory` behaviour for input streams which are not implemented as processors. [\#6495](https://github.com/ClickHouse/ClickHouse/pull/6495) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed typo. [\#6631](https://github.com/ClickHouse/ClickHouse/pull/6631) ([Alex Ryndin](https://github.com/alexryndin))
- Fixed typo in the error message (is -\> are). [\#6839](https://github.com/ClickHouse/ClickHouse/pull/6839) ([Denis Zhuravlev](https://github.com/den-crane))
- Fixed error while parsing of columns list from string if type contained a comma (this issue was relevant for `File`, `URL`, `HDFS` storages) [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))

#### Security Fix {#security-fix}

- This release also contains all security bug fixes from 19.13 and 19.11.
- Fixed the possibility of a fabricated query to cause server crash due to stack overflow in SQL parser. Fixed the possibility of stack overflow in Merge and Distributed tables, materialized views and conditions for row-level security that involve subqueries. [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Improvement {#improvement-3}

- Correct implementation of ternary logic for `AND/OR`. [\#6048](https://github.com/ClickHouse/ClickHouse/pull/6048) ([Alexander Kazakov](https://github.com/Akazz))
- Now values and rows with expired TTL will be removed after `OPTIMIZE ... FINAL` query from old parts without TTL infos or with outdated TTL infos, e.g. after `ALTER ... MODIFY TTL` query. Added queries `SYSTEM STOP/START TTL MERGES` to disallow/allow assigning merges with TTL and filtering expired values in all merges. [\#6274](https://github.com/ClickHouse/ClickHouse/pull/6274) ([Anton Popov](https://github.com/CurtizJ))
- Possibility to change the location of ClickHouse history file for client using the `CLICKHOUSE_HISTORY_FILE` env variable. [\#6840](https://github.com/ClickHouse/ClickHouse/pull/6840) ([filimonov](https://github.com/filimonov))
- Remove `dry_run` flag from `InterpreterSelectQuery`. [\#6375](https://github.com/ClickHouse/ClickHouse/pull/6375) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Support `ASOF JOIN` with `ON` section. [\#6211](https://github.com/ClickHouse/ClickHouse/pull/6211) ([Artem Zuikov](https://github.com/4ertus2))
- Better support of skip indexes for mutations and replication. Support for `MATERIALIZE/CLEAR INDEX ... IN PARTITION` query. `UPDATE x = x` recalculates all indices that use column `x`. [\#5053](https://github.com/ClickHouse/ClickHouse/pull/5053) ([Nikita Vasilev](https://github.com/nikvas0))
- Allow to `ATTACH` live views (for example, at server startup) regardless of the `allow_experimental_live_view` setting. [\#6754](https://github.com/ClickHouse/ClickHouse/pull/6754) ([alexey-milovidov](https://github.com/alexey-milovidov))
- For stack traces gathered by the query profiler, do not include stack frames generated by the query profiler itself. [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now table functions `values`, `file`, `url`, `hdfs` have support for ALIAS columns. [\#6255](https://github.com/ClickHouse/ClickHouse/pull/6255) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Throw an exception if the `config.d` file doesn’t have the corresponding root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))
- Print extra info in the exception message for `no space left on device`. [\#6182](https://github.com/ClickHouse/ClickHouse/issues/6182), [\#6252](https://github.com/ClickHouse/ClickHouse/issues/6252) [\#6352](https://github.com/ClickHouse/ClickHouse/pull/6352) ([tavplubix](https://github.com/tavplubix))
- When determining shards of a `Distributed` table to be covered by a read query (for `optimize_skip_unused_shards` = 1) ClickHouse now checks conditions from both the `prewhere` and `where` clauses of the select statement. [\#6521](https://github.com/ClickHouse/ClickHouse/pull/6521) ([Alexander Kazakov](https://github.com/Akazz))
- Enabled `SIMDJSON` for machines without AVX2 but with the SSE 4.2 and PCLMUL instruction set. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285) [\#6320](https://github.com/ClickHouse/ClickHouse/pull/6320) ([alexey-milovidov](https://github.com/alexey-milovidov))
- ClickHouse can work on filesystems without `O_DIRECT` support (such as ZFS and BtrFS) without additional tuning. [\#4449](https://github.com/ClickHouse/ClickHouse/issues/4449) [\#6730](https://github.com/ClickHouse/ClickHouse/pull/6730) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support predicate push-down for final subquery. [\#6120](https://github.com/ClickHouse/ClickHouse/pull/6120) ([TCeason](https://github.com/TCeason)) [\#6162](https://github.com/ClickHouse/ClickHouse/pull/6162) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Better `JOIN ON` keys extraction [\#6131](https://github.com/ClickHouse/ClickHouse/pull/6131) ([Artem Zuikov](https://github.com/4ertus2))
- Updated `SIMDJSON`. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285). [\#6306](https://github.com/ClickHouse/ClickHouse/pull/6306) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Optimize selection of the smallest column for `SELECT count()` queries. [\#6344](https://github.com/ClickHouse/ClickHouse/pull/6344) ([Amos Bird](https://github.com/amosbird))
- Added `strict` parameter in `windowFunnel()`. When `strict` is set, the `windowFunnel()` applies conditions only to the unique values. [\#6548](https://github.com/ClickHouse/ClickHouse/pull/6548) ([achimbab](https://github.com/achimbab))
- Safer interface of `mysqlxx::Pool`. [\#6150](https://github.com/ClickHouse/ClickHouse/pull/6150) ([avasiliev](https://github.com/avasiliev))
- The width of the options list printed with the `--help` option now corresponds to the terminal size. [\#6590](https://github.com/ClickHouse/ClickHouse/pull/6590) ([dimarub2000](https://github.com/dimarub2000))
- Disable “read in order” optimization for aggregation without keys. [\#6599](https://github.com/ClickHouse/ClickHouse/pull/6599) ([Anton Popov](https://github.com/CurtizJ))
- HTTP status code for `INCORRECT_DATA` and `TYPE_MISMATCH` error codes was changed from the default `500 Internal Server Error` to `400 Bad Request`. [\#6271](https://github.com/ClickHouse/ClickHouse/pull/6271) ([Alexander Rodin](https://github.com/a-rodin))
- Move the Join object from `ExpressionAction` into `AnalyzedJoin`. `ExpressionAnalyzer` and `ExpressionAction` do not know about the `Join` class anymore. Its logic is hidden behind the `AnalyzedJoin` interface. [\#6801](https://github.com/ClickHouse/ClickHouse/pull/6801) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed possible deadlock of distributed queries when one of the shards is localhost but the query is sent via a network connection. [\#6759](https://github.com/ClickHouse/ClickHouse/pull/6759) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Changed semantics of multiple-table `RENAME` to avoid possible deadlocks. [\#6757](https://github.com/ClickHouse/ClickHouse/issues/6757). [\#6756](https://github.com/ClickHouse/ClickHouse/pull/6756) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Rewrote the MySQL compatibility server to prevent loading the full packet payload in memory. Decreased memory consumption for each connection to approximately `2 * DBMS_DEFAULT_BUFFER_SIZE` (read/write buffers). [\#5811](https://github.com/ClickHouse/ClickHouse/pull/5811) ([Yuriy Baranov](https://github.com/yurriy))
- Move AST alias interpreting logic out of the parser, which doesn’t have to know anything about query semantics. [\#6108](https://github.com/ClickHouse/ClickHouse/pull/6108) ([Artem Zuikov](https://github.com/4ertus2))
- Slightly safer parsing of `NamesAndTypesList`. [\#6408](https://github.com/ClickHouse/ClickHouse/issues/6408). [\#6410](https://github.com/ClickHouse/ClickHouse/pull/6410) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `clickhouse-copier`: Allow using `where_condition` from the config with the `partition_key` alias in the query for checking partition existence (earlier it was used only in queries reading data). [\#6577](https://github.com/ClickHouse/ClickHouse/pull/6577) ([proller](https://github.com/proller))
- Added an optional message argument to `throwIf` (see the sketch after this list). ([\#5772](https://github.com/ClickHouse/ClickHouse/issues/5772)) [\#6329](https://github.com/ClickHouse/ClickHouse/pull/6329) ([Vdimir](https://github.com/Vdimir))
- A server exception raised while sending insertion data is now processed in the client as well. [\#5891](https://github.com/ClickHouse/ClickHouse/issues/5891) [\#6711](https://github.com/ClickHouse/ClickHouse/pull/6711) ([dimarub2000](https://github.com/dimarub2000))
- Added a metric `DistributedFilesToInsert` that shows the total number of files in the filesystem that are selected to be sent to remote servers by Distributed tables. The number is summed across all shards. [\#6600](https://github.com/ClickHouse/ClickHouse/pull/6600) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Move most of the JOIN prepare logic from `ExpressionAction/ExpressionAnalyzer` to `AnalyzedJoin`. [\#6785](https://github.com/ClickHouse/ClickHouse/pull/6785) ([Artem Zuikov](https://github.com/4ertus2))
- Fix TSan [warning](https://clickhouse-test-reports.s3.yandex.net/6399/c1c1d1daa98e199e620766f1bd06a5921050a00d/functional_stateful_tests_(thread).html) ‘lock-order-inversion’. [\#6740](https://github.com/ClickHouse/ClickHouse/pull/6740) ([Vasily Nemkov](https://github.com/Enmk))
- Better information messages about the lack of Linux capabilities. Logging fatal errors with the “fatal” level, which will make them easier to find in `system.text_log`. [\#6441](https://github.com/ClickHouse/ClickHouse/pull/6441) ([alexey-milovidov](https://github.com/alexey-milovidov))
- When dumping temporary data to disk to restrict memory usage during `GROUP BY` or `ORDER BY` was enabled, the free disk space was not checked. The fix adds a new setting `min_free_disk_space`: when the free disk space is smaller than the threshold, the query will stop and throw `ErrorCodes::NOT_ENOUGH_SPACE`. [\#6678](https://github.com/ClickHouse/ClickHouse/pull/6678) ([Weiqing Xu](https://github.com/weiqxu)) [\#6691](https://github.com/ClickHouse/ClickHouse/pull/6691) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed recursive rwlock by thread. It made no sense, because threads are reused between queries. A `SELECT` query may acquire a lock in one thread, hold a lock from another thread and exit from the first thread. At the same time, the first thread can be reused by a `DROP` query. This would lead to false “Attempt to acquire exclusive lock recursively” messages. [\#6771](https://github.com/ClickHouse/ClickHouse/pull/6771) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Split `ExpressionAnalyzer.appendJoin()`. Prepare a place in `ExpressionAnalyzer` for `MergeJoin`. [\#6524](https://github.com/ClickHouse/ClickHouse/pull/6524) ([Artem Zuikov](https://github.com/4ertus2))
- Added `mysql_native_password` authentication plugin to the MySQL compatibility server. [\#6194](https://github.com/ClickHouse/ClickHouse/pull/6194) ([Yuriy Baranov](https://github.com/yurriy))
- Fewer `clock_gettime` calls; fixed ABI compatibility between debug/release in `Allocator` (insignificant issue). [\#6197](https://github.com/ClickHouse/ClickHouse/pull/6197) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Move `collectUsedColumns` from `ExpressionAnalyzer` to `SyntaxAnalyzer`. `SyntaxAnalyzer` makes `required_source_columns` itself now. [\#6416](https://github.com/ClickHouse/ClickHouse/pull/6416) ([Artem Zuikov](https://github.com/4ertus2))
- Add setting `joined_subquery_requires_alias` to require aliases for subselects and table functions in `FROM` when more than one table is present (i.e. queries with JOINs). [\#6733](https://github.com/ClickHouse/ClickHouse/pull/6733) ([Artem Zuikov](https://github.com/4ertus2))
- Extract `GetAggregatesVisitor` class from `ExpressionAnalyzer`. [\#6458](https://github.com/ClickHouse/ClickHouse/pull/6458) ([Artem Zuikov](https://github.com/4ertus2))
- `system.query_log`: change data type of `type` column to `Enum`. [\#6265](https://github.com/ClickHouse/ClickHouse/pull/6265) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Static linking of `sha256_password` authentication plugin. [\#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy))
- Avoid an extra dependency for the setting `compile` to work. In previous versions, the user may get errors like `cannot open crti.o`, `unable to find library -lc` etc. [\#6309](https://github.com/ClickHouse/ClickHouse/pull/6309) ([alexey-milovidov](https://github.com/alexey-milovidov))
- More validation of the input that may come from a malicious replica. [\#6303](https://github.com/ClickHouse/ClickHouse/pull/6303) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now the `clickhouse-obfuscator` file is available in the `clickhouse-client` package. In previous versions it was available as `clickhouse obfuscator` (with whitespace). [\#5816](https://github.com/ClickHouse/ClickHouse/issues/5816) [\#6609](https://github.com/ClickHouse/ClickHouse/pull/6609) ([dimarub2000](https://github.com/dimarub2000))
- Fixed deadlock when we have at least two queries that read at least two tables in different order and another query that performs a DDL operation on one of the tables. Fixed another very rare deadlock. [\#6764](https://github.com/ClickHouse/ClickHouse/pull/6764) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added `os_thread_ids` column to `system.processes` and `system.query_log` for better debugging possibilities. [\#6763](https://github.com/ClickHouse/ClickHouse/pull/6763) ([alexey-milovidov](https://github.com/alexey-milovidov))
- A workaround for PHP mysqlnd extension bugs which occur when `sha256_password` is used as a default authentication plugin (described in [\#6031](https://github.com/ClickHouse/ClickHouse/issues/6031)). [\#6113](https://github.com/ClickHouse/ClickHouse/pull/6113) ([Yuriy Baranov](https://github.com/yurriy))
- Remove unneeded place with changed nullability columns. [\#6693](https://github.com/ClickHouse/ClickHouse/pull/6693) ([Artem Zuikov](https://github.com/4ertus2))
- Set the default value of `queue_max_wait_ms` to zero, because the current value (five seconds) makes no sense. There are rare circumstances when this setting has any use. Added settings `replace_running_query_max_wait_ms`, `kafka_max_wait_ms` and `connection_pool_max_wait_ms` for disambiguation. [\#6692](https://github.com/ClickHouse/ClickHouse/pull/6692) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Extract `SelectQueryExpressionAnalyzer` from `ExpressionAnalyzer`. Keep the last one for non-select queries. [\#6499](https://github.com/ClickHouse/ClickHouse/pull/6499) ([Artem Zuikov](https://github.com/4ertus2))
- Removed duplicating input and output formats. [\#6239](https://github.com/ClickHouse/ClickHouse/pull/6239) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Allow the user to override `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `MergeTree` now has an additional option `ttl_only_drop_parts` (disabled by default) to avoid partial pruning of parts, so that they are dropped completely when all rows in a part are expired. [\#6191](https://github.com/ClickHouse/ClickHouse/pull/6191) ([Sergi Vladykin](https://github.com/svladykin))
- Type checks for set index functions. Throw an exception if a function got a wrong type. This fixes the fuzz test with UBSan. [\#6511](https://github.com/ClickHouse/ClickHouse/pull/6511) ([Nikita Vasilev](https://github.com/nikvas0))
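A usage sketch for `ASOF JOIN` with an `ON` section, as referenced above (the `trades` and `quotes` tables and their columns are hypothetical): the `ON` clause takes the usual equality conditions plus one inequality that picks the closest match.

```sql
-- For every trade, pick the latest quote at or before the trade time.
SELECT t.symbol, t.trade_time, q.quote_time, q.price
FROM trades AS t
ASOF JOIN quotes AS q
    ON t.symbol = q.symbol AND t.trade_time >= q.quote_time;
```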
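And a minimal sketch of the optional message argument added to `throwIf` (the message text is arbitrary):

```sql
-- Without the second argument a generic error is raised; with it,
-- the custom text appears in the exception message:
SELECT throwIf(number = 3, 'Custom error: number 3 is not allowed')
FROM system.numbers
LIMIT 10;
```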
#### Performance Improvement {#performance-improvement-2}

- Optimize queries with an `ORDER BY expressions` clause, where `expressions` have a coinciding prefix with the sorting key in `MergeTree` tables. This optimization is controlled by the `optimize_read_in_order` setting (see the sketch after this list). [\#6054](https://github.com/ClickHouse/ClickHouse/pull/6054) [\#6629](https://github.com/ClickHouse/ClickHouse/pull/6629) ([Anton Popov](https://github.com/CurtizJ))
- Allow using multiple threads during parts loading and removal. [\#6372](https://github.com/ClickHouse/ClickHouse/issues/6372) [\#6074](https://github.com/ClickHouse/ClickHouse/issues/6074) [\#6438](https://github.com/ClickHouse/ClickHouse/pull/6438) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Implemented a batch variant of updating aggregate function states. It may lead to performance benefits. [\#6435](https://github.com/ClickHouse/ClickHouse/pull/6435) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Using the `FastOps` library for functions `exp`, `log`, `sigmoid`, `tanh`. FastOps is a fast vector math library from Michael Parakhin (Yandex CTO). Improved performance of the `exp` and `log` functions more than 6 times. The functions `exp` and `log` with a `Float32` argument will return `Float32` (in previous versions they always returned `Float64`). Now `exp(nan)` may return `inf`. The result of the `exp` and `log` functions may not be the nearest machine-representable number to the true answer. [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) Using the Danila Kutenin variant to make fastops work [\#6317](https://github.com/ClickHouse/ClickHouse/pull/6317) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Disable consecutive key optimization for `UInt8/16`. [\#6298](https://github.com/ClickHouse/ClickHouse/pull/6298) [\#6701](https://github.com/ClickHouse/ClickHouse/pull/6701) ([akuzm](https://github.com/akuzm))
- Improved performance of the `simdjson` library by getting rid of dynamic allocation in `ParsedJson::Iterator`. [\#6479](https://github.com/ClickHouse/ClickHouse/pull/6479) ([Vitaly Baranov](https://github.com/vitlibar))
- Pre-fault pages when allocating memory with `mmap()`. [\#6667](https://github.com/ClickHouse/ClickHouse/pull/6667) ([akuzm](https://github.com/akuzm))
- Fix performance bug in `Decimal` comparison. [\#6380](https://github.com/ClickHouse/ClickHouse/pull/6380) ([Artem Zuikov](https://github.com/4ertus2))
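A sketch of the `optimize_read_in_order` optimization mentioned above (the `hits` table is hypothetical; the point is that `ORDER BY` matches a prefix of the table’s sorting key, so the data can be read in order instead of being fully sorted):

```sql
CREATE TABLE hits (date Date, user_id UInt64, event_time DateTime)
ENGINE = MergeTree
ORDER BY (date, user_id);

-- ORDER BY uses a prefix of the sorting key (date, user_id), so with
-- optimize_read_in_order = 1 no full sort is needed:
SELECT *
FROM hits
ORDER BY date, user_id
LIMIT 10
SETTINGS optimize_read_in_order = 1;
```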
#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-4}

- Remove Compiler (runtime template instantiation) because we’ve won over its performance. [\#6646](https://github.com/ClickHouse/ClickHouse/pull/6646) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added a performance test to show the degradation of performance in gcc-9 in a more isolated way. [\#6302](https://github.com/ClickHouse/ClickHouse/pull/6302) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added table function `numbers_mt`, which is a multithreaded version of `numbers` (see the sketch after this list). Updated performance tests with hash functions. [\#6554](https://github.com/ClickHouse/ClickHouse/pull/6554) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Comparison mode in `clickhouse-benchmark` [\#6220](https://github.com/ClickHouse/ClickHouse/issues/6220) [\#6343](https://github.com/ClickHouse/ClickHouse/pull/6343) ([dimarub2000](https://github.com/dimarub2000))
- Best effort for printing stack traces. Also added `SIGPROF` as a debugging signal to print the stack trace of a running thread. [\#6529](https://github.com/ClickHouse/ClickHouse/pull/6529) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Every function in its own file, part 10. [\#6321](https://github.com/ClickHouse/ClickHouse/pull/6321) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Remove doubled const `TABLE_IS_READ_ONLY`. [\#6566](https://github.com/ClickHouse/ClickHouse/pull/6566) ([filimonov](https://github.com/filimonov))
- Formatting changes for `StringHashMap` PR [\#5417](https://github.com/ClickHouse/ClickHouse/issues/5417). [\#6700](https://github.com/ClickHouse/ClickHouse/pull/6700) ([akuzm](https://github.com/akuzm))
- Better subquery for join creation in `ExpressionAnalyzer`. [\#6824](https://github.com/ClickHouse/ClickHouse/pull/6824) ([Artem Zuikov](https://github.com/4ertus2))
- Remove a redundant condition (found by PVS Studio). [\#6775](https://github.com/ClickHouse/ClickHouse/pull/6775) ([akuzm](https://github.com/akuzm))
- Separate the hash table interface for `ReverseIndex`. [\#6672](https://github.com/ClickHouse/ClickHouse/pull/6672) ([akuzm](https://github.com/akuzm))
- Refactoring of settings. [\#6689](https://github.com/ClickHouse/ClickHouse/pull/6689) ([alesapin](https://github.com/alesapin))
- Add comments for `set` index functions. [\#6319](https://github.com/ClickHouse/ClickHouse/pull/6319) ([Nikita Vasilev](https://github.com/nikvas0))
- Increase OOM score in debug version on Linux. [\#6152](https://github.com/ClickHouse/ClickHouse/pull/6152) ([akuzm](https://github.com/akuzm))
- HDFS HA now works in debug builds. [\#6650](https://github.com/ClickHouse/ClickHouse/pull/6650) ([Weiqing Xu](https://github.com/weiqxu))
- Added a test for `transform_query_for_external_database`. [\#6388](https://github.com/ClickHouse/ClickHouse/pull/6388) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add test for multiple materialized views for a Kafka table. [\#6509](https://github.com/ClickHouse/ClickHouse/pull/6509) ([Ivan](https://github.com/abyss7))
- Make a better build scheme. [\#6500](https://github.com/ClickHouse/ClickHouse/pull/6500) ([Ivan](https://github.com/abyss7))
- Fixed the `test_external_dictionaries` integration test in case it was executed under a non-root user. [\#6507](https://github.com/ClickHouse/ClickHouse/pull/6507) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- The bug reproduces when the total size of written packets exceeds `DBMS_DEFAULT_BUFFER_SIZE`. [\#6204](https://github.com/ClickHouse/ClickHouse/pull/6204) ([Yuriy Baranov](https://github.com/yurriy))
- Added a test for the `RENAME` table race condition [\#6752](https://github.com/ClickHouse/ClickHouse/pull/6752) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid data race on Settings in `KILL QUERY`. [\#6753](https://github.com/ClickHouse/ClickHouse/pull/6753) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add integration test for handling errors by a cache dictionary. [\#6755](https://github.com/ClickHouse/ClickHouse/pull/6755) ([Vitaly Baranov](https://github.com/vitlibar))
- Disable parsing of ELF object files on Mac OS, because it makes no sense. [\#6578](https://github.com/ClickHouse/ClickHouse/pull/6578) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Attempt to make the changelog generator better. [\#6327](https://github.com/ClickHouse/ClickHouse/pull/6327) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Adding the `-Wshadow` switch to GCC. [\#6325](https://github.com/ClickHouse/ClickHouse/pull/6325) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
- Removed obsolete code for `mimalloc` support. [\#6715](https://github.com/ClickHouse/ClickHouse/pull/6715) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `zlib-ng` determines x86 capabilities and saves this info to global variables. This is done in the `deflateInit` call, which may be made by different threads simultaneously. To avoid multithreaded writes, do it on library startup. [\#6141](https://github.com/ClickHouse/ClickHouse/pull/6141) ([akuzm](https://github.com/akuzm))
- Regression test for a bug in JOIN which was fixed in [\#5192](https://github.com/ClickHouse/ClickHouse/issues/5192). [\#6147](https://github.com/ClickHouse/ClickHouse/pull/6147) ([Bakhtiyor Ruziev](https://github.com/theruziev))
- Fixed MSan report. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix flapping TTL test. [\#6782](https://github.com/ClickHouse/ClickHouse/pull/6782) ([Anton Popov](https://github.com/CurtizJ))
- Fixed false data race in `MergeTreeDataPart::is_frozen` field. [\#6583](https://github.com/ClickHouse/ClickHouse/pull/6583) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed timeouts in the fuzz test. In the previous version, it managed to find a false hangup in the query `SELECT * FROM numbers_mt(gccMurmurHash(''))`. [\#6582](https://github.com/ClickHouse/ClickHouse/pull/6582) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added debug checks to `static_cast` of columns. [\#6581](https://github.com/ClickHouse/ClickHouse/pull/6581) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support for Oracle Linux in official RPM packages. [\#6356](https://github.com/ClickHouse/ClickHouse/issues/6356) [\#6585](https://github.com/ClickHouse/ClickHouse/pull/6585) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Changed json perftests from `once` to `loop` type. [\#6536](https://github.com/ClickHouse/ClickHouse/pull/6536) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- `odbc-bridge.cpp` defines `main()`, so it should not be included in `clickhouse-lib`. [\#6538](https://github.com/ClickHouse/ClickHouse/pull/6538) ([Orivej Desh](https://github.com/orivej))
- Test for crash in `FULL|RIGHT JOIN` with nulls in the right table’s keys. [\#6362](https://github.com/ClickHouse/ClickHouse/pull/6362) ([Artem Zuikov](https://github.com/4ertus2))
- Added a test for the limit on expansion of aliases, just in case. [\#6442](https://github.com/ClickHouse/ClickHouse/pull/6442) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Switched from `boost::filesystem` to `std::filesystem` where appropriate. [\#6253](https://github.com/ClickHouse/ClickHouse/pull/6253) [\#6385](https://github.com/ClickHouse/ClickHouse/pull/6385) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added RPM packages to the website. [\#6251](https://github.com/ClickHouse/ClickHouse/pull/6251) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a test for the fixed `Unknown identifier` exception in the `IN` section. [\#6708](https://github.com/ClickHouse/ClickHouse/pull/6708) ([Artem Zuikov](https://github.com/4ertus2))
- Simplify `shared_ptr_helper` because people were facing difficulties understanding it. [\#6675](https://github.com/ClickHouse/ClickHouse/pull/6675) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added performance tests for the fixed Gorilla and DoubleDelta codecs. [\#6179](https://github.com/ClickHouse/ClickHouse/pull/6179) ([Vasily Nemkov](https://github.com/Enmk))
- Split the integration test `test_dictionaries` into 4 separate tests. [\#6776](https://github.com/ClickHouse/ClickHouse/pull/6776) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix PVS-Studio warning in `PipelineExecutor`. [\#6777](https://github.com/ClickHouse/ClickHouse/pull/6777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Allow using the `library` dictionary source with ASan. [\#6482](https://github.com/ClickHouse/ClickHouse/pull/6482) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added an option to generate the changelog from a list of PRs. [\#6350](https://github.com/ClickHouse/ClickHouse/pull/6350) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Lock the `TinyLog` storage when reading. [\#6226](https://github.com/ClickHouse/ClickHouse/pull/6226) ([akuzm](https://github.com/akuzm))
- Check for broken symlinks in CI. [\#6634](https://github.com/ClickHouse/ClickHouse/pull/6634) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Increase timeout for the “stack overflow” test because it may take a long time in debug builds. [\#6637](https://github.com/ClickHouse/ClickHouse/pull/6637) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added a check for double whitespaces. [\#6643](https://github.com/ClickHouse/ClickHouse/pull/6643) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix `new/delete` memory tracking when built with sanitizers. The tracking is not clear. It only prevents memory limit exceptions in tests. [\#6450](https://github.com/ClickHouse/ClickHouse/pull/6450) ([Artem Zuikov](https://github.com/4ertus2))
- Enable back the check of undefined symbols while linking. [\#6453](https://github.com/ClickHouse/ClickHouse/pull/6453) ([Ivan](https://github.com/abyss7))
- Avoid rebuilding `hyperscan` every day. [\#6307](https://github.com/ClickHouse/ClickHouse/pull/6307) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed UBSan report in `ProtobufWriter`. [\#6163](https://github.com/ClickHouse/ClickHouse/pull/6163) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Don’t allow using the query profiler with sanitizers because it is not compatible. [\#6769](https://github.com/ClickHouse/ClickHouse/pull/6769) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a test for reloading a dictionary after a failure, by timer. [\#6114](https://github.com/ClickHouse/ClickHouse/pull/6114) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix inconsistency in `PipelineExecutor::prepareProcessor` argument type. [\#6494](https://github.com/ClickHouse/ClickHouse/pull/6494) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Added a test for bad URIs. [\#6493](https://github.com/ClickHouse/ClickHouse/pull/6493) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added more checks to the `CAST` function. This should get more information about the segmentation fault in the fuzz test. [\#6346](https://github.com/ClickHouse/ClickHouse/pull/6346) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Added `gcc-9` support to the `docker/builder` container that builds images locally. [\#6333](https://github.com/ClickHouse/ClickHouse/pull/6333) ([Gleb Novikov](https://github.com/NanoBjorn))
- Test for primary key with `LowCardinality(String)`. [\#5044](https://github.com/ClickHouse/ClickHouse/issues/5044) [\#6219](https://github.com/ClickHouse/ClickHouse/pull/6219) ([dimarub2000](https://github.com/dimarub2000))
- Fixed tests affected by slow stack traces printing. [\#6315](https://github.com/ClickHouse/ClickHouse/pull/6315) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a test case for the crash in `groupUniqArray` fixed in [\#6029](https://github.com/ClickHouse/ClickHouse/pull/6029). [\#4402](https://github.com/ClickHouse/ClickHouse/issues/4402) [\#6129](https://github.com/ClickHouse/ClickHouse/pull/6129) ([akuzm](https://github.com/akuzm))
- Fixed indices mutations tests. [\#6645](https://github.com/ClickHouse/ClickHouse/pull/6645) ([Nikita Vasilev](https://github.com/nikvas0))
- In performance tests, do not read the query log for queries we didn’t run. [\#6427](https://github.com/ClickHouse/ClickHouse/pull/6427) ([akuzm](https://github.com/akuzm))
- A materialized view now can be created with any low-cardinality types regardless of the setting about suspicious low-cardinality types. [\#6428](https://github.com/ClickHouse/ClickHouse/pull/6428) ([Olga Khvostikova](https://github.com/stavrolia))
- Updated tests for the `send_logs_level` setting. [\#6207](https://github.com/ClickHouse/ClickHouse/pull/6207) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix build under gcc-8.2. [\#6196](https://github.com/ClickHouse/ClickHouse/pull/6196) ([Max Akhmedov](https://github.com/zlobober))
- Fix build with internal libc++. [\#6724](https://github.com/ClickHouse/ClickHouse/pull/6724) ([Ivan](https://github.com/abyss7))
- Fix shared build with `rdkafka` library [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
- Fixes for Mac OS build (incomplete). [\#6390](https://github.com/ClickHouse/ClickHouse/pull/6390) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6429](https://github.com/ClickHouse/ClickHouse/pull/6429) ([alex-zaitsev](https://github.com/alex-zaitsev))
- Fix “splitted” build. [\#6618](https://github.com/ClickHouse/ClickHouse/pull/6618) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Other build fixes: [\#6186](https://github.com/ClickHouse/ClickHouse/pull/6186) ([Amos Bird](https://github.com/amosbird)) [\#6486](https://github.com/ClickHouse/ClickHouse/pull/6486) [\#6348](https://github.com/ClickHouse/ClickHouse/pull/6348) ([vxider](https://github.com/Vxider)) [\#6744](https://github.com/ClickHouse/ClickHouse/pull/6744) ([Ivan](https://github.com/abyss7)) [\#6016](https://github.com/ClickHouse/ClickHouse/pull/6016) [\#6421](https://github.com/ClickHouse/ClickHouse/pull/6421) [\#6491](https://github.com/ClickHouse/ClickHouse/pull/6491) ([proller](https://github.com/proller))
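A quick sketch of the `numbers_mt` table function added above (the row count is arbitrary):

```sql
-- Same semantics as numbers(N), but the rows are generated by multiple
-- threads, which is useful for benchmarking hash functions and aggregation:
SELECT count()
FROM numbers_mt(1000000000);
```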
#### Backward Incompatible Change {#backward-incompatible-change-3}

- Removed the rarely used table function `catBoostPool` and storage `CatBoostPool`. If you have used this table function, please write an email to `clickhouse-feedback@yandex-team.com`. Note that CatBoost integration remains and will be supported. [\#6279](https://github.com/ClickHouse/ClickHouse/pull/6279) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Disable `ANY RIGHT JOIN` and `ANY FULL JOIN` by default. Set the `any_join_distinct_right_table_keys` setting to enable them (see the sketch below). [\#5126](https://github.com/ClickHouse/ClickHouse/issues/5126) [\#6351](https://github.com/ClickHouse/ClickHouse/pull/6351) ([Artem Zuikov](https://github.com/4ertus2))
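A sketch of opting back into the old behaviour (the table names are hypothetical):

```sql
-- ANY RIGHT JOIN and ANY FULL JOIN are disabled by default from this
-- release; re-enable them per session or per query:
SET any_join_distinct_right_table_keys = 1;

SELECT key, value
FROM left_table
ANY RIGHT JOIN right_table USING (key);  -- hypothetical tables
```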
## ClickHouse release 19.13 {#clickhouse-release-19-13}

### ClickHouse release 19.13.6.51, 2019-10-02 {#clickhouse-release-19-13-6-51-2019-10-02}

#### Bug Fix {#bug-fix-9}

- This release also contains all bug fixes from 19.11.12.69.

### ClickHouse release 19.13.5.44, 2019-09-20 {#clickhouse-release-19-13-5-44-2019-09-20}

#### Bug Fix {#bug-fix-10}

- This release also contains all bug fixes from 19.14.6.12.
- Fixed a possible inconsistent state of a table while executing a `DROP` query for a replicated table while ZooKeeper is not accessible. [\#6045](https://github.com/ClickHouse/ClickHouse/issues/6045) [\#6413](https://github.com/ClickHouse/ClickHouse/pull/6413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Fix for data race in StorageMerge [\#6717](https://github.com/ClickHouse/ClickHouse/pull/6717) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix bug introduced in the query profiler which led to endless recv from a socket. [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) ([alesapin](https://github.com/alesapin))
- Fix excessive CPU usage while executing the `JSONExtractRaw` function over a boolean value. [\#6208](https://github.com/ClickHouse/ClickHouse/pull/6208) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixes a regression while pushing to a materialized view. [\#6415](https://github.com/ClickHouse/ClickHouse/pull/6415) ([Ivan](https://github.com/abyss7))
- The table function `url` had a vulnerability that allowed an attacker to inject arbitrary HTTP headers into the request. This issue was found by [Nikita Tikhomirov](https://github.com/NSTikhomirov). [\#6466](https://github.com/ClickHouse/ClickHouse/pull/6466) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix useless `AST` check in Set index. [\#6510](https://github.com/ClickHouse/ClickHouse/issues/6510) [\#6651](https://github.com/ClickHouse/ClickHouse/pull/6651) ([Nikita Vasilev](https://github.com/nikvas0))
- Fixed parsing of `AggregateFunction` values embedded in a query. [\#6575](https://github.com/ClickHouse/ClickHouse/issues/6575) [\#6773](https://github.com/ClickHouse/ClickHouse/pull/6773) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed wrong behaviour of the `trim` family of functions (see the sketch after this list). [\#6647](https://github.com/ClickHouse/ClickHouse/pull/6647) ([alexey-milovidov](https://github.com/alexey-milovidov))
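A minimal sketch of the `trim` family mentioned in the last entry (the inputs are arbitrary; the results shown assume the fixed behaviour):

```sql
SELECT
    trim(BOTH 'x' FROM 'xxHelloxx'),  -- expected: 'Hello'
    trimLeft('  Hello'),              -- expected: 'Hello'
    trimRight('Hello  ');             -- expected: 'Hello'
```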
### ClickHouse release 19.13.4.32, 2019-09-10 {#clickhouse-release-19-13-4-32-2019-09-10}

#### Bug Fix {#bug-fix-11}

- This release also contains all bug security fixes from 19.11.9.52 and 19.11.10.54.
- Fixed data race in `system.parts` table and `ALTER` query. [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245) [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed mismatched headers in streams that happened in the case of reading from an empty distributed table with sample and prewhere. [\#6167](https://github.com/ClickHouse/ClickHouse/issues/6167) ([Lixiang Qian](https://github.com/fancyqlx)) [\#6823](https://github.com/ClickHouse/ClickHouse/pull/6823) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed crash when using an `IN` clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
- Fix case with same column names in the `GLOBAL JOIN ON` section. [\#6181](https://github.com/ClickHouse/ClickHouse/pull/6181) ([Artem Zuikov](https://github.com/4ertus2))
- Fix crash when casting types to `Decimal` that do not support it. Throw an exception instead. [\#6297](https://github.com/ClickHouse/ClickHouse/pull/6297) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed crash in `extractAll()` function. [\#6644](https://github.com/ClickHouse/ClickHouse/pull/6644) ([Artem Zuikov](https://github.com/4ertus2))
- Query transformation for `MySQL`, `ODBC`, `JDBC` table functions now works properly for `SELECT WHERE` queries with multiple `AND` expressions. [\#6381](https://github.com/ClickHouse/ClickHouse/issues/6381) [\#6676](https://github.com/ClickHouse/ClickHouse/pull/6676) ([dimarub2000](https://github.com/dimarub2000))
- Added previous declaration checks for MySQL 8 integration. [\#6569](https://github.com/ClickHouse/ClickHouse/pull/6569) ([Rafael David Tinoco](https://github.com/rafaeldtinoco))

#### Security Fix {#security-fix-1}

- Fix two vulnerabilities in codecs in the decompression phase (a malicious user can fabricate compressed data that will lead to a buffer overflow in decompression). [\#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2))

### ClickHouse release 19.13.3.26, 2019-08-22 {#clickhouse-release-19-13-3-26-2019-08-22}

#### Bug Fix {#bug-fix-12}

- Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
- Fix NPE when using an IN clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
- Fixed an issue where, if a stale replica became alive, it might still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed issue with parsing CSV [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
- Fixed data race in the system.parts table and `ALTER` query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
#### Security Fix {#security-fix-2}

- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, it can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.13.2.19, 2019-08-14 {#clickhouse-release-19-13-2-19-2019-08-14}

#### New Feature {#new-feature-5}

- Sampling profiler on query level. [Example](https://gist.github.com/alexey-milovidov/92758583dd41c24c360fdb8d6a4da194). [\#4247](https://github.com/ClickHouse/ClickHouse/issues/4247) ([laplab](https://github.com/laplab)) [\#6124](https://github.com/ClickHouse/ClickHouse/pull/6124) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386)
- Allow specifying a list of columns with the `COLUMNS('regexp')` expression that works like a more sophisticated variant of the `*` asterisk (see the sketch after this list). [\#5951](https://github.com/ClickHouse/ClickHouse/pull/5951) ([mfridental](https://github.com/mfridental)), ([alexey-milovidov](https://github.com/alexey-milovidov))
- `CREATE TABLE AS table_function()` is now possible [\#6057](https://github.com/ClickHouse/ClickHouse/pull/6057) ([dimarub2000](https://github.com/dimarub2000))
- The Adam optimizer for stochastic gradient descent is used by default in the `stochasticLinearRegression()` and `stochasticLogisticRegression()` aggregate functions, because it shows good quality with almost no tuning. [\#6000](https://github.com/ClickHouse/ClickHouse/pull/6000) ([Quid37](https://github.com/Quid37))
- Added functions for working with custom week numbers [\#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) ([Andy Yang](https://github.com/andyyzh))
- `RENAME` queries now work with all storages. [\#5953](https://github.com/ClickHouse/ClickHouse/pull/5953) ([Ivan](https://github.com/abyss7))
- Now the client receives logs from the server with any desired level by setting `send_logs_level`, regardless of the log level specified in the server settings. [\#5964](https://github.com/ClickHouse/ClickHouse/pull/5964) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
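A sketch of the `COLUMNS('regexp')` expression mentioned above (the table and its columns are hypothetical):

```sql
CREATE TABLE t (id UInt64, bytes_in UInt64, bytes_out UInt64) ENGINE = Memory;

-- Select every column whose name starts with 'bytes_' without listing
-- them explicitly:
SELECT COLUMNS('^bytes_') FROM t;
```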
#### Backward Incompatible Change {#backward-incompatible-change-4}

- The setting `input_format_defaults_for_omitted_fields` is enabled by default. Inserts into Distributed tables need this setting to be the same across the cluster (you need to set it before a rolling update). It enables calculation of complex default expressions for omitted fields in `JSONEachRow` and `CSV*` formats. It should be the expected behavior but may lead to a negligible performance difference. [\#6043](https://github.com/ClickHouse/ClickHouse/pull/6043) ([Artem Zuikov](https://github.com/4ertus2)), [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))

#### Experimental features {#experimental-features}

- New query processing pipeline. Use the `experimental_use_processors=1` option to enable it. Use at your own risk. [\#4914](https://github.com/ClickHouse/ClickHouse/pull/4914) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Bug Fix {#bug-fix-13}

- Kafka integration has been fixed in this version.
- Fixed `DoubleDelta` encoding of `Int64` for large `DoubleDelta` values, improved `DoubleDelta` encoding for random data for `Int32`. [\#5998](https://github.com/ClickHouse/ClickHouse/pull/5998) ([Vasily Nemkov](https://github.com/Enmk))
- Fixed overestimation of `max_rows_to_read` if the setting `merge_tree_uniform_read_distribution` is set to 0. [\#6019](https://github.com/ClickHouse/ClickHouse/pull/6019) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Improvement {#improvement-4}

- Throws an exception if the `config.d` file doesn’t have the corresponding root element as the config file [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))

#### Performance Improvement {#performance-improvement-3}

- Optimize `count()`. Now it uses the smallest column (if possible). [\#6028](https://github.com/ClickHouse/ClickHouse/pull/6028) ([Amos Bird](https://github.com/amosbird))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-5}

- Report memory usage in performance tests. [\#5899](https://github.com/ClickHouse/ClickHouse/pull/5899) ([akuzm](https://github.com/akuzm))
- Fix build with external `libcxx` [\#6010](https://github.com/ClickHouse/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7))
- Fix shared build with `rdkafka` library [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))

## ClickHouse release 19.11 {#clickhouse-release-19-11}

### ClickHouse release 19.11.13.74, 2019-11-01 {#clickhouse-release-19-11-13-74-2019-11-01}

#### Bug Fix {#bug-fix-14}

- Fixed a rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows). [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
- Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus json diagnostic messages. [\#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz))
- Fixed bug with `mrk` file extension for mutations ([alesapin](https://github.com/alesapin))

### ClickHouse release 19.11.12.69, 2019-10-02 {#clickhouse-release-19-11-12-69-2019-10-02}

#### Bug Fix {#bug-fix-15}

- Fixed performance degradation of index analysis on complex keys on large tables. This fixes [\#6924](https://github.com/ClickHouse/ClickHouse/issues/6924). [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid rare SIGSEGV while sending data in tables with Distributed engine (`Failed to send batch: file with index XXXXX is absent`). [\#7032](https://github.com/ClickHouse/ClickHouse/pull/7032) ([Azat Khuzhin](https://github.com/azat))
- Fix `Unknown identifier` with multiple joins. This fixes [\#5254](https://github.com/ClickHouse/ClickHouse/issues/5254). [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))

### ClickHouse release 19.11.11.57, 2019-09-13 {#clickhouse-release-19-11-11-57-2019-09-13}
- Fix logical error causing segfaults when selecting from an empty Kafka topic. [\#6902](https://github.com/ClickHouse/ClickHouse/issues/6902) [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
- Fix for function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))

### ClickHouse release 19.11.10.54, 2019-09-10 {#clickhouse-release-19-11-10-54-2019-09-10}

#### Bug Fix {#bug-fix-16}

- Store offsets for Kafka messages manually to be able to commit them all at once for all partitions. Fixes potential duplication in the “one consumer - many partitions” scenario. [\#6872](https://github.com/ClickHouse/ClickHouse/pull/6872) ([Ivan](https://github.com/abyss7))

### ClickHouse release 19.11.9.52, 2019-09-6 {#clickhouse-release-19-11-9-52-2019-09-6}

- Improve error handling in cache dictionaries. [\#6737](https://github.com/ClickHouse/ClickHouse/pull/6737) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed bug in function `arrayEnumerateUniqRanked`. [\#6779](https://github.com/ClickHouse/ClickHouse/pull/6779) ([proller](https://github.com/proller))
- Fix `JSONExtract` function while extracting a `Tuple` from JSON (see the sketch after this list). [\#6718](https://github.com/ClickHouse/ClickHouse/pull/6718) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
- Fixed performance test. [\#6392](https://github.com/ClickHouse/ClickHouse/pull/6392) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Parquet: Fix reading boolean columns. [\#6579](https://github.com/ClickHouse/ClickHouse/pull/6579) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong behaviour of the `nullIf` function for constant arguments. [\#6518](https://github.com/ClickHouse/ClickHouse/pull/6518) ([Guillaume Tassery](https://github.com/YiuRULE)) [\#6580](https://github.com/ClickHouse/ClickHouse/pull/6580) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix Kafka message duplication on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
- Fixed an issue when a long `ALTER UPDATE` or `ALTER DELETE` may prevent regular merges from running. Prevent mutations from executing if there are not enough free threads available. [\#6502](https://github.com/ClickHouse/ClickHouse/issues/6502) [\#6617](https://github.com/ClickHouse/ClickHouse/pull/6617) ([tavplubix](https://github.com/tavplubix))
- Fixed error with processing “timezone” in the server configuration file. [\#6709](https://github.com/ClickHouse/ClickHouse/pull/6709) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix kafka tests. [\#6805](https://github.com/ClickHouse/ClickHouse/pull/6805) ([Ivan](https://github.com/abyss7))
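A minimal sketch of extracting a `Tuple` with `JSONExtract`, as mentioned above (the JSON document is arbitrary):

```sql
-- Extract several JSON fields at once as a typed tuple:
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}',
                   'Tuple(String, Array(Float64))');
-- Expected result: ('hello', [-100, 200, 300])
```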
#### Security Fix {#security-fix-3}

- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, it can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.11.8.46, 2019-08-22 {#clickhouse-release-19-11-8-46-2019-08-22}

#### Bug Fix {#bug-fix-17}

- Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
- Fix NPE when using an IN clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
- Fixed an issue where, if a stale replica became alive, it might still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed issue with parsing CSV [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
- Fixed data race in the system.parts table and `ALTER` query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.11.7.40, 2019-08-14 {#clickhouse-release-19-11-7-40-2019-08-14}

#### Bug fix {#bug-fix-18}

- Kafka integration has been fixed in this version.
- Fix segfault when using `arrayReduce` for constant arguments. [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed `toFloat()` monotonicity. [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
- Fix segfault with enabled `optimize_skip_unused_shards` and missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([CurtizJ](https://github.com/CurtizJ))
- Fixed logic of `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed extra verbose logging from the MySQL handler. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix wrong behavior and possible segfaults in `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([CurtizJ](https://github.com/CurtizJ))
- Do not expose virtual columns in the `system.columns` table. This is required for backward compatibility. [\#6406](https://github.com/ClickHouse/ClickHouse/pull/6406) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix bug with memory allocation for string fields in a complex key cache dictionary. [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
- Fix bug with enabling adaptive granularity when creating a new replica for a `Replicated*MergeTree` table. [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
- Fix infinite loop when reading Kafka messages. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([abyss7](https://github.com/abyss7))
- Fixed the possibility of a fabricated query causing a server crash due to stack overflow in the SQL parser and the possibility of stack overflow in `Merge` and `Distributed` tables [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed Gorilla encoding error on small sequences. [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Enmk](https://github.com/Enmk))

#### Improvement {#improvement-5}

- Allow the user to override `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.11.5.28, 2019-08-05 {#clickhouse-release-19-11-5-28-2019-08-05}

#### Bug fix {#bug-fix-19}

- Fixed the possibility of hanging queries when the server is overloaded. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix FPE in the yandexConsistentHash function. This fixes [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304). [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix parsing of `bool` settings from `true` and `false` strings in configuration files. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
- Fix rare bug with incompatible stream headers in queries to a `Distributed` table over a `MergeTree` table when part of the `WHERE` moves to `PREWHERE`. [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
- Fixed overflow in integer division of signed type to unsigned type. This fixes [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214). [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Backward Incompatible Change {#backward-incompatible-change-5}

- `Kafka` is still broken.

### ClickHouse release 19.11.4.24, 2019-08-01 {#clickhouse-release-19-11-4-24-2019-08-01}

#### Bug Fix {#bug-fix-20}

- Fix bug with writing secondary indices marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
- Fix `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
- Fixed hang in `JSONExtractRaw` function. Fixed [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix segfault in ExternalLoader::reloadOutdated(). [\#6082](https://github.com/ClickHouse/ClickHouse/pull/6082) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed the case when the server may close listening sockets but not shut down, continuing to serve remaining queries. You may end up with two running clickhouse-server processes. Sometimes, the server may return an error `bad_function_call` for remaining queries. [\#6231](https://github.com/ClickHouse/ClickHouse/pull/6231) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a useless and incorrect condition on the update field for initial loading of external dictionaries via ODBC, MySQL, ClickHouse and HTTP. This fixes [\#6069](https://github.com/ClickHouse/ClickHouse/issues/6069) [\#6083](https://github.com/ClickHouse/ClickHouse/pull/6083) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed irrelevant exception in cast of `LowCardinality(Nullable)` to a not-Nullable column in case it doesn’t contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix non-deterministic result of the “uniq” aggregate function in extremely rare cases. The bug was present in all ClickHouse versions. [\#6058](https://github.com/ClickHouse/ClickHouse/pull/6058) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a segfault when setting a slightly too high CIDR in the function `IPv6CIDRToRange`. [\#6068](https://github.com/ClickHouse/ClickHouse/pull/6068) ([Guillaume Tassery](https://github.com/YiuRULE))
- Fixed a small memory leak when the server throws many exceptions from many different contexts. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix the situation when the consumer got paused before subscription and was not resumed afterwards. [\#6075](https://github.com/ClickHouse/ClickHouse/pull/6075) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
- Clearing the Kafka data buffer from the previous read operation that was completed with an error [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) Note that Kafka is broken in this version.
- Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-6}

- Added official `rpm` packages. [\#5740](https://github.com/ClickHouse/ClickHouse/pull/5740) ([proller](https://github.com/proller)) ([alesapin](https://github.com/alesapin))
- Add an ability to build `.rpm` and `.tgz` packages with the `packager` script. [\#5769](https://github.com/ClickHouse/ClickHouse/pull/5769) ([alesapin](https://github.com/alesapin))
- Fixes for the “Arcadia” build system. [\#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller))
[\#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller)) - -#### Backward Incompatible Change {#backward-incompatible-change-6} - -- `Kafka` is broken in this version. - -### ClickHouse release 19.11.3.11, 2019-07-18 {#clickhouse-release-19-11-3-11-2019-07-18} - -#### New Feature {#new-feature-6} - -- Added support for prepared statements. [\#5331](https://github.com/ClickHouse/ClickHouse/pull/5331/) ([Alexander](https://github.com/sanych73)) [\#5630](https://github.com/ClickHouse/ClickHouse/pull/5630) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- `DoubleDelta` and `Gorilla` column codecs [\#5600](https://github.com/ClickHouse/ClickHouse/pull/5600) ([Vasily Nemkov](https://github.com/Enmk)) -- Added `os_thread_priority` setting that allows controlling the “nice” value of query processing threads that is used by the OS to adjust dynamic scheduling priority. It requires `CAP_SYS_NICE` capabilities to work. This implements [\#5858](https://github.com/ClickHouse/ClickHouse/issues/5858) [\#5909](https://github.com/ClickHouse/ClickHouse/pull/5909) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Implement `_topic`, `_offset`, `_key` columns for the Kafka engine [\#5382](https://github.com/ClickHouse/ClickHouse/pull/5382) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version. -- Add aggregate function combinator `-Resample` [\#5590](https://github.com/ClickHouse/ClickHouse/pull/5590) ([hcz](https://github.com/hczhcz)) -- Aggregate functions `groupArrayMovingSum(win_size)(x)` and `groupArrayMovingAvg(win_size)(x)`, which calculate a moving sum/avg with or without window-size limitation (example below). [\#5595](https://github.com/ClickHouse/ClickHouse/pull/5595) ([inv2004](https://github.com/inv2004)) -- Add synonym `arrayFlatten` \<-\> `flatten` [\#5764](https://github.com/ClickHouse/ClickHouse/pull/5764) ([hcz](https://github.com/hczhcz)) -- Integrate H3 function `geoToH3` from Uber. [\#4724](https://github.com/ClickHouse/ClickHouse/pull/4724) ([Remen Ivan](https://github.com/BHYCHIK)) [\#5805](https://github.com/ClickHouse/ClickHouse/pull/5805) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Bug Fix {#bug-fix-21} - -- Implement DNS cache with asynchronous update. A separate thread resolves all hosts and updates the DNS cache periodically (setting `dns_cache_update_period`). It should help when the IPs of hosts change frequently. [\#5857](https://github.com/ClickHouse/ClickHouse/pull/5857) ([Anton Popov](https://github.com/CurtizJ)) -- Fix segfault in the `Delta` codec which affects columns with values smaller than 32 bits. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin)) -- Fix segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ)) -- Fix rare bug in checking of parts with a `LowCardinality` column. Previously `checkDataPart` always failed for parts with a `LowCardinality` column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin)) -- Avoid hanging connections when the server thread pool is full. It is important for connections from the `remote` table function or connections to a shard without replicas when there is a long connection timeout. This fixes [\#5878](https://github.com/ClickHouse/ClickHouse/issues/5878) [\#5881](https://github.com/ClickHouse/ClickHouse/pull/5881) ([alexey-milovidov](https://github.com/alexey-milovidov))
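A minimal, hedged sketch of the moving-window aggregates and the `arrayFlatten` synonym added above; the inline data is made up for illustration:

```sql
-- Moving sum/avg over a window of 2 elements, per the signatures in the entry.
SELECT
    groupArrayMovingSum(2)(x) AS moving_sum,
    groupArrayMovingAvg(2)(x) AS moving_avg
FROM (SELECT arrayJoin([1, 2, 3, 4]) AS x);

-- `flatten` as a synonym for `arrayFlatten`.
SELECT arrayFlatten([[1, 2], [3, 4]]) AS a, flatten([[1, 2], [3, 4]]) AS b;
```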
-- Support for constant arguments to the `evalMLModel` function. This fixes [\#5817](https://github.com/ClickHouse/ClickHouse/issues/5817) [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed the issue when ClickHouse determined the default time zone as `UCT` instead of `UTC`. This fixes [\#5804](https://github.com/ClickHouse/ClickHouse/issues/5804). [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed buffer underflow in `visitParamExtractRaw`. This fixes [\#5901](https://github.com/ClickHouse/ClickHouse/issues/5901) [\#5902](https://github.com/ClickHouse/ClickHouse/pull/5902) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Now distributed `DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER` queries will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin)) -- Fix `coalesce` for `ColumnConst` with `ColumnNullable` + related changes. [\#5755](https://github.com/ClickHouse/ClickHouse/pull/5755) ([Artem Zuikov](https://github.com/4ertus2)) -- Fix the `ReadBufferFromKafkaConsumer` so that it keeps reading new messages after `commit()` even if it was stalled before [\#5852](https://github.com/ClickHouse/ClickHouse/pull/5852) ([Ivan](https://github.com/abyss7)) -- Fix `FULL` and `RIGHT` JOIN results when joining on `Nullable` keys in the right table. [\#5859](https://github.com/ClickHouse/ClickHouse/pull/5859) ([Artem Zuikov](https://github.com/4ertus2)) -- Possible fix of infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fix a race condition which caused some queries to not appear in query\_log after a `SYSTEM FLUSH LOGS` query. [\#5456](https://github.com/ClickHouse/ClickHouse/issues/5456) [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ)) -- Fixed `heap-use-after-free` ASan warning in ClusterCopier caused by a watch which tried to use an already removed copier object. [\#5871](https://github.com/ClickHouse/ClickHouse/pull/5871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Fixed wrong `StringRef` pointer returned by some implementations of `IColumn::deserializeAndInsertFromArena`. This bug affected only unit tests. [\#5973](https://github.com/ClickHouse/ClickHouse/pull/5973) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Prevent source and intermediate array join columns from masking same-name columns. [\#5941](https://github.com/ClickHouse/ClickHouse/pull/5941) ([Artem Zuikov](https://github.com/4ertus2)) -- Fix INSERT and SELECT queries to the MySQL engine with MySQL-style identifier quoting. [\#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014)) -- Now the `CHECK TABLE` query can work with the MergeTree engine family. It returns the check status and a message, if any, for each part (or file, in the case of simpler engines). Also, fix a bug in the fetch of a broken part.
[\#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin)) -- Fix SPLIT\_SHARED\_LIBRARIES runtime [\#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1)) -- Fixed time zone initialization when `/etc/localtime` is a relative symlink like `../usr/share/zoneinfo/Europe/Moscow` [\#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- clickhouse-copier: Fix use-after-free on shutdown [\#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller)) -- Updated `simdjson`. Fixed the issue that some invalid JSONs with zero bytes parsed successfully. [\#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fix shutdown of SystemLogs [\#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ)) -- Fix hanging when a condition in invalidate\_query depends on a dictionary. [\#6011](https://github.com/ClickHouse/ClickHouse/pull/6011) ([Vitaly Baranov](https://github.com/vitlibar)) - -#### Improvement {#improvement-6} - -- Allow unresolvable addresses in cluster configuration. They will be considered unavailable, and resolution will be retried at every connection attempt. This is especially useful for Kubernetes. This fixes [\#5714](https://github.com/ClickHouse/ClickHouse/issues/5714) [\#5924](https://github.com/ClickHouse/ClickHouse/pull/5924) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Close idle TCP connections (with a one hour timeout by default). This is especially important for large clusters with multiple distributed tables on every server, because every server can possibly keep a connection pool to every other server, and after peak query concurrency, connections will stall. This fixes [\#5879](https://github.com/ClickHouse/ClickHouse/issues/5879) [\#5880](https://github.com/ClickHouse/ClickHouse/pull/5880) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Better quality of the `topK` function. Changed the SpaceSaving set behavior to remove the last element if the new element has a bigger weight. [\#5833](https://github.com/ClickHouse/ClickHouse/issues/5833) [\#5850](https://github.com/ClickHouse/ClickHouse/pull/5850) ([Guillaume Tassery](https://github.com/YiuRULE)) -- URL functions that work with domains can now handle incomplete URLs without a scheme [\#5725](https://github.com/ClickHouse/ClickHouse/pull/5725) ([alesapin](https://github.com/alesapin)) -- Checksums added to the `system.parts_columns` table. [\#5874](https://github.com/ClickHouse/ClickHouse/pull/5874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) -- Added the `Enum` data type as a synonym for `Enum8` or `Enum16` (example below). [\#5886](https://github.com/ClickHouse/ClickHouse/pull/5886) ([dimarub2000](https://github.com/dimarub2000)) -- Full bit transpose variant for the `T64` codec. Could lead to better compression with `zstd`. [\#5742](https://github.com/ClickHouse/ClickHouse/pull/5742) ([Artem Zuikov](https://github.com/4ertus2)) -- A condition on the `startsWith` function can now use the primary key (example below). This fixes [\#5310](https://github.com/ClickHouse/ClickHouse/issues/5310) and [\#5882](https://github.com/ClickHouse/ClickHouse/issues/5882) [\#5919](https://github.com/ClickHouse/ClickHouse/pull/5919) ([dimarub2000](https://github.com/dimarub2000))
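A hedged sketch of the two improvements referenced above; the table and column names are hypothetical:

```sql
CREATE TABLE urls (url String, hits UInt64) ENGINE = MergeTree() ORDER BY url;

-- With the improvement above, this prefix condition on the primary key column
-- can be served by the primary index instead of a full scan.
SELECT count() FROM urls WHERE startsWith(url, 'https://');

-- `Enum` used without an explicit width, as a synonym for `Enum8`/`Enum16`.
CREATE TABLE t_enum (e Enum('small' = 1, 'large' = 2)) ENGINE = Memory;
```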
-- Allow using `clickhouse-copier` with cross-replication cluster topology by permitting an empty database name. [\#5745](https://github.com/ClickHouse/ClickHouse/pull/5745) ([nvartolomei](https://github.com/nvartolomei)) -- Use `UTC` as the default timezone on a system without `tzdata` (e.g. a bare Docker container). Before this patch, the error message `Could not determine local time zone` was printed, and the server or client refused to start. [\#5827](https://github.com/ClickHouse/ClickHouse/pull/5827) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Brought back support for a floating point argument in the function `quantileTiming` for backward compatibility. [\#5911](https://github.com/ClickHouse/ClickHouse/pull/5911) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Show which table is missing a column in error messages. [\#5768](https://github.com/ClickHouse/ClickHouse/pull/5768) ([Ivan](https://github.com/abyss7)) -- Disallow running queries with the same query\_id by different users [\#5430](https://github.com/ClickHouse/ClickHouse/pull/5430) ([proller](https://github.com/proller)) -- More robust code for sending metrics to Graphite. It will work even during long multiple `RENAME TABLE` operations. [\#5875](https://github.com/ClickHouse/ClickHouse/pull/5875) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- More informative error messages will be displayed when ThreadPool cannot schedule a task for execution. This fixes [\#5305](https://github.com/ClickHouse/ClickHouse/issues/5305) [\#5801](https://github.com/ClickHouse/ClickHouse/pull/5801) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Inverted ngramSearch to be more intuitive [\#5807](https://github.com/ClickHouse/ClickHouse/pull/5807) ([Danila Kutenin](https://github.com/danlark1)) -- Add user parsing in the HDFS engine builder [\#5946](https://github.com/ClickHouse/ClickHouse/pull/5946) ([akonyaev90](https://github.com/akonyaev90)) -- Update the default value of the `max_ast_elements` parameter [\#5933](https://github.com/ClickHouse/ClickHouse/pull/5933) ([Artem Konovalov](https://github.com/izebit)) -- Added a notion of obsolete settings. The obsolete setting `allow_experimental_low_cardinality_type` can be used with no effect. [0f15c01c6802f7ce1a1494c12c846be8c98944cd](https://github.com/ClickHouse/ClickHouse/commit/0f15c01c6802f7ce1a1494c12c846be8c98944cd) [Alexey Milovidov](https://github.com/alexey-milovidov) - -#### Performance Improvement {#performance-improvement-4} - -- Increase the number of streams for SELECT from a Merge table for a more uniform distribution of threads. Added setting `max_streams_multiplier_for_merge_tables`. This fixes [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5915](https://github.com/ClickHouse/ClickHouse/pull/5915) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-7} - -- Add a backward compatibility test for client-server interaction with different versions of ClickHouse. [\#5868](https://github.com/ClickHouse/ClickHouse/pull/5868) ([alesapin](https://github.com/alesapin)) -- Test coverage information in every commit and pull request.
[\#5896](https://github.com/ClickHouse/ClickHouse/pull/5896) ([alesapin](https://github.com/alesapin)) -- Cooperate with address sanitizer to support our custom allocators (`Arena` and `ArenaWithFreeLists`) for better debugging of “use-after-free” errors. [\#5728](https://github.com/ClickHouse/ClickHouse/pull/5728) ([akuzm](https://github.com/akuzm)) -- Switch to the [LLVM libunwind implementation](https://github.com/llvm-mirror/libunwind) for C++ exception handling and for printing stack traces [\#4828](https://github.com/ClickHouse/ClickHouse/pull/4828) ([Nikita Lapkov](https://github.com/laplab)) -- Add two more warnings from -Weverything [\#5923](https://github.com/ClickHouse/ClickHouse/pull/5923) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Allow building ClickHouse with Memory Sanitizer. [\#3949](https://github.com/ClickHouse/ClickHouse/pull/3949) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a ubsan report about the `bitTest` function in a fuzz test. [\#5943](https://github.com/ClickHouse/ClickHouse/pull/5943) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Docker: added the possibility to initialize a ClickHouse instance which requires authentication. [\#5727](https://github.com/ClickHouse/ClickHouse/pull/5727) ([Korviakov Andrey](https://github.com/shurshun)) -- Update librdkafka to version 1.1.0 [\#5872](https://github.com/ClickHouse/ClickHouse/pull/5872) ([Ivan](https://github.com/abyss7)) -- Add a global timeout for integration tests and disable some of them in the test code. [\#5741](https://github.com/ClickHouse/ClickHouse/pull/5741) ([alesapin](https://github.com/alesapin)) -- Fix some ThreadSanitizer failures. [\#5854](https://github.com/ClickHouse/ClickHouse/pull/5854) ([akuzm](https://github.com/akuzm)) -- The `--no-undefined` option forces the linker to check all external names for existence while linking. It’s very useful to track real dependencies between libraries in the split build mode. [\#5855](https://github.com/ClickHouse/ClickHouse/pull/5855) ([Ivan](https://github.com/abyss7)) -- Added a performance test for [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5914](https://github.com/ClickHouse/ClickHouse/pull/5914) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed compatibility with gcc-7. [\#5840](https://github.com/ClickHouse/ClickHouse/pull/5840) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added support for gcc-9. This fixes [\#5717](https://github.com/ClickHouse/ClickHouse/issues/5717) [\#5774](https://github.com/ClickHouse/ClickHouse/pull/5774) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed an error when libunwind could be linked incorrectly. [\#5948](https://github.com/ClickHouse/ClickHouse/pull/5948) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a few warnings found by PVS-Studio. [\#5921](https://github.com/ClickHouse/ClickHouse/pull/5921) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added initial support for the `clang-tidy` static analyzer. [\#5806](https://github.com/ClickHouse/ClickHouse/pull/5806) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Convert BSD/Linux endian macros (`be64toh` and `htobe64`) to the Mac OS X equivalents [\#5785](https://github.com/ClickHouse/ClickHouse/pull/5785) ([Fu Chen](https://github.com/fredchenbj)) -- Improved the integration tests guide.
[\#5796](https://github.com/ClickHouse/ClickHouse/pull/5796) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Fix build on macOS + gcc9 [\#5822](https://github.com/ClickHouse/ClickHouse/pull/5822) ([filimonov](https://github.com/filimonov)) -- Fix a hard-to-spot typo: aggreAGte -\> aggregate. [\#5753](https://github.com/ClickHouse/ClickHouse/pull/5753) ([akuzm](https://github.com/akuzm)) -- Fix FreeBSD build [\#5760](https://github.com/ClickHouse/ClickHouse/pull/5760) ([proller](https://github.com/proller)) -- Add a link to the experimental YouTube channel to the website [\#5845](https://github.com/ClickHouse/ClickHouse/pull/5845) ([Ivan Blinkov](https://github.com/blinkov)) -- CMake: add an option for coverage flags: WITH\_COVERAGE [\#5776](https://github.com/ClickHouse/ClickHouse/pull/5776) ([proller](https://github.com/proller)) -- Fix the initial size of some inline PODArrays. [\#5787](https://github.com/ClickHouse/ClickHouse/pull/5787) ([akuzm](https://github.com/akuzm)) -- clickhouse-server.postinst: fix OS detection for CentOS 6 [\#5788](https://github.com/ClickHouse/ClickHouse/pull/5788) ([proller](https://github.com/proller)) -- Added Arch Linux package generation. [\#5719](https://github.com/ClickHouse/ClickHouse/pull/5719) ([Vladimir Chebotarev](https://github.com/excitoon)) -- Split Common/config.h by libs (dbms) [\#5715](https://github.com/ClickHouse/ClickHouse/pull/5715) ([proller](https://github.com/proller)) -- Fixes for the “Arcadia” build platform [\#5795](https://github.com/ClickHouse/ClickHouse/pull/5795) ([proller](https://github.com/proller)) -- Fixes for unconventional builds (gcc9, no submodules) [\#5792](https://github.com/ClickHouse/ClickHouse/pull/5792) ([proller](https://github.com/proller)) -- Require an explicit type in unalignedStore because it was proven to be bug-prone [\#5791](https://github.com/ClickHouse/ClickHouse/pull/5791) ([akuzm](https://github.com/akuzm)) -- Fix macOS build [\#5830](https://github.com/ClickHouse/ClickHouse/pull/5830) ([filimonov](https://github.com/filimonov)) -- Performance test concerning the new JIT feature with a bigger dataset, as requested here [\#5263](https://github.com/ClickHouse/ClickHouse/issues/5263) [\#5887](https://github.com/ClickHouse/ClickHouse/pull/5887) ([Guillaume Tassery](https://github.com/YiuRULE)) -- Run stateful tests in the stress test [12693e568722f11e19859742f56428455501fd2a](https://github.com/ClickHouse/ClickHouse/commit/12693e568722f11e19859742f56428455501fd2a) ([alesapin](https://github.com/alesapin)) - -#### Backward Incompatible Change {#backward-incompatible-change-7} - -- `Kafka` is broken in this version. -- Enable `adaptive_index_granularity` = 10MB by default for new `MergeTree` tables. If you created new MergeTree tables on version 19.11+, downgrading to versions prior to 19.6 will be impossible. [\#5628](https://github.com/ClickHouse/ClickHouse/pull/5628) ([alesapin](https://github.com/alesapin)) -- Removed obsolete undocumented embedded dictionaries that were used by Yandex.Metrica. The functions `OSIn`, `SEIn`, `OSToRoot`, `SEToRoot`, `OSHierarchy`, `SEHierarchy` are no longer available. If you are using these functions, write an email to clickhouse-feedback@yandex-team.com. Note: at the last moment we decided to keep these functions for a while.
[\#5780](https://github.com/ClickHouse/ClickHouse/pull/5780) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -## ClickHouse release 19.10 {#clickhouse-release-19-10} - -### ClickHouse release 19.10.1.5, 2019-07-12 {#clickhouse-release-19-10-1-5-2019-07-12} - -#### New Feature {#new-feature-7} - -- Add a new column codec: `T64`. Made for (U)IntX/EnumX/Date(Time)/DecimalX columns. It should be good for columns with constant or small-range values. The codec itself allows enlarging or shrinking the data type without re-compression (example below). [\#5557](https://github.com/ClickHouse/ClickHouse/pull/5557) ([Artem Zuikov](https://github.com/4ertus2)) -- Add the database engine `MySQL` that allows viewing all the tables in a remote MySQL server [\#5599](https://github.com/ClickHouse/ClickHouse/pull/5599) ([Winter Zhang](https://github.com/zhang2014)) -- `bitmapContains` implementation. It’s 2x faster than `bitmapHasAny` if the second bitmap contains one element. [\#5535](https://github.com/ClickHouse/ClickHouse/pull/5535) ([Zhichang Yu](https://github.com/yuzhichang)) -- Support for the `crc32` function (with behaviour exactly as in MySQL or PHP). Do not use it if you need a hash function. [\#5661](https://github.com/ClickHouse/ClickHouse/pull/5661) ([Remen Ivan](https://github.com/BHYCHIK)) -- Implemented `SYSTEM START/STOP DISTRIBUTED SENDS` queries to control asynchronous inserts into `Distributed` tables. [\#4935](https://github.com/ClickHouse/ClickHouse/pull/4935) ([Winter Zhang](https://github.com/zhang2014)) - -#### Bug Fix {#bug-fix-22} - -- Ignore query execution limits and the max parts size for merge limits while executing mutations. [\#5659](https://github.com/ClickHouse/ClickHouse/pull/5659) ([Anton Popov](https://github.com/CurtizJ)) -- Fix a bug which may lead to deduplication of normal blocks (extremely rare) and insertion of duplicate blocks (more often). [\#5549](https://github.com/ClickHouse/ClickHouse/pull/5549) ([alesapin](https://github.com/alesapin)) -- Fix the function `arrayEnumerateUniqRanked` for arguments with empty arrays [\#5559](https://github.com/ClickHouse/ClickHouse/pull/5559) ([proller](https://github.com/proller)) -- Don’t subscribe to Kafka topics without the intent to poll any messages. [\#5698](https://github.com/ClickHouse/ClickHouse/pull/5698) ([Ivan](https://github.com/abyss7)) -- Make the setting `join_use_nulls` have no effect for types that cannot be inside Nullable [\#5700](https://github.com/ClickHouse/ClickHouse/pull/5700) ([Olga Khvostikova](https://github.com/stavrolia)) -- Fixed `Incorrect size of index granularity` errors [\#5720](https://github.com/ClickHouse/ClickHouse/pull/5720) ([coraxster](https://github.com/coraxster)) -- Fix Float to Decimal conversion overflow [\#5607](https://github.com/ClickHouse/ClickHouse/pull/5607) ([coraxster](https://github.com/coraxster)) -- Flush the buffer when the `WriteBufferFromHDFS` destructor is called. This fixes writing into `HDFS`. [\#5684](https://github.com/ClickHouse/ClickHouse/pull/5684) ([Xindong Peng](https://github.com/eejoin)) - -#### Improvement {#improvement-7} - -- Treat empty cells in `CSV` as default values when the setting `input_format_defaults_for_omitted_fields` is enabled. [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm)) -- Non-blocking loading of external dictionaries. [\#5567](https://github.com/ClickHouse/ClickHouse/pull/5567) ([Vitaly Baranov](https://github.com/vitlibar)) -- Network timeouts can be dynamically changed for already established connections according to the settings. [\#4558](https://github.com/ClickHouse/ClickHouse/pull/4558) ([Konstantin Podshumok](https://github.com/podshumok))
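Hedged sketches of the `T64` codec and the `MySQL` database engine from the entries above; the table name and connection parameters are placeholders:

```sql
-- T64 on an integer column; per the entry it suits constant or small-range
-- values, and it can be chained with a general-purpose codec such as ZSTD.
CREATE TABLE codec_example
(
    ts  DateTime,
    val UInt32 CODEC(T64, ZSTD)
) ENGINE = MergeTree() ORDER BY ts;

-- Expose the tables of a remote MySQL server through a database engine.
CREATE DATABASE mysql_db ENGINE = MySQL('mysql-host:3306', 'database_name', 'user', 'password');
```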
-- Using “public\_suffix\_list” for the functions `firstSignificantSubdomain`, `cutToFirstSignificantSubdomain`. It’s using a perfect hash table generated by `gperf` with a list generated from the file: https://publicsuffix.org/list/public\_suffix\_list.dat (for example, now we recognize the domain `ac.uk` as non-significant). [\#5030](https://github.com/ClickHouse/ClickHouse/pull/5030) ([Guillaume Tassery](https://github.com/YiuRULE)) -- Adopted the `IPv6` data type in system tables; unified client info columns in `system.processes` and `system.query_log` [\#5640](https://github.com/ClickHouse/ClickHouse/pull/5640) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Using sessions for connections with the MySQL compatibility protocol. \#5476 [\#5646](https://github.com/ClickHouse/ClickHouse/pull/5646) ([Yuriy Baranov](https://github.com/yurriy)) -- Support more `ALTER` queries `ON CLUSTER`. [\#5593](https://github.com/ClickHouse/ClickHouse/pull/5593) [\#5613](https://github.com/ClickHouse/ClickHouse/pull/5613) ([sundyli](https://github.com/sundy-li)) -- Support `` section in the `clickhouse-local` config file. [\#5540](https://github.com/ClickHouse/ClickHouse/pull/5540) ([proller](https://github.com/proller)) -- Allow running queries with the `remote` table function in `clickhouse-local` (example below) [\#5627](https://github.com/ClickHouse/ClickHouse/pull/5627) ([proller](https://github.com/proller)) - -#### Performance Improvement {#performance-improvement-5} - -- Add the possibility to write the final mark at the end of MergeTree columns. It allows avoiding useless reads for keys that are out of the table data range. It is enabled only if adaptive index granularity is in use. [\#5624](https://github.com/ClickHouse/ClickHouse/pull/5624) ([alesapin](https://github.com/alesapin)) -- Improved performance of MergeTree tables on very slow filesystems by reducing the number of `stat` syscalls. [\#5648](https://github.com/ClickHouse/ClickHouse/pull/5648) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a performance degradation in reading from MergeTree tables that was introduced in version 19.6. Fixes \#5631. [\#5633](https://github.com/ClickHouse/ClickHouse/pull/5633) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-8} - -- Implemented `TestKeeper` as an implementation of the ZooKeeper interface used for testing [\#5643](https://github.com/ClickHouse/ClickHouse/pull/5643) ([alexey-milovidov](https://github.com/alexey-milovidov)) ([levushkin aleksej](https://github.com/alexey-milovidov)) -- From now on `.sql` tests can be run isolated by the server, in parallel, with a random database. This allows running them faster, adding new tests with custom server configurations, and being sure that different tests don’t affect each other. [\#5554](https://github.com/ClickHouse/ClickHouse/pull/5554) ([Ivan](https://github.com/abyss7))
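A hypothetical illustration of the `remote` table function inside `clickhouse-local`; the host, credentials, and table are placeholders:

```sql
-- Run e.g. as: clickhouse-local --query "<the query below>"
SELECT count()
FROM remote('server-host:9000', default.events, 'user', 'password');
```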
-- Remove `` and `` from performance tests [\#5672](https://github.com/ClickHouse/ClickHouse/pull/5672) ([Olga Khvostikova](https://github.com/stavrolia)) -- Fixed the “select\_format” performance test for `Pretty` formats [\#5642](https://github.com/ClickHouse/ClickHouse/pull/5642) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -## ClickHouse release 19.9 {#clickhouse-release-19-9} - -### ClickHouse release 19.9.3.31, 2019-07-05 {#clickhouse-release-19-9-3-31-2019-07-05} - -#### Bug Fix {#bug-fix-23} - -- Fix segfault in the Delta codec which affects columns with values smaller than 32 bits. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin)) -- Fix rare bug in checking of parts with a LowCardinality column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin)) -- Fix segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ)) -- Fix potential infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed ClickHouse determining the default time zone as `UCT` instead of `UTC`. [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a bug where distributed DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER queries were executed on a follower replica before the leader replica. Now they will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin)) -- Fixed a race condition which caused some queries to not appear in query\_log immediately after a SYSTEM FLUSH LOGS query. [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ)) -- Added missing support for constant arguments to the `evalMLModel` function. [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.9.2.4, 2019-06-24 {#clickhouse-release-19-9-2-4-2019-06-24} - -#### New Feature {#new-feature-8} - -- Print information about frozen parts in the `system.parts` table. [\#5471](https://github.com/ClickHouse/ClickHouse/pull/5471) ([proller](https://github.com/proller)) -- Ask for the client password on clickhouse-client start on a TTY if it is not set in the arguments [\#5092](https://github.com/ClickHouse/ClickHouse/pull/5092) ([proller](https://github.com/proller)) -- Implement `dictGet` and `dictGetOrDefault` functions for Decimal types (example below). [\#5394](https://github.com/ClickHouse/ClickHouse/pull/5394) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Improvement {#improvement-8} - -- Debian init: Add a service stop timeout [\#5522](https://github.com/ClickHouse/ClickHouse/pull/5522) ([proller](https://github.com/proller)) -- Added a setting, forbidden by default, to create tables with suspicious types for LowCardinality [\#5448](https://github.com/ClickHouse/ClickHouse/pull/5448) ([Olga Khvostikova](https://github.com/stavrolia)) -- Regression functions return model weights when not used as State in the function `evalMLMethod`. [\#5411](https://github.com/ClickHouse/ClickHouse/pull/5411) ([Quid37](https://github.com/Quid37)) -- Rename and improve regression methods. [\#5492](https://github.com/ClickHouse/ClickHouse/pull/5492) ([Quid37](https://github.com/Quid37))
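A hedged sketch of `dictGet`/`dictGetOrDefault` with a Decimal attribute; the dictionary `price_dict` and its attribute `price` are hypothetical:

```sql
SELECT
    dictGet('price_dict', 'price', toUInt64(1)) AS price,
    -- Falls back to the given Decimal default when the key is missing.
    dictGetOrDefault('price_dict', 'price', toUInt64(999), toDecimal32(0, 2)) AS price_or_default;
```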
-- Clearer interfaces of string searchers. [\#5586](https://github.com/ClickHouse/ClickHouse/pull/5586) ([Danila Kutenin](https://github.com/danlark1)) - -#### Bug Fix {#bug-fix-24} - -- Fix potential data loss in Kafka [\#5445](https://github.com/ClickHouse/ClickHouse/pull/5445) ([Ivan](https://github.com/abyss7)) -- Fix a potential infinite loop in the `PrettySpace` format when called with zero columns [\#5560](https://github.com/ClickHouse/ClickHouse/pull/5560) ([Olga Khvostikova](https://github.com/stavrolia)) -- Fixed a UInt32 overflow bug in linear models. Allow evaluating an ML model for a non-const model argument. [\#5516](https://github.com/ClickHouse/ClickHouse/pull/5516) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- `ALTER TABLE ... DROP INDEX IF EXISTS ...` should not raise an exception if the provided index does not exist (example below) [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn)) -- Fix segfault with `bitmapHasAny` in a scalar subquery [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang)) -- Fixed an error when the replication connection pool didn’t retry to resolve the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin)) -- Fixed `ALTER ... MODIFY TTL` on ReplicatedMergeTree. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539) ([Anton Popov](https://github.com/CurtizJ)) -- Fix INSERT into a Distributed table with a MATERIALIZED column [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat)) -- Fix bad alloc when truncating Join storage [\#5437](https://github.com/ClickHouse/ClickHouse/pull/5437) ([TCeason](https://github.com/TCeason)) -- In recent versions of the tzdata package, some files are symlinks now. The current mechanism for detecting the default timezone got broken and gave wrong names for some timezones. Now at least we force the timezone name to the contents of TZ if provided. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7)) -- Fix some extremely rare cases with the MultiVolnitsky searcher when the constant needles in sum are at least 16KB long. The algorithm missed or overwrote previous results, which could lead to an incorrect result of `multiSearchAny`. [\#5588](https://github.com/ClickHouse/ClickHouse/pull/5588) ([Danila Kutenin](https://github.com/danlark1)) -- Fix the issue when settings for ExternalData requests couldn’t use ClickHouse settings. Also, for now, the settings `date_time_input_format` and `low_cardinality_allow_in_native_format` cannot be used because of the ambiguity of names (in external data they can be interpreted as a table format and in the query they can be a setting). [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1)) -- Fix a bug when parts were removed only from FS without dropping them from ZooKeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
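Per the fix above, the `IF EXISTS` form no longer throws when the index is absent; the table and index names here are hypothetical:

```sql
-- Succeeds even if the index was never created.
ALTER TABLE my_table DROP INDEX IF EXISTS idx_maybe_missing;
```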
-- Remove debug logging from the MySQL protocol [\#5478](https://github.com/ClickHouse/ClickHouse/pull/5478) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Skip ZNONODE during DDL query processing [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat)) -- Fix mixed `UNION ALL` result column type. There were cases with inconsistent data and column types of resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2)) -- Throw an exception on wrong integers in `dictGetT` functions instead of crashing. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2)) -- Fix wrong element\_count and load\_factor for hashed dictionaries in the `system.dictionaries` table. [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat)) - -#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-9} - -- Fixed build without `Brotli` HTTP compression support (`ENABLE_BROTLI=OFF` cmake variable). [\#5521](https://github.com/ClickHouse/ClickHouse/pull/5521) ([Anton Yuzhaninov](https://github.com/citrin)) -- Include roaring.h as roaring/roaring.h [\#5523](https://github.com/ClickHouse/ClickHouse/pull/5523) ([Orivej Desh](https://github.com/orivej)) -- Fix gcc9 warnings in hyperscan (the \#line directive is evil!) [\#5546](https://github.com/ClickHouse/ClickHouse/pull/5546) ([Danila Kutenin](https://github.com/danlark1)) -- Fix all warnings when compiling with gcc-9. Fix some contrib issues. Fix a gcc9 ICE and submit it to bugzilla. [\#5498](https://github.com/ClickHouse/ClickHouse/pull/5498) ([Danila Kutenin](https://github.com/danlark1)) -- Fixed linking with lld [\#5477](https://github.com/ClickHouse/ClickHouse/pull/5477) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Remove unused specializations in dictionaries [\#5452](https://github.com/ClickHouse/ClickHouse/pull/5452) ([Artem Zuikov](https://github.com/4ertus2)) -- Improved performance tests for formatting and parsing tables for different types of files [\#5497](https://github.com/ClickHouse/ClickHouse/pull/5497) ([Olga Khvostikova](https://github.com/stavrolia)) -- Fixes for parallel test runs [\#5506](https://github.com/ClickHouse/ClickHouse/pull/5506) ([proller](https://github.com/proller)) -- Docker: use configs from clickhouse-test [\#5531](https://github.com/ClickHouse/ClickHouse/pull/5531) ([proller](https://github.com/proller)) -- Fix compilation for FreeBSD [\#5447](https://github.com/ClickHouse/ClickHouse/pull/5447) ([proller](https://github.com/proller)) -- Upgrade boost to 1.70 [\#5570](https://github.com/ClickHouse/ClickHouse/pull/5570) ([proller](https://github.com/proller)) -- Fix building clickhouse as a submodule [\#5574](https://github.com/ClickHouse/ClickHouse/pull/5574) ([proller](https://github.com/proller)) -- Improve JSONExtract performance tests [\#5444](https://github.com/ClickHouse/ClickHouse/pull/5444) ([Vitaly Baranov](https://github.com/vitlibar)) - -## ClickHouse release 19.8 {#clickhouse-release-19-8} - -### ClickHouse release 19.8.3.8, 2019-06-11 {#clickhouse-release-19-8-3-8-2019-06-11} - -#### New Features {#new-features} - -- Added functions to work with JSON [\#4686](https://github.com/ClickHouse/ClickHouse/pull/4686) ([hcz](https://github.com/hczhcz))
[\#5124](https://github.com/ClickHouse/ClickHouse/pull/5124). ([Vitaly Baranov](https://github.com/vitlibar)) -- Added a `basename` function, with behaviour similar to the basename function that exists in a lot of languages (`os.path.basename` in Python, `basename` in PHP, etc…). It works with both UNIX-like paths and Windows paths. [\#5136](https://github.com/ClickHouse/ClickHouse/pull/5136) ([Guillaume Tassery](https://github.com/YiuRULE)) -- Added `LIMIT n, m BY` or `LIMIT m OFFSET n BY` syntax to set an offset of n for the LIMIT BY clause (example below). [\#5138](https://github.com/ClickHouse/ClickHouse/pull/5138) ([Anton Popov](https://github.com/CurtizJ)) -- Added the new data type `SimpleAggregateFunction`, which allows having columns with light aggregation in an `AggregatingMergeTree`. This can only be used with simple functions like `any`, `anyLast`, `sum`, `min`, `max`. [\#4629](https://github.com/ClickHouse/ClickHouse/pull/4629) ([Boris Granveaud](https://github.com/bgranvea)) -- Added support for non-constant arguments in the function `ngramDistance` [\#5198](https://github.com/ClickHouse/ClickHouse/pull/5198) ([Danila Kutenin](https://github.com/danlark1)) -- Added functions `skewPop`, `skewSamp`, `kurtPop` and `kurtSamp` to compute sequence skewness, sample skewness, kurtosis and sample kurtosis respectively. [\#5200](https://github.com/ClickHouse/ClickHouse/pull/5200) ([hcz](https://github.com/hczhcz)) -- Support the rename operation for `MaterializeView` storage. [\#5209](https://github.com/ClickHouse/ClickHouse/pull/5209) ([Guillaume Tassery](https://github.com/YiuRULE)) -- Added a server which allows connecting to ClickHouse using a MySQL client. [\#4715](https://github.com/ClickHouse/ClickHouse/pull/4715) ([Yuriy Baranov](https://github.com/yurriy)) -- Add `toDecimal*OrZero` and `toDecimal*OrNull` functions. [\#5291](https://github.com/ClickHouse/ClickHouse/pull/5291) ([Artem Zuikov](https://github.com/4ertus2)) -- Support Decimal types in the functions `quantile`, `quantiles`, `median`, `quantileExactWeighted`, `quantilesExactWeighted`, `medianExactWeighted`. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2)) -- Added the `toValidUTF8` function, which replaces all invalid UTF-8 characters with the replacement character � (U+FFFD). [\#5322](https://github.com/ClickHouse/ClickHouse/pull/5322) ([Danila Kutenin](https://github.com/danlark1)) -- Added the `format` function. It formats a constant pattern (a simplified Python format pattern) with the strings listed in the arguments. [\#5330](https://github.com/ClickHouse/ClickHouse/pull/5330) ([Danila Kutenin](https://github.com/danlark1)) -- Added the `system.detached_parts` table containing information about detached parts of `MergeTree` tables. [\#5353](https://github.com/ClickHouse/ClickHouse/pull/5353) ([akuzm](https://github.com/akuzm)) -- Added the `ngramSearch` function to calculate the non-symmetric difference between needle and haystack. [\#5418](https://github.com/ClickHouse/ClickHouse/pull/5418)[\#5422](https://github.com/ClickHouse/ClickHouse/pull/5422) ([Danila Kutenin](https://github.com/danlark1)) -- Implementation of basic machine learning methods (stochastic linear regression and logistic regression) using the aggregate functions interface. Has different strategies for updating model weights (simple gradient descent, momentum method, Nesterov method). Also supports mini-batches of custom size. [\#4943](https://github.com/ClickHouse/ClickHouse/pull/4943) ([Quid37](https://github.com/Quid37))
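A sketch of the `LIMIT m OFFSET n BY` syntax and a few of the new functions above; `page_views` and its columns are hypothetical:

```sql
-- Skip the top row per domain and take the next two.
SELECT domain, page
FROM page_views
ORDER BY domain, hits DESC
LIMIT 1, 2 BY domain;

-- `format` and the `toDecimal*OrNull` family.
SELECT
    format('{} rows in {}', '42', 'page_views') AS msg,
    toDecimal32OrNull('not a number', 2)        AS null_on_bad_input;
```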
-- Implementation of `geohashEncode` and `geohashDecode` functions. [\#5003](https://github.com/ClickHouse/ClickHouse/pull/5003) ([Vasily Nemkov](https://github.com/Enmk)) -- Added the aggregate function `timeSeriesGroupSum`, which can aggregate different time series whose sample timestamps are not aligned. It uses linear interpolation between two sample timestamps and then sums the time series together. Added the aggregate function `timeSeriesGroupRateSum`, which calculates the rate of time series and then sums the rates together. [\#4542](https://github.com/ClickHouse/ClickHouse/pull/4542) ([Yangkuan Liu](https://github.com/LiuYangkuan)) -- Added functions `IPv4CIDRtoIPv4Range` and `IPv6CIDRtoIPv6Range` to calculate the lower and upper bounds for an IP in the subnet using a CIDR (example below). [\#5095](https://github.com/ClickHouse/ClickHouse/pull/5095) ([Guillaume Tassery](https://github.com/YiuRULE)) -- Add an X-ClickHouse-Summary header when sending a query over HTTP with the setting `send_progress_in_http_headers` enabled. It returns the usual information of X-ClickHouse-Progress, with additional information such as how many rows and bytes were inserted by the query. [\#5116](https://github.com/ClickHouse/ClickHouse/pull/5116) ([Guillaume Tassery](https://github.com/YiuRULE)) - -#### Improvements {#improvements} - -- Added the `max_parts_in_total` setting for the MergeTree family of tables (default: 100 000) that prevents unsafe specification of the partition key \#5166. [\#5171](https://github.com/ClickHouse/ClickHouse/pull/5171) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- `clickhouse-obfuscator`: derive the seed for individual columns by combining the initial seed with the column name, not the column position. This is intended to transform datasets with multiple related tables, so that tables will remain JOINable after transformation. [\#5178](https://github.com/ClickHouse/ClickHouse/pull/5178) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added functions `JSONExtractRaw`, `JSONExtractKeyAndValues`. Renamed the functions `jsonExtract` to `JSONExtract`. When something goes wrong these functions return the corresponding values, not `NULL`. Modified the function `JSONExtract`; now it gets the return type from its last parameter and doesn’t inject nullables. Implemented fallback to RapidJSON in case AVX2 instructions are not available. The simdjson library was updated to a new version. [\#5235](https://github.com/ClickHouse/ClickHouse/pull/5235) ([Vitaly Baranov](https://github.com/vitlibar)) -- Now the `if` and `multiIf` functions don’t rely on the condition’s `Nullable`, but rely on the branches for SQL compatibility. [\#5238](https://github.com/ClickHouse/ClickHouse/pull/5238) ([Jian Wu](https://github.com/janplus)) -- The `In` predicate now generates a `Null` result from `Null` input, like the `Equal` function. [\#5152](https://github.com/ClickHouse/ClickHouse/pull/5152) ([Jian Wu](https://github.com/janplus)) -- Check the time limit every (flush\_interval / poll\_timeout) number of rows from Kafka. This allows breaking the reading from the Kafka consumer more frequently and checking the time limits for the top-level streams [\#5249](https://github.com/ClickHouse/ClickHouse/pull/5249) ([Ivan](https://github.com/abyss7)) -- Link rdkafka with bundled SASL. It should allow using SASL SCRAM authentication [\#5253](https://github.com/ClickHouse/ClickHouse/pull/5253) ([Ivan](https://github.com/abyss7)) -- Batched version of RowRefList for ALL JOINs. [\#5267](https://github.com/ClickHouse/ClickHouse/pull/5267) ([Artem Zuikov](https://github.com/4ertus2))
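Hedged sketches of the CIDR helpers and the renamed JSON functions from the entries above; the commented outputs show the expected shape only:

```sql
-- Lower and upper bounds of the /16 subnet containing the address.
SELECT IPv4CIDRtoIPv4Range(toIPv4('192.168.5.2'), 16) AS bounds;

-- Raw (unparsed) extraction of a JSON subobject.
SELECT JSONExtractRaw('{"a": {"b": 1}}', 'a') AS raw;  -- '{"b":1}'
```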
-- clickhouse-server: more informative listen error messages. [\#5268](https://github.com/ClickHouse/ClickHouse/pull/5268) ([proller](https://github.com/proller)) -- Support dictionaries in clickhouse-copier for functions in `` [\#5270](https://github.com/ClickHouse/ClickHouse/pull/5270) ([proller](https://github.com/proller)) -- Add a new setting `kafka_commit_every_batch` to regulate the Kafka committing policy. It allows setting the commit mode: after every batch of messages is handled, or after the whole block is written to the storage. It’s a trade-off between losing some messages or reading them twice in some extreme situations. [\#5308](https://github.com/ClickHouse/ClickHouse/pull/5308) ([Ivan](https://github.com/abyss7)) -- Make `windowFunnel` support other unsigned integer types (example below). [\#5320](https://github.com/ClickHouse/ClickHouse/pull/5320) ([sundyli](https://github.com/sundy-li)) -- Allow shadowing the virtual column `_table` in the Merge engine. [\#5325](https://github.com/ClickHouse/ClickHouse/pull/5325) ([Ivan](https://github.com/abyss7)) -- Make `sequenceMatch` aggregate functions support other unsigned integer types [\#5339](https://github.com/ClickHouse/ClickHouse/pull/5339) ([sundyli](https://github.com/sundy-li)) -- Better error messages if a checksum mismatch is most likely caused by hardware failures. [\#5355](https://github.com/ClickHouse/ClickHouse/pull/5355) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Check that underlying tables support sampling for `StorageMerge` [\#5366](https://github.com/ClickHouse/ClickHouse/pull/5366) ([Ivan](https://github.com/abyss7)) -- Close MySQL connections after their usage in external dictionaries. It is related to issue \#893. [\#5395](https://github.com/ClickHouse/ClickHouse/pull/5395) ([Clément Rodriguez](https://github.com/clemrodriguez)) -- Improvements of the MySQL Wire Protocol. Changed the name of the format to MySQLWire. Using RAII for calling RSA\_free. Disabling SSL if the context cannot be created. [\#5419](https://github.com/ClickHouse/ClickHouse/pull/5419) ([Yuriy Baranov](https://github.com/yurriy)) -- clickhouse-client: allow running with an inaccessible history file (read-only, no disk space, file is a directory, …). [\#5431](https://github.com/ClickHouse/ClickHouse/pull/5431) ([proller](https://github.com/proller)) -- Respect query settings in asynchronous INSERTs into Distributed tables. [\#4936](https://github.com/ClickHouse/ClickHouse/pull/4936) ([TCeason](https://github.com/TCeason)) -- Renamed the functions `leastSqr` to `simpleLinearRegression`, `LinearRegression` to `linearRegression`, `LogisticRegression` to `logisticRegression`. [\#5391](https://github.com/ClickHouse/ClickHouse/pull/5391) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -#### Performance Improvements {#performance-improvements} - -- Parallelize processing of parts of non-replicated MergeTree tables in the ALTER MODIFY query. [\#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush)) -- Optimizations in regular expressions extraction. [\#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [\#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1)) -- Do not add a right join key column to the join result if it is used only in the JOIN ON section. [\#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuikov](https://github.com/4ertus2))
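A sketch of `windowFunnel` (per the entry above it now accepts more unsigned integer types for the timestamp); the `events` table is hypothetical:

```sql
-- Highest reached step of the view -> click -> buy chain within 3600 seconds.
SELECT
    user_id,
    windowFunnel(3600)(ts, event = 'view', event = 'click', event = 'buy') AS level
FROM events
GROUP BY user_id;
```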
-- Freeze the Kafka buffer after the first empty response. It avoids multiple invocations of `ReadBuffer::next()` for an empty result in some row-parsing streams. [\#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7)) -- `concat` function optimization for multiple arguments. [\#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1)) -- Query optimisation. Allow pushing down an IN statement while rewriting a comma/cross join into an inner one. [\#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuikov](https://github.com/4ertus2)) -- Upgrade our LZ4 implementation with the reference one to have faster decompression. [\#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1)) -- Implemented MSD radix sort (based on kxsort), and partial sorting. [\#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty)) - -#### Bug Fixes {#bug-fixes} - -- Fix pushing required columns with join [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014)) -- Fixed a bug where, when ClickHouse was run by systemd, the command `sudo service clickhouse-server forcerestart` did not work as expected. [\#5204](https://github.com/ClickHouse/ClickHouse/pull/5204) ([proller](https://github.com/proller)) -- Fix http error codes in DataPartsExchange (the interserver http server on port 9009 always returned code 200, even on errors). [\#5216](https://github.com/ClickHouse/ClickHouse/pull/5216) ([proller](https://github.com/proller)) -- Fix SimpleAggregateFunction for Strings longer than MAX\_SMALL\_STRING\_SIZE [\#5311](https://github.com/ClickHouse/ClickHouse/pull/5311) ([Azat Khuzhin](https://github.com/azat)) -- Fix error for `Decimal` to `Nullable(Decimal)` conversion in IN. Support other Decimal to Decimal conversions (including different scales). [\#5350](https://github.com/ClickHouse/ClickHouse/pull/5350) ([Artem Zuikov](https://github.com/4ertus2)) -- Fixed FPU clobbering in the simdjson library that led to wrong calculation of the `uniqHLL` and `uniqCombined` aggregate functions and math functions such as `log`. [\#5354](https://github.com/ClickHouse/ClickHouse/pull/5354) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed handling mixed const/nonconst cases in JSON functions. [\#5435](https://github.com/ClickHouse/ClickHouse/pull/5435) ([Vitaly Baranov](https://github.com/vitlibar)) -- Fix the `retention` function. Now all conditions that are satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu)) -- Fix the result type for `quantileExact` with Decimals. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Documentation {#documentation} - -- Translate documentation for `CollapsingMergeTree` to Chinese. [\#5168](https://github.com/ClickHouse/ClickHouse/pull/5168) ([张风啸](https://github.com/AlexZFX)) -- Translate some documentation about table engines to Chinese.
[\#5134](https://github.com/ClickHouse/ClickHouse/pull/5134) [\#5328](https://github.com/ClickHouse/ClickHouse/pull/5328) ([never lee](https://github.com/neverlee)) - -#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements} - -- Fix some sanitizer reports that show probable use-after-free. [\#5139](https://github.com/ClickHouse/ClickHouse/pull/5139) [\#5143](https://github.com/ClickHouse/ClickHouse/pull/5143) [\#5393](https://github.com/ClickHouse/ClickHouse/pull/5393) ([Ivan](https://github.com/abyss7)) -- Move performance tests out of separate directories for convenience. [\#5158](https://github.com/ClickHouse/ClickHouse/pull/5158) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fix incorrect performance tests. [\#5255](https://github.com/ClickHouse/ClickHouse/pull/5255) ([alesapin](https://github.com/alesapin)) -- Added a tool to calculate checksums caused by bit flips to debug hardware issues. [\#5334](https://github.com/ClickHouse/ClickHouse/pull/5334) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Make the runner script more usable. [\#5340](https://github.com/ClickHouse/ClickHouse/pull/5340)[\#5360](https://github.com/ClickHouse/ClickHouse/pull/5360) ([filimonov](https://github.com/filimonov)) -- Added a small instruction on how to write performance tests. [\#5408](https://github.com/ClickHouse/ClickHouse/pull/5408) ([alesapin](https://github.com/alesapin)) -- Added the ability to make substitutions in create, fill and drop queries in performance tests [\#5367](https://github.com/ClickHouse/ClickHouse/pull/5367) ([Olga Khvostikova](https://github.com/stavrolia)) - -## ClickHouse release 19.7 {#clickhouse-release-19-7} - -### ClickHouse release 19.7.5.29, 2019-07-05 {#clickhouse-release-19-7-5-29-2019-07-05} - -#### Bug Fix {#bug-fix-25} - -- Fix a performance regression in some queries with JOIN. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014)) - -### ClickHouse release 19.7.5.27, 2019-06-09 {#clickhouse-release-19-7-5-27-2019-06-09} - -#### New features {#new-features-1} - -- Added bitmap-related functions `bitmapHasAny` and `bitmapHasAll`, analogous to the `hasAny` and `hasAll` functions for arrays (example below). [\#5279](https://github.com/ClickHouse/ClickHouse/pull/5279) ([Sergi Vladykin](https://github.com/svladykin)) - -#### Bug Fixes {#bug-fixes-1} - -- Fix segfault on `minmax` INDEX with a Null value. [\#5246](https://github.com/ClickHouse/ClickHouse/pull/5246) ([Nikita Vasilev](https://github.com/nikvas0)) -- Mark all input columns in LIMIT BY as required output. It fixes the ‘Not found column’ error in some distributed queries. [\#5407](https://github.com/ClickHouse/ClickHouse/pull/5407) ([Constantin S. Pan](https://github.com/kvap)) -- Fix the “Column ‘0’ already exists” error in `SELECT .. PREWHERE` on a column with DEFAULT [\#5397](https://github.com/ClickHouse/ClickHouse/pull/5397) ([proller](https://github.com/proller)) -- Fix the `ALTER MODIFY TTL` query on `ReplicatedMergeTree`. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539/commits) ([Anton Popov](https://github.com/CurtizJ)) -- Don’t crash the server when Kafka consumers have failed to start. [\#5285](https://github.com/ClickHouse/ClickHouse/pull/5285) ([Ivan](https://github.com/abyss7)) -- Fixed bitmap functions producing wrong results. [\#5359](https://github.com/ClickHouse/ClickHouse/pull/5359) ([Andy Yang](https://github.com/andyyzh))
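A sketch of the bitmap predicates named above, analogous to `hasAny`/`hasAll` for arrays:

```sql
SELECT
    bitmapHasAny(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4])) AS has_any,  -- 1: they share 3
    bitmapHasAll(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4])) AS has_all;  -- 0: 4 is missing
```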
-- Fix element\_count for hashed dictionaries (do not include duplicates) [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat)) -- Use the contents of the environment variable TZ as the name for the timezone. It helps to correctly detect the default timezone in some cases. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7)) -- Do not try to convert integers in `dictGetT` functions, because it doesn’t work correctly. Throw an exception instead. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2)) -- Fix settings in ExternalData HTTP requests. [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1)) -- Fix a bug when parts were removed only from FS without dropping them from ZooKeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin)) -- Fix segmentation fault in the `bitmapHasAny` function. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang)) -- Fixed an error when the replication connection pool didn’t retry to resolve the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin)) -- Fixed the `DROP INDEX IF EXISTS` query. Now the `ALTER TABLE ... DROP INDEX IF EXISTS ...` query doesn’t raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn)) -- Fix the union all supertype column. There were cases with inconsistent data and column types of resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2)) -- Skip ZNONODE during DDL query processing. Previously, if another node removed the znode in the task queue, the node that did not process it but had already gotten the list of children would terminate the DDLWorker thread. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat)) -- Fix INSERT into a Distributed() table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat)) - -### ClickHouse release 19.7.3.9, 2019-05-30 {#clickhouse-release-19-7-3-9-2019-05-30} - -#### New Features {#new-features-2} - -- Allow limiting the range of a setting that can be specified by the user. These constraints can be set up in the user settings profile. [\#4931](https://github.com/ClickHouse/ClickHouse/pull/4931) ([Vitaly Baranov](https://github.com/vitlibar)) -- Add a second version of the function `groupUniqArray` with an optional `max_size` parameter that limits the size of the resulting array. This behavior is similar to the `groupArray(max_size)(x)` function (example below). [\#5026](https://github.com/ClickHouse/ClickHouse/pull/5026) ([Guillaume Tassery](https://github.com/YiuRULE)) -- For TSVWithNames/CSVWithNames input file formats, the column order can now be determined from the file header. This is controlled by the `input_format_with_names_use_header` parameter. [\#5081](https://github.com/ClickHouse/ClickHouse/pull/5081) ([Alexander](https://github.com/Akazz))
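A sketch of the capped `groupUniqArray` variant described above:

```sql
-- Keeps at most 2 distinct values, analogous to groupArray(max_size)(x).
SELECT groupUniqArray(2)(x) FROM (SELECT arrayJoin([1, 1, 2, 3, 3, 4]) AS x);
```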
#### Bug Fixes {#bug-fixes-2}

- Crash with uncompressed\_cache + JOIN during merge (\#5197). [\#5133](https://github.com/ClickHouse/ClickHouse/pull/5133) ([Danila Kutenin](https://github.com/danlark1))
- Segmentation fault on a clickhouse-client query to system tables. \#5066 [\#5127](https://github.com/ClickHouse/ClickHouse/pull/5127) ([Ivan](https://github.com/abyss7))
- Data loss on heavy load via KafkaEngine (\#4736). [\#5080](https://github.com/ClickHouse/ClickHouse/pull/5080) ([Ivan](https://github.com/abyss7))
- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family and performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Performance Improvements {#performance-improvements-1}

- Use radix sort for sorting by a single numeric column in `ORDER BY` without `LIMIT`. [\#5106](https://github.com/ClickHouse/ClickHouse/pull/5106), [\#4439](https://github.com/ClickHouse/ClickHouse/pull/4439) ([Evgenii Pravda](https://github.com/kvinty), [alexey-milovidov](https://github.com/alexey-milovidov))

#### Documentation {#documentation-1}

- Translate documentation for some table engines to Chinese. [\#5107](https://github.com/ClickHouse/ClickHouse/pull/5107), [\#5094](https://github.com/ClickHouse/ClickHouse/pull/5094), [\#5087](https://github.com/ClickHouse/ClickHouse/pull/5087) ([张风啸](https://github.com/AlexZFX)), [\#5068](https://github.com/ClickHouse/ClickHouse/pull/5068) ([never lee](https://github.com/neverlee))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-1}

- Print UTF-8 characters properly in `clickhouse-test`. [\#5084](https://github.com/ClickHouse/ClickHouse/pull/5084) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a command-line parameter for clickhouse-client to always load suggestion data. [\#5102](https://github.com/ClickHouse/ClickHouse/pull/5102) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Resolve some of the PVS-Studio warnings. [\#5082](https://github.com/ClickHouse/ClickHouse/pull/5082) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Update LZ4. [\#5040](https://github.com/ClickHouse/ClickHouse/pull/5040) ([Danila Kutenin](https://github.com/danlark1))
- Add gperf to build requirements for upcoming pull request \#5030. [\#5110](https://github.com/ClickHouse/ClickHouse/pull/5110) ([proller](https://github.com/proller))

## ClickHouse release 19.6 {#clickhouse-release-19-6}

### ClickHouse release 19.6.3.18, 2019-06-13 {#clickhouse-release-19-6-3-18-2019-06-13}

#### Bug Fixes {#bug-fixes-3}

- Fixed IN condition pushdown for queries from the table functions `mysql` and `odbc` and the corresponding table engines. This fixes \#3540 and \#2384. [\#5313](https://github.com/ClickHouse/ClickHouse/pull/5313) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix deadlock in Zookeeper. [\#5297](https://github.com/ClickHouse/ClickHouse/pull/5297) ([github1youlc](https://github.com/github1youlc))
- Allow quoted decimals in CSV.
  [\#5284](https://github.com/ClickHouse/ClickHouse/pull/5284) ([Artem Zuikov](https://github.com/4ertus2))
- Disallow conversion from float Inf/NaN into Decimals (throw an exception). [\#5282](https://github.com/ClickHouse/ClickHouse/pull/5282) ([Artem Zuikov](https://github.com/4ertus2))
- Fix data race in the rename query. [\#5247](https://github.com/ClickHouse/ClickHouse/pull/5247) ([Winter Zhang](https://github.com/zhang2014))
- Temporarily disable LFAlloc. Usage of LFAlloc might lead to a lot of MAP\_FAILED in allocating UncompressedCache and, as a result, to crashes of queries on highly loaded servers. [cfdba93](https://github.com/ClickHouse/ClickHouse/commit/cfdba938ce22f16efeec504f7f90206a515b1280) ([Danila Kutenin](https://github.com/danlark1))

### ClickHouse release 19.6.2.11, 2019-05-13 {#clickhouse-release-19-6-2-11-2019-05-13}

#### New Features {#new-features-3}

- TTL expressions for columns and tables (see the sketch below). [\#4212](https://github.com/ClickHouse/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ))
- Added support for `brotli` compression for HTTP responses (Accept-Encoding: br). [\#4388](https://github.com/ClickHouse/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin))
- Added a new function `isValidUTF8` for checking whether a set of bytes is correctly UTF-8 encoded. [\#4934](https://github.com/ClickHouse/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1))
- Add a new load balancing policy `first_or_random` which sends queries to the first specified host and, if it is inaccessible, sends queries to random hosts of the shard. Useful for cross-replication topology setups. [\#5012](https://github.com/ClickHouse/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei))
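A minimal sketch of TTL expressions; the table name and intervals are illustrative:

```sql
CREATE TABLE ttl_demo
(
    d Date,
    x UInt32 TTL d + INTERVAL 1 WEEK   -- column TTL: x is reset to its default after a week
)
ENGINE = MergeTree
ORDER BY d
TTL d + INTERVAL 1 MONTH;              -- table TTL: whole rows are removed after a month
```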
#### Experimental Features {#experimental-features-1}

- Add setting `index_granularity_bytes` (adaptive index granularity) for the MergeTree\* table family. [\#4826](https://github.com/ClickHouse/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin))

#### Improvements {#improvements-1}

- Added support for non-constant and negative size and length arguments for the function `substringUTF8`. [\#4989](https://github.com/ClickHouse/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Disable push-down to the right table in LEFT JOIN, to the left table in RIGHT JOIN, and to both tables in FULL JOIN. This fixes wrong JOIN results in some cases. [\#4846](https://github.com/ClickHouse/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7))
- `clickhouse-copier`: auto-upload task configuration from the `--task-file` option. [\#4876](https://github.com/ClickHouse/ClickHouse/pull/4876) ([proller](https://github.com/proller))
- Added a typos handler for the storage factory and table functions factory. [\#4891](https://github.com/ClickHouse/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1))
- Support asterisks and qualified asterisks for multiple joins without subqueries. [\#4898](https://github.com/ClickHouse/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2))
- Make the missing column error message more user-friendly. [\#4915](https://github.com/ClickHouse/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2))

#### Performance Improvements {#performance-improvements-2}

- Significant speedup of ASOF JOIN. [\#4924](https://github.com/ClickHouse/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy))

#### Backward Incompatible Changes {#backward-incompatible-changes}

- HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [\#4972](https://github.com/ClickHouse/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin))

#### Bug Fixes {#bug-fixes-4}

- Fixed a potential null pointer dereference in `clickhouse-copier`. [\#4900](https://github.com/ClickHouse/ClickHouse/pull/4900) ([proller](https://github.com/proller))
- Fixed an error on queries with JOIN + ARRAY JOIN. [\#4938](https://github.com/ClickHouse/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed hanging on start of the server when a dictionary depends on another dictionary via a database with engine=Dictionary. [\#4962](https://github.com/ClickHouse/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar))
- Partially fix distributed\_product\_mode = local. Columns of local tables are now allowed in WHERE/HAVING/ORDER BY/… via table aliases. An exception is thrown if the table does not have an alias. It is not yet possible to access the columns without table aliases. [\#4986](https://github.com/ClickHouse/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2))
- Fix a potentially wrong result for `SELECT DISTINCT` with `JOIN`. [\#5001](https://github.com/ClickHouse/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family and performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-2}

- Fixed test failures when running clickhouse-server on a different host. [\#4713](https://github.com/ClickHouse/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk))
- clickhouse-test: Disable color control sequences in non-tty environments. [\#4937](https://github.com/ClickHouse/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin))
- clickhouse-test: Allow using any test database (remove the `test.` qualification where possible). [\#5008](https://github.com/ClickHouse/ClickHouse/pull/5008) ([proller](https://github.com/proller))
- Fix ubsan errors. [\#5037](https://github.com/ClickHouse/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar))
- Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliably. [\#4995](https://github.com/ClickHouse/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1))
- Python util to help with backports and changelogs.
  [\#4949](https://github.com/ClickHouse/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7))

## ClickHouse release 19.5 {#clickhouse-release-19-5}

### ClickHouse release 19.5.4.22, 2019-05-13 {#clickhouse-release-19-5-4-22-2019-05-13}

#### Bug Fixes {#bug-fixes-5}

- Fixed a possible crash in bitmap\* functions. [\#5220](https://github.com/ClickHouse/ClickHouse/pull/5220) [\#5228](https://github.com/ClickHouse/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh))
- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family and performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if a LowCardinality column was part of the primary key. \#5031 [\#5154](https://github.com/ClickHouse/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Modification of the `retention` function: previously, if a row satisfied both the first and the Nth condition, only the first satisfied condition was added to the data state. Now all conditions satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))

### ClickHouse release 19.5.3.8, 2019-04-18 {#clickhouse-release-19-5-3-8-2019-04-18}

#### Bug Fixes {#bug-fixes-6}

- Fixed the type of the setting `max_partitions_per_insert_block` from boolean to UInt64. [\#5028](https://github.com/ClickHouse/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat))

### ClickHouse release 19.5.2.6, 2019-04-15 {#clickhouse-release-19-5-2-6-2019-04-15}

#### New Features {#new-features-4}

- [Hyperscan](https://github.com/intel/hyperscan) multiple regular expression matching was added (functions `multiMatchAny`, `multiMatchAnyIndex`, `multiFuzzyMatchAny`, `multiFuzzyMatchAnyIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780), [\#4841](https://github.com/ClickHouse/ClickHouse/pull/4841) ([Danila Kutenin](https://github.com/danlark1))
- The `multiSearchFirstPosition` function was added. [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
- Implement the predefined expression filter per row for tables. [\#4792](https://github.com/ClickHouse/ClickHouse/pull/4792) ([Ivan](https://github.com/abyss7))
- A new type of data skipping indices based on bloom filters (can be used for the `equal`, `in` and `like` functions). [\#4499](https://github.com/ClickHouse/ClickHouse/pull/4499) ([Nikita Vasilev](https://github.com/nikvas0))
- Added `ASOF JOIN` which allows running queries that join to the most recent value known (see the sketch below). [\#4774](https://github.com/ClickHouse/ClickHouse/pull/4774) [\#4867](https://github.com/ClickHouse/ClickHouse/pull/4867) [\#4863](https://github.com/ClickHouse/ClickHouse/pull/4863) [\#4875](https://github.com/ClickHouse/ClickHouse/pull/4875) ([Martijn Bakker](https://github.com/Gladdy), [Artem Zuikov](https://github.com/4ertus2))
- Rewrite multiple `COMMA JOIN` to `CROSS JOIN`. Then rewrite them to `INNER JOIN` if possible. [\#4661](https://github.com/ClickHouse/ClickHouse/pull/4661) ([Artem Zuikov](https://github.com/4ertus2))
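A sketch of `ASOF JOIN` and `multiMatchAny`, assuming hypothetical `trades` and `quotes` tables that both have `symbol` and `time` columns; the last column in `USING` is the inequality key, so each trade picks the latest quote whose `time` is not greater than the trade's:

```sql
SELECT t.symbol, t.time, q.bid
FROM trades AS t
ASOF JOIN quotes AS q USING (symbol, time);

-- Hyperscan-based multi-pattern matching: returns 1 if any regexp matches.
SELECT multiMatchAny('ClickHouse 19.5.2.6', ['\\d+\\.\\d+', '^Click']);
```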
#### Improvement {#improvement-9}

- `topK` and `topKWeighted` now support a custom `loadFactor` (fixes issue [\#4252](https://github.com/ClickHouse/ClickHouse/issues/4252)). [\#4634](https://github.com/ClickHouse/ClickHouse/pull/4634) ([Kirill Danshin](https://github.com/kirillDanshin))
- Allow using `parallel_replicas_count > 1` even for tables without sampling (the setting is simply ignored for them). In previous versions it led to an exception. [\#4637](https://github.com/ClickHouse/ClickHouse/pull/4637) ([Alexey Elymanov](https://github.com/digitalist))
- Support for `CREATE OR REPLACE VIEW`. Allows creating a view or setting a new definition in a single statement (see the sketch below). [\#4654](https://github.com/ClickHouse/ClickHouse/pull/4654) ([Boris Granveaud](https://github.com/bgranvea))
- The `Buffer` table engine now supports `PREWHERE`. [\#4671](https://github.com/ClickHouse/ClickHouse/pull/4671) ([Yangkuan Liu](https://github.com/LiuYangkuan))
- Add the ability to start a replicated table without metadata in ZooKeeper in `readonly` mode. [\#4691](https://github.com/ClickHouse/ClickHouse/pull/4691) ([alesapin](https://github.com/alesapin))
- Fixed flicker of the progress bar in clickhouse-client. The issue was most noticeable when using `FORMAT Null` with streaming queries. [\#4811](https://github.com/ClickHouse/ClickHouse/pull/4811) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow disabling functions with the `hyperscan` library on a per-user basis to limit potentially excessive and uncontrolled resource usage. [\#4816](https://github.com/ClickHouse/ClickHouse/pull/4816) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add version number logging in all errors. [\#4824](https://github.com/ClickHouse/ClickHouse/pull/4824) ([proller](https://github.com/proller))
- Added a restriction to the `multiMatch` functions which requires the string size to fit into `unsigned int`. Also added a limit on the number of arguments to the `multiSearch` functions. [\#4834](https://github.com/ClickHouse/ClickHouse/pull/4834) ([Danila Kutenin](https://github.com/danlark1))
- Improved usage of scratch space and error handling in Hyperscan. [\#4866](https://github.com/ClickHouse/ClickHouse/pull/4866) ([Danila Kutenin](https://github.com/danlark1))
- Fill `system.graphite_retentions` from a table config of `*GraphiteMergeTree` engine tables. [\#4584](https://github.com/ClickHouse/ClickHouse/pull/4584) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
- Rename the `trigramDistance` function to `ngramDistance` and add more functions with `CaseInsensitive` and `UTF`. [\#4602](https://github.com/ClickHouse/ClickHouse/pull/4602) ([Danila Kutenin](https://github.com/danlark1))
- Improved data skipping indices calculation. [\#4640](https://github.com/ClickHouse/ClickHouse/pull/4640) ([Nikita Vasilev](https://github.com/nikvas0))
- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
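A minimal sketch of the `CREATE OR REPLACE VIEW` entry above; the view and source table are hypothetical:

```sql
-- Creates the view, or atomically replaces its definition if it already exists.
CREATE OR REPLACE VIEW top_pages AS
    SELECT page, count() AS hits
    FROM pageviews
    GROUP BY page
    ORDER BY hits DESC;
```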
#### Bug Fix {#bug-fix-26}

- Avoid `std::terminate` in case of memory allocation failure. Now the `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixes capnproto reading from buffer. Sometimes files weren’t loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
- Fixed the `UTC` setting being ignored (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
- Fix `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
- Fixed TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix rechecking of parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Always backquote column names in metadata. Otherwise it’s impossible to create a table with a column named `index` (the server won’t restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
- Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
- Fix a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix crash of `JOIN` on a not-nullable vs. nullable column. Fix `NULLs` in right keys in `ANY JOIN` + `join_use_nulls`. [\#4815](https://github.com/ClickHouse/ClickHouse/pull/4815) ([Artem Zuikov](https://github.com/4ertus2))
- Fix segmentation fault in `clickhouse-copier`.
  [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a rare data race that could happen during `RENAME` of a MergeTree-family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a segmentation fault in the function `arrayIntersect`. It could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs. not-nullable columns. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
- Fixed wrong `arrayIntersect` results when a single array contains several repeated values. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
- Fix an incorrect result in `FULL/RIGHT JOIN` with a const column. [\#4723](https://github.com/ClickHouse/ClickHouse/pull/4723) ([Artem Zuikov](https://github.com/4ertus2))
- Fix duplicates in `GLOBAL JOIN` with asterisk. [\#4705](https://github.com/ClickHouse/ClickHouse/pull/4705) ([Artem Zuikov](https://github.com/4ertus2))
- Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix a rare bug when the setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix wrong name qualification in `GLOBAL JOIN`.
  [\#4969](https://github.com/ClickHouse/ClickHouse/pull/4969) ([Artem Zuikov](https://github.com/4ertus2))
- Fix the `toISOWeek` function result for the year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix duplication of `DROP`, `TRUNCATE` and `OPTIMIZE` queries when executed `ON CLUSTER` for the `ReplicatedMergeTree*` table family. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))

#### Backward Incompatible Change {#backward-incompatible-change-8}

- Renamed the setting `insert_sample_with_metadata` to `input_format_defaults_for_omitted_fields`. [\#4771](https://github.com/ClickHouse/ClickHouse/pull/4771) ([Artem Zuikov](https://github.com/4ertus2))
- Added the setting `max_partitions_per_insert_block` (with value 100 by default). If an inserted block contains a larger number of partitions, an exception is thrown. Set it to 0 if you want to remove the limit (not recommended); see the sketch below. [\#4845](https://github.com/ClickHouse/ClickHouse/pull/4845) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Multi-search functions were renamed (`multiPosition` to `multiSearchAllPositions`, `multiSearch` to `multiSearchAny`, `firstMatch` to `multiSearchFirstIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
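How the renamed settings and functions above look in use (the values are illustrative):

```sql
-- Old name: insert_sample_with_metadata
SET input_format_defaults_for_omitted_fields = 1;

-- 100 is the default; 0 disables the partition-count check (not recommended)
SET max_partitions_per_insert_block = 100;

-- Old name: multiPosition; returns the 1-based position of each needle, e.g. [1, 7]
SELECT multiSearchAllPositions('hello world', ['hello', 'world']);
```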
#### Performance Improvement {#performance-improvement-6}

- Optimize the Volnitsky searcher by inlining, giving about a 5-10% search improvement for queries with many needles or many similar bigrams. [\#4862](https://github.com/ClickHouse/ClickHouse/pull/4862) ([Danila Kutenin](https://github.com/danlark1))
- Fix a performance issue when the setting `use_uncompressed_cache` is greater than zero, which appeared when all read data is contained in the cache. [\#4913](https://github.com/ClickHouse/ClickHouse/pull/4913) ([alesapin](https://github.com/alesapin))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-10}

- Hardening debug build: more granular memory mappings and ASLR; add memory protection for mark cache and index. This allows finding more memory-stomping bugs in cases when ASan and MSan cannot do it. [\#4632](https://github.com/ClickHouse/ClickHouse/pull/4632) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add support for the cmake variables `ENABLE_PROTOBUF`, `ENABLE_PARQUET` and `ENABLE_BROTLI` which allow enabling/disabling the above features (same as we can do for librdkafka, mysql, etc). [\#4669](https://github.com/ClickHouse/ClickHouse/pull/4669) ([Silviu Caragea](https://github.com/silviucpp))
- Add the ability to print the process list and stacktraces of all threads if some queries are hung after a test run. [\#4675](https://github.com/ClickHouse/ClickHouse/pull/4675) ([alesapin](https://github.com/alesapin))
- Add retries on the `Connection loss` error in `clickhouse-test`. [\#4682](https://github.com/ClickHouse/ClickHouse/pull/4682) ([alesapin](https://github.com/alesapin))
- Add a FreeBSD build with vagrant, and a build with the thread sanitizer, to the packager script. [\#4712](https://github.com/ClickHouse/ClickHouse/pull/4712) [\#4748](https://github.com/ClickHouse/ClickHouse/pull/4748) ([alesapin](https://github.com/alesapin))
- Now the user is asked for a password for user `'default'` during installation. [\#4725](https://github.com/ClickHouse/ClickHouse/pull/4725) ([proller](https://github.com/proller))
- Suppress a warning in the `rdkafka` library. [\#4740](https://github.com/ClickHouse/ClickHouse/pull/4740) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow building without SSL. [\#4750](https://github.com/ClickHouse/ClickHouse/pull/4750) ([proller](https://github.com/proller))
- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
- Upgrade contrib boost to 1.69. [\#4793](https://github.com/ClickHouse/ClickHouse/pull/4793) ([proller](https://github.com/proller))
- Disable usage of `mremap` when compiled with Thread Sanitizer. Surprisingly enough, TSan does not intercept `mremap` (though it does intercept `mmap`, `munmap`), which leads to false positives. Fixed a TSan report in stateful tests. [\#4859](https://github.com/ClickHouse/ClickHouse/pull/4859) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a test checking the use of format schema via the HTTP interface. [\#4864](https://github.com/ClickHouse/ClickHouse/pull/4864) ([Vitaly Baranov](https://github.com/vitlibar))

## ClickHouse release 19.4 {#clickhouse-release-19-4}

### ClickHouse release 19.4.4.33, 2019-04-17 {#clickhouse-release-19-4-4-33-2019-04-17}

#### Bug Fixes {#bug-fixes-7}

- Avoid `std::terminate` in case of memory allocation failure. Now the `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixes capnproto reading from buffer. Sometimes files weren’t loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
- Fixed the `UTC` setting being ignored (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
- Fix `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
- Fixed TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix rechecking of parts in `ReplicatedMergeTreeAlterThread` in case of error.
  [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Always backquote column names in metadata. Otherwise it’s impossible to create a table with a column named `index` (the server won’t restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
- Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
- Fix a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a rare data race that could happen during `RENAME` of a MergeTree-family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a segmentation fault in the function `arrayIntersect`. It could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
- Fixed wrong `arrayIntersect` results when a single array contains several repeated values. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
- Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query.
  [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix a rare bug when the setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix the `toISOWeek` function result for the year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix duplication of `DROP`, `TRUNCATE` and `OPTIMIZE` queries when executed `ON CLUSTER` for the `ReplicatedMergeTree*` table family. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))

#### Improvements {#improvements-2}

- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))

### ClickHouse release 19.4.3.11, 2019-04-02 {#clickhouse-release-19-4-3-11-2019-04-02}

#### Bug Fixes {#bug-fixes-8}

- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs. not-nullable columns. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-11}

- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))

### ClickHouse release 19.4.2.7, 2019-03-30 {#clickhouse-release-19-4-2-7-2019-03-30}

#### Bug Fixes {#bug-fixes-9}

- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

### ClickHouse release 19.4.1.3, 2019-03-19 {#clickhouse-release-19-4-1-3-2019-03-19}

#### Bug Fixes {#bug-fixes-10}

- Fixed remote queries which contain both `LIMIT BY` and `LIMIT`. Previously, if `LIMIT BY` and `LIMIT` were used for a remote query, `LIMIT` could happen before `LIMIT BY`, which led to an over-filtered result (see the sketch below). [\#4708](https://github.com/ClickHouse/ClickHouse/pull/4708) ([Constantin S. Pan](https://github.com/kvap))
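A sketch of the `LIMIT BY` + `LIMIT` combination from the fix above, on a hypothetical `events` table; `LIMIT BY` must be applied first, then the outer `LIMIT`:

```sql
-- At most 2 most recent events per user, 10 rows in total.
SELECT user_id, event_time
FROM events
ORDER BY user_id, event_time DESC
LIMIT 2 BY user_id
LIMIT 10;
```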
### ClickHouse release 19.4.0.49, 2019-03-09 {#clickhouse-release-19-4-0-49-2019-03-09}

#### New Features {#new-features-5}

- Added full support for the `Protobuf` format (input and output, nested data structures). [\#4174](https://github.com/ClickHouse/ClickHouse/pull/4174) [\#4493](https://github.com/ClickHouse/ClickHouse/pull/4493) ([Vitaly Baranov](https://github.com/vitlibar))
- Added bitmap functions with Roaring Bitmaps. [\#4207](https://github.com/ClickHouse/ClickHouse/pull/4207) ([Andy Yang](https://github.com/andyyzh)) [\#4568](https://github.com/ClickHouse/ClickHouse/pull/4568) ([Vitaly Baranov](https://github.com/vitlibar))
- Parquet format support. [\#4448](https://github.com/ClickHouse/ClickHouse/pull/4448) ([proller](https://github.com/proller))
- N-gram distance was added for fuzzy string comparison. It is similar to q-gram metrics in the R language (see the example below). [\#4466](https://github.com/ClickHouse/ClickHouse/pull/4466) ([Danila Kutenin](https://github.com/danlark1))
- Combine rules for graphite rollup from dedicated aggregation and retention patterns. [\#4426](https://github.com/ClickHouse/ClickHouse/pull/4426) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
- Added `max_execution_speed` and `max_execution_speed_bytes` to limit resource usage. Added the `min_execution_speed_bytes` setting to complement `min_execution_speed`. [\#4430](https://github.com/ClickHouse/ClickHouse/pull/4430) ([Winter Zhang](https://github.com/zhang2014))
- Implemented the function `flatten`. [\#4555](https://github.com/ClickHouse/ClickHouse/pull/4555) [\#4409](https://github.com/ClickHouse/ClickHouse/pull/4409) ([alexey-milovidov](https://github.com/alexey-milovidov), [kzon](https://github.com/kzon))
- Added the functions `arrayEnumerateDenseRanked` and `arrayEnumerateUniqRanked` (like `arrayEnumerateUniq`, but they allow fine-tuning the array depth to look inside multidimensional arrays). [\#4475](https://github.com/ClickHouse/ClickHouse/pull/4475) ([proller](https://github.com/proller)) [\#4601](https://github.com/ClickHouse/ClickHouse/pull/4601) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Multiple JOINs with some restrictions: no asterisks, no complex aliases in ON/WHERE/GROUP BY/… [\#4462](https://github.com/ClickHouse/ClickHouse/pull/4462) ([Artem Zuikov](https://github.com/4ertus2))
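Illustrative calls for two of the functions above (the string arguments are arbitrary):

```sql
SELECT
    flatten([[1, 2], [3, 4], []])            AS flat, -- [1, 2, 3, 4]
    ngramDistance('ClickHouse', 'ClckHouse') AS dist; -- close to 0 for similar strings
```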
#### Bug Fixes {#bug-fixes-11}

- This release also contains all bug fixes from 19.3 and 19.1.
- Fixed a bug in data skipping indices: the order of granules after INSERT was incorrect. [\#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0))
- Fixed the `set` index for `Nullable` and `LowCardinality` columns. Previously, a `set` index with a `Nullable` or `LowCardinality` column led to the error `Data type must be deserialized with multiple streams` while selecting. [\#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Correctly set update\_time on a full `executable` dictionary update. [\#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon))
- Fix the broken progress bar in 19.3. [\#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov))
- Fixed inconsistent values of MemoryTracker when a memory region was shrunk, in certain cases. [\#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed undefined behaviour in ThreadPool. [\#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [\#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alex Zatelepin](https://github.com/ztlpn))
- ODBC driver compatibility with the `LowCardinality` data type. [\#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller))
- FreeBSD: Fixup for the `AIOcontextPool: Found io_event with unknown id 0` error. [\#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef))
- The `system.part_log` table was created regardless of configuration. [\#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix undefined behaviour in the `dictIsIn` function for cache dictionaries. [\#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin))
- Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. from different threads or when executing multiple subqueries) and there is a concurrent DDL query. [\#4535](https://github.com/ClickHouse/ClickHouse/pull/4535) ([Alex Zatelepin](https://github.com/ztlpn))
- Disable compile\_expressions by default until we get our own `llvm` contrib and can test it with `clang` and `asan`. [\#4579](https://github.com/ClickHouse/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin))
- Prevent `std::terminate` when `invalidate_query` for a `clickhouse` external dictionary source has returned a wrong resultset (empty, or more than one row, or more than one column). Fixed an issue where `invalidate_query` was performed every five seconds regardless of the `lifetime`. [\#4583](https://github.com/ClickHouse/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid a deadlock when the `invalidate_query` for a dictionary with a `clickhouse` source involved the `system.dictionaries` table or the `Dictionaries` database (rare case). [\#4599](https://github.com/ClickHouse/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixes for CROSS JOIN with empty WHERE. [\#4598](https://github.com/ClickHouse/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed segfault in the function “replicate” when a constant argument is passed. [\#4603](https://github.com/ClickHouse/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix lambda functions with the predicate optimizer. [\#4408](https://github.com/ClickHouse/ClickHouse/pull/4408) ([Winter Zhang](https://github.com/zhang2014))
- Multiple fixes for multiple JOINs. [\#4595](https://github.com/ClickHouse/ClickHouse/pull/4595) ([Artem Zuikov](https://github.com/4ertus2))

#### Improvements {#improvements-3}

- Support aliases in the JOIN ON section for right table columns (see the sketch below). [\#4412](https://github.com/ClickHouse/ClickHouse/pull/4412) ([Artem Zuikov](https://github.com/4ertus2))
- The result of multiple JOINs needs correct result names to be used in subselects. Flat aliases are replaced with source names in the result. [\#4474](https://github.com/ClickHouse/ClickHouse/pull/4474) ([Artem Zuikov](https://github.com/4ertus2))
- Improve push-down logic for joined statements. [\#4387](https://github.com/ClickHouse/ClickHouse/pull/4387) ([Ivan](https://github.com/abyss7))
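A sketch of a multi-join query with right-table columns referenced through aliases in `ON` (the tables are hypothetical):

```sql
SELECT a.id, b.x, c.y
FROM t1 AS a
JOIN t2 AS b ON a.id = b.id
JOIN t3 AS c ON a.id = c.id;
```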
#### Performance Improvements {#performance-improvements-3}

- Improved heuristics of the “move to PREWHERE” optimization (see the sketch below). [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Use proper lookup tables that use HashTable’s API for 8-bit and 16-bit keys. [\#4536](https://github.com/ClickHouse/ClickHouse/pull/4536) ([Amos Bird](https://github.com/amosbird))
- Improved performance of string comparison. [\#4564](https://github.com/ClickHouse/ClickHouse/pull/4564) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Clean up the distributed DDL queue in a separate thread so that it doesn’t slow down the main loop that processes distributed DDL tasks. [\#4502](https://github.com/ClickHouse/ClickHouse/pull/4502) ([Alex Zatelepin](https://github.com/ztlpn))
- When `min_bytes_to_use_direct_io` is set to 1, not every file was opened with O\_DIRECT mode because the data size to read was sometimes underestimated by the size of one compressed block. [\#4526](https://github.com/ClickHouse/ClickHouse/pull/4526) ([alexey-milovidov](https://github.com/alexey-milovidov))
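What “move to PREWHERE” does, sketched by hand on a hypothetical `hits` table: the filter column is read first, and the remaining columns are fetched only for rows that pass the condition.

```sql
-- Equivalent to a plain WHERE, but cheaper when the filter is selective:
SELECT url, title
FROM hits
PREWHERE user_id = 42;
```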
#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-12}

- Added support for clang-9. [\#4604](https://github.com/ClickHouse/ClickHouse/pull/4604) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix wrong `__asm__` instructions (again). [\#4621](https://github.com/ClickHouse/ClickHouse/pull/4621) ([Konstantin Podshumok](https://github.com/podshumok))
- Add the ability to specify settings for `clickhouse-performance-test` from the command line. [\#4437](https://github.com/ClickHouse/ClickHouse/pull/4437) ([alesapin](https://github.com/alesapin))
- Add dictionaries tests to integration tests. [\#4477](https://github.com/ClickHouse/ClickHouse/pull/4477) ([alesapin](https://github.com/alesapin))
- Added queries from the benchmark on the website to automated performance tests. [\#4496](https://github.com/ClickHouse/ClickHouse/pull/4496) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `xxhash.h` does not exist in external lz4 because it is an implementation detail and its symbols are namespaced with the `XXH_NAMESPACE` macro. When lz4 is external, xxHash has to be external too, and the dependents have to link to it. [\#4495](https://github.com/ClickHouse/ClickHouse/pull/4495) ([Orivej Desh](https://github.com/orivej))
- Fixed a case when the `quantileTiming` aggregate function can be called with a negative or floating-point argument (this fixes a fuzz test with the undefined behaviour sanitizer). [\#4506](https://github.com/ClickHouse/ClickHouse/pull/4506) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Spelling error correction. [\#4531](https://github.com/ClickHouse/ClickHouse/pull/4531) ([sdk2](https://github.com/sdk2))
- Fix compilation on Mac. [\#4371](https://github.com/ClickHouse/ClickHouse/pull/4371) ([Vitaly Baranov](https://github.com/vitlibar))
- Build fixes for FreeBSD and various unusual build configurations. [\#4444](https://github.com/ClickHouse/ClickHouse/pull/4444) ([proller](https://github.com/proller))

## ClickHouse release 19.3 {#clickhouse-release-19-3}

### ClickHouse release 19.3.9.1, 2019-04-02 {#clickhouse-release-19-3-9-1-2019-04-02}

#### Bug Fixes {#bug-fixes-12}

- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs. not-nullable columns. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-13}

- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))

### ClickHouse release 19.3.7, 2019-03-12 {#clickhouse-release-19-3-7-2019-03-12}

#### Bug Fixes {#bug-fixes-13}

- Fixed an error in \#3920. This error manifests itself as random cache corruption (messages `Unknown codec family code`, `Cannot seek through file`) and segfaults. This bug first appeared in version 19.1 and is present in versions up to 19.1.10 and 19.3.6. [\#4623](https://github.com/ClickHouse/ClickHouse/pull/4623) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.3.6, 2019-03-02 {#clickhouse-release-19-3-6-2019-03-02}

#### Bug Fixes {#bug-fixes-14}

- When there are more than 1000 threads in a thread pool, `std::terminate` may happen on thread exit. [Azat Khuzhin](https://github.com/azat) [\#4485](https://github.com/ClickHouse/ClickHouse/pull/4485) [\#4505](https://github.com/ClickHouse/ClickHouse/pull/4505) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now it’s possible to create `ReplicatedMergeTree*` tables with comments on columns without defaults, and tables with column codecs without comments and defaults. Also fixed comparison of codecs. [\#4523](https://github.com/ClickHouse/ClickHouse/pull/4523) ([alesapin](https://github.com/alesapin))
- Fixed crash on JOIN with an array or tuple. [\#4552](https://github.com/ClickHouse/ClickHouse/pull/4552) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed crash in clickhouse-copier with the message `ThreadStatus not created`. [\#4540](https://github.com/ClickHouse/ClickHouse/pull/4540) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed hangup on server shutdown if distributed DDLs were used. [\#4472](https://github.com/ClickHouse/ClickHouse/pull/4472) ([Alex Zatelepin](https://github.com/ztlpn))
- Incorrect column numbers were printed in the error message about text format parsing for columns with numbers greater than 10. [\#4484](https://github.com/ClickHouse/ClickHouse/pull/4484) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-3}

- Fixed build with AVX enabled. [\#4527](https://github.com/ClickHouse/ClickHouse/pull/4527) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Enable extended accounting and IO accounting based on a known-good version instead of the kernel under which it is compiled. [\#4541](https://github.com/ClickHouse/ClickHouse/pull/4541) ([nvartolomei](https://github.com/nvartolomei))
- Allow skipping the setting of core\_dump.size\_limit, with a warning instead of throwing if setting the limit fails. [\#4473](https://github.com/ClickHouse/ClickHouse/pull/4473) ([proller](https://github.com/proller))
- Removed the `inline` tags of `void readBinary(...)` in `Field.cpp`. Also merged redundant `namespace DB` blocks. [\#4530](https://github.com/ClickHouse/ClickHouse/pull/4530) ([hcz](https://github.com/hczhcz))

### ClickHouse release 19.3.5, 2019-02-21 {#clickhouse-release-19-3-5-2019-02-21}

#### Bug Fixes {#bug-fixes-15}

- Fixed a bug in the processing of large HTTP insert queries.
  [\#4454](https://github.com/ClickHouse/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin))
- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed backward incompatibility of the table function `remote` introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.3.4, 2019-02-16 {#clickhouse-release-19-3-4-2019-02-16}

#### Improvements {#improvements-4}

- The table index size is not accounted for memory limits when doing an `ATTACH TABLE` query. Avoided the possibility that a table cannot be attached after being detached. [\#4396](https://github.com/ClickHouse/ClickHouse/pull/4396) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Slightly raised the limit on the maximum string and array size received from ZooKeeper. This allows continuing to work with an increased size of `CLIENT_JVMFLAGS=-Djute.maxbuffer=...` on ZooKeeper. [\#4398](https://github.com/ClickHouse/ClickHouse/pull/4398) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow repairing an abandoned replica even if it already has a huge number of nodes in its queue. [\#4399](https://github.com/ClickHouse/ClickHouse/pull/4399) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add one required argument to the `SET` index (the maximum number of stored rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))

#### Bug Fixes {#bug-fixes-16}

- Fixed the `WITH ROLLUP` result for GROUP BY on a single `LowCardinality` key. [\#4384](https://github.com/ClickHouse/ClickHouse/pull/4384) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed a bug in the set index (dropping a granule if it contains more than `max_rows` rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))
- A lot of FreeBSD build fixes. [\#4397](https://github.com/ClickHouse/ClickHouse/pull/4397) ([proller](https://github.com/proller))
- Fixed alias substitution in queries with a subquery containing the same alias (issue [\#4110](https://github.com/ClickHouse/ClickHouse/issues/4110)). [\#4351](https://github.com/ClickHouse/ClickHouse/pull/4351) ([Artem Zuikov](https://github.com/4ertus2))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-4}

- Add the ability to run `clickhouse-server` for stateless tests in a docker image. [\#4347](https://github.com/ClickHouse/ClickHouse/pull/4347) ([Vasily Nemkov](https://github.com/Enmk))

### ClickHouse release 19.3.3, 2019-02-13 {#clickhouse-release-19-3-3-2019-02-13}

#### New Features {#new-features-6}

- Added the `KILL MUTATION` statement that allows removing mutations that are for some reason stuck (see the sketch after this list). Added the `latest_failed_part`, `latest_fail_time`, `latest_fail_reason` fields to the `system.mutations` table for easier troubleshooting. [\#4287](https://github.com/ClickHouse/ClickHouse/pull/4287) ([Alex Zatelepin](https://github.com/ztlpn))
- Added the aggregate function `entropy` which computes Shannon entropy. [\#4238](https://github.com/ClickHouse/ClickHouse/pull/4238) ([Quid37](https://github.com/Quid37))
- Added the ability to send queries `INSERT INTO tbl VALUES (....` to the server without splitting into `query` and `data` parts.
  [\#4301](https://github.com/ClickHouse/ClickHouse/pull/4301) ([alesapin](https://github.com/alesapin))
- A generic implementation of the `arrayWithConstant` function was added. [\#4322](https://github.com/ClickHouse/ClickHouse/pull/4322) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Implemented the `NOT BETWEEN` comparison operator. [\#4228](https://github.com/ClickHouse/ClickHouse/pull/4228) ([Dmitry Naumov](https://github.com/nezed))
- Implement `sumMapFiltered` in order to be able to limit the number of keys for which values will be summed by `sumMap`. [\#4129](https://github.com/ClickHouse/ClickHouse/pull/4129) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
- Added support of `Nullable` types in the `mysql` table function. [\#4198](https://github.com/ClickHouse/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin))
- Support for arbitrary constant expressions in the `LIMIT` clause. [\#4246](https://github.com/ClickHouse/ClickHouse/pull/4246) ([k3box](https://github.com/k3box))
- Added the `topKWeighted` aggregate function that takes an additional (unsigned integer) weight argument. [\#4245](https://github.com/ClickHouse/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman))
- `StorageJoin` now supports the `join_any_take_last_row` setting that allows overwriting existing values of the same key. [\#3973](https://github.com/ClickHouse/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird))
- Added the function `toStartOfInterval`. [\#4304](https://github.com/ClickHouse/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar))
- Added the `RowBinaryWithNamesAndTypes` format. [\#4200](https://github.com/ClickHouse/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer))
- Added the `IPv4` and `IPv6` data types. More efficient implementations of `IPv*` functions. [\#3669](https://github.com/ClickHouse/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk))
- Added the function `toStartOfTenMinutes()`. [\#4298](https://github.com/ClickHouse/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar))
- Added the `Protobuf` output format. [\#4005](https://github.com/ClickHouse/ClickHouse/pull/4005) [\#4158](https://github.com/ClickHouse/ClickHouse/pull/4158) ([Vitaly Baranov](https://github.com/vitlibar))
- Added brotli support in the HTTP interface for data import (INSERTs). [\#4235](https://github.com/ClickHouse/ClickHouse/pull/4235) ([Mikhail](https://github.com/fandyushin))
- Added hints when a user makes a typo in a function name or type name in the command-line client. [\#4239](https://github.com/ClickHouse/ClickHouse/pull/4239) ([Danila Kutenin](https://github.com/danlark1))
- Added `Query-Id` to the server’s HTTP response header. [\#4231](https://github.com/ClickHouse/ClickHouse/pull/4231) ([Mikhail](https://github.com/fandyushin))
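Sketches for a few of the entries above; the database, table and mutation id are illustrative:

```sql
-- Remove a stuck mutation, then inspect the new troubleshooting fields:
KILL MUTATION WHERE database = 'default' AND table = 'events' AND mutation_id = 'mutation_3.txt';
SELECT latest_failed_part, latest_fail_time, latest_fail_reason FROM system.mutations;

-- Shannon entropy and weighted top-K over a uniform key distribution:
SELECT
    entropy(number % 4)                  AS bits, -- 2 for four equiprobable values
    topKWeighted(3)(number % 10, number) AS top3
FROM numbers(1000);

-- NOT BETWEEN and interval bucketing:
SELECT
    5 NOT BETWEEN 1 AND 3 AS outside, -- 1
    toStartOfInterval(toDateTime('2019-02-13 14:37:00'), INTERVAL 10 MINUTE) AS bucket; -- 14:30:00
```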
[\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
-- Made the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
-- Fixed aggregate function execution with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query while the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a crash on dictionary reload if the dictionary is not available. This bug appeared in 19.1.6. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
-- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed a segmentation fault with `use_uncompressed_cache=1` and an exception with wrong uncompressed size. This bug appeared in 19.1.6. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
-- Fixed a `compile_expressions` bug with comparison of big (larger than Int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
-- Fixed an infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed the `Illegal instruction` error when using base64 functions on old CPUs. This error was reproduced only when ClickHouse was compiled with gcc-8. [\#4275](https://github.com/ClickHouse/ClickHouse/pull/4275) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixes a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (function `if`). Added a generic case for function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- ClickHouse dictionaries now load within the `clickhouse` process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a race condition where selecting from `system.tables` could give a `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `clickhouse-client` could segfault on exit while loading data for command-line suggestions if run in interactive mode.
[\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug where the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed an error: if there is a database with the `Dictionary` engine, all dictionaries are forced to load at server startup, and a dictionary with a ClickHouse source from localhost could not load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error when system logs were tried to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
-- Added the `sumMapWithOverflow` function. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
-- Fixed a segfault with `allow_experimental_multiple_joins_emulation`. [52de2c](https://github.com/ClickHouse/ClickHouse/commit/52de2cd927f7b5257dd67e175f0a5560a48840d0) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
-- Fixed a fuzz test under undefined behavior sanitizer: added parameter type checks for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a rare race condition where removal of old data parts could fail with a `File not found` error. [\#4378](https://github.com/ClickHouse/ClickHouse/pull/4378) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed package installation with a missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))
-
-#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-5}
-
-- Debian package: corrected the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
-- Various build fixes for FreeBSD. [\#4225](https://github.com/ClickHouse/ClickHouse/pull/4225) ([proller](https://github.com/proller))
-- Added the ability to create, fill and drop tables in perftest. [\#4220](https://github.com/ClickHouse/ClickHouse/pull/4220) ([alesapin](https://github.com/alesapin))
-- Added a script to check for duplicate includes. [\#4326](https://github.com/ClickHouse/ClickHouse/pull/4326) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added the ability to run queries by index in performance tests. [\#4264](https://github.com/ClickHouse/ClickHouse/pull/4264) ([alesapin](https://github.com/alesapin))
-- The package with debug symbols is now suggested for installation. [\#4274](https://github.com/ClickHouse/ClickHouse/pull/4274) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Refactoring of performance-test. Better logging and signal handling. [\#4171](https://github.com/ClickHouse/ClickHouse/pull/4171) ([alesapin](https://github.com/alesapin))
-- Added docs for the anonymized Yandex.Metrika datasets.
[\#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([alesapin](https://github.com/alesapin))
-- Added a tool for converting an old month-partitioned part to the custom-partitioned format. [\#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alex Zatelepin](https://github.com/ztlpn))
-- Added docs about two datasets in s3. [\#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([alesapin](https://github.com/alesapin))
-- Added a script which creates a changelog from pull request descriptions. [\#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [\#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Added a Puppet module for ClickHouse. [\#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov))
-- Added docs for a group of undocumented functions. [\#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([Winter Zhang](https://github.com/zhang2014))
-- ARM build fixes. [\#4210](https://github.com/ClickHouse/ClickHouse/pull/4210) [\#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [\#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller))
-- Dictionary tests can now be run from `ctest`. [\#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller))
-- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a check for SSE and AVX instructions at startup. [\#4234](https://github.com/ClickHouse/ClickHouse/pull/4234) ([Igr](https://github.com/igron99))
-- The init script now waits for the server to start. [\#4281](https://github.com/ClickHouse/ClickHouse/pull/4281) ([proller](https://github.com/proller))
-
-#### Backward Incompatible Changes {#backward-incompatible-changes-1}
-
-- Removed the `allow_experimental_low_cardinality_type` setting. `LowCardinality` data types are production ready. [\#4323](https://github.com/ClickHouse/ClickHouse/pull/4323) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Reduced mark cache size and uncompressed cache size according to the available memory amount. [\#4240](https://github.com/ClickHouse/ClickHouse/pull/4240) ([Lopatin Konstantin](https://github.com/k-lopatin))
-- Added the keyword `INDEX` in the `CREATE TABLE` query. A column with the name `index` must be quoted with backticks or double quotes: `` `index` ``. [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
-- `sumMap` now promotes the result type instead of overflowing. The old `sumMap` behavior can be obtained by using the `sumMapWithOverflow` function. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
-
-#### Performance Improvements {#performance-improvements-4}
-
-- `std::sort` replaced by `pdqsort` for queries without `LIMIT`. [\#4236](https://github.com/ClickHouse/ClickHouse/pull/4236) ([Evgenii Pravda](https://github.com/kvinty))
-- The server now reuses threads from the global thread pool. This affects performance in some corner cases. [\#4150](https://github.com/ClickHouse/ClickHouse/pull/4150) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Improvements {#improvements-5}
-
-- Implemented AIO support for FreeBSD.
[\#4305](https://github.com/ClickHouse/ClickHouse/pull/4305) ([urgordeadbeef](https://github.com/urgordeadbeef))
-- `SELECT * FROM a JOIN b USING a, b` now returns `a` and `b` columns only from the left table. [\#4141](https://github.com/ClickHouse/ClickHouse/pull/4141) ([Artem Zuikov](https://github.com/4ertus2))
-- Allowed the `-C` option of the client to work the same as the `-c` option. [\#4232](https://github.com/ClickHouse/ClickHouse/pull/4232) ([syominsergey](https://github.com/syominsergey))
-- The `--password` option used without a value now requires the password from stdin. [\#4230](https://github.com/ClickHouse/ClickHouse/pull/4230) ([BSD\_Conqueror](https://github.com/bsd-conqueror))
-- Added highlighting of unescaped metacharacters in string literals that contain `LIKE` expressions or regexps. [\#4327](https://github.com/ClickHouse/ClickHouse/pull/4327) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added cancellation of HTTP read-only queries if the client socket goes away. [\#4213](https://github.com/ClickHouse/ClickHouse/pull/4213) ([nvartolomei](https://github.com/nvartolomei))
-- The server now reports progress to keep client connections alive. [\#4215](https://github.com/ClickHouse/ClickHouse/pull/4215) ([Ivan](https://github.com/abyss7))
-- Slightly better message with the reason for an OPTIMIZE query when the `optimize_throw_if_noop` setting is enabled. [\#4294](https://github.com/ClickHouse/ClickHouse/pull/4294) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added support for the `--version` option in the ClickHouse server. [\#4251](https://github.com/ClickHouse/ClickHouse/pull/4251) ([Lopatin Konstantin](https://github.com/k-lopatin))
-- Added the `--help/-h` option to `clickhouse-server`. [\#4233](https://github.com/ClickHouse/ClickHouse/pull/4233) ([Yuriy Baranov](https://github.com/yurriy))
-- Added support for scalar subqueries with an aggregate function state result. [\#4348](https://github.com/ClickHouse/ClickHouse/pull/4348) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Improved server shutdown time and the waiting time of ALTERs. [\#4372](https://github.com/ClickHouse/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added info about the replicated\_can\_become\_leader setting to system.replicas and added logging if the replica won’t try to become the leader. [\#4379](https://github.com/ClickHouse/ClickHouse/pull/4379) ([Alex Zatelepin](https://github.com/ztlpn))
-
-## ClickHouse release 19.1 {#clickhouse-release-19-1}
-
-### ClickHouse release 19.1.14, 2019-03-14 {#clickhouse-release-19-1-14-2019-03-14}
-
-- Fixed the error `Column ... queried more than once` that could happen if the setting `asterisk_left_columns_only` is set to 1 when using `GLOBAL JOIN` with `SELECT *` (a rare case). The issue does not exist in 19.3 and newer. [6bac7d8d](https://github.com/ClickHouse/ClickHouse/pull/4692/commits/6bac7d8d11a9b0d6de0b32b53c47eb2f6f8e7062) ([Artem Zuikov](https://github.com/4ertus2))
-
-### ClickHouse release 19.1.13, 2019-03-12 {#clickhouse-release-19-1-13-2019-03-12}
-
-This release contains exactly the same set of patches as 19.3.7.
-
-### ClickHouse release 19.1.10, 2019-03-03 {#clickhouse-release-19-1-10-2019-03-03}
-
-This release contains exactly the same set of patches as 19.3.6.
-
-## ClickHouse release 19.1 {#clickhouse-release-19-1-1}
-
-### ClickHouse release 19.1.9, 2019-02-21 {#clickhouse-release-19-1-9-2019-02-21}
-
-#### Bug fixes {#bug-fixes-18}
-
-- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed backward incompatibility of the table function `remote` introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.1.8, 2019-02-16 {#clickhouse-release-19-1-8-2019-02-16}
-
-#### Bug Fixes {#bug-fixes-19}
-
-- Fixed package installation with a missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))
-
-## ClickHouse release 19.1 {#clickhouse-release-19-1-2}
-
-### ClickHouse release 19.1.7, 2019-02-15 {#clickhouse-release-19-1-7-2019-02-15}
-
-#### Bug Fixes {#bug-fixes-20}
-
-- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
-- Fixed an error when system logs were tried to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error: if there is a database with the `Dictionary` engine, all dictionaries are forced to load at server startup, and a dictionary with a ClickHouse source from localhost could not load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug where the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn))
-- `clickhouse-client` could segfault on exit while loading data for command-line suggestions if run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a race condition where selecting from `system.tables` could give a `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an issue where local ClickHouse dictionaries were loaded via TCP, but should load within the process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixes a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed an infinite loop when selecting from the table function `numbers(0)`.
[\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a `compile_expressions` bug with comparison of big (larger than Int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
-- Fixed a segmentation fault with `uncompressed_cache=1` and an exception with wrong uncompressed size. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
-- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query while the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed aggregate function execution with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Debian package: corrected the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
-- Fixed a fuzz test under undefined behavior sanitizer: added parameter type checks for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Made the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
-- Fixed `Not found column` for duplicate columns in the JOIN ON section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
-- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a crash on dictionary reload if the dictionary is not available. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
-- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
-- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (function `if`). Added a generic case for function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.1.6, 2019-01-24 {#clickhouse-release-19-1-6-2019-01-24}
-
-#### New Features {#new-features-7}
-
-- Custom per-column compression codecs for tables. [\#3899](https://github.com/ClickHouse/ClickHouse/pull/3899) [\#4111](https://github.com/ClickHouse/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag))
-- Added the compression codec `Delta`. [\#4052](https://github.com/ClickHouse/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin))
-- Allowed `ALTER`ing compression codecs.
[\#4054](https://github.com/ClickHouse/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin))
-- Added the functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [\#3826](https://github.com/ClickHouse/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov))
-- Support for writes to `HDFS` tables and the `hdfs` table function. [\#4084](https://github.com/ClickHouse/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin))
-- Added functions to search for multiple constant strings in a big haystack: `multiPosition`, `multiSearch`, `firstMatch`, also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [\#4053](https://github.com/ClickHouse/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1))
-- Pruning of unused shards if the `SELECT` query filters by the sharding key (setting `optimize_skip_unused_shards`). [\#3851](https://github.com/ClickHouse/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7))
-- Allowed the `Kafka` engine to ignore some number of parsing errors per block. [\#4094](https://github.com/ClickHouse/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7))
-- Added support for `CatBoost` multiclass model evaluation. The function `modelEvaluate` returns a tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [\#607](https://github.com/catboost/catboost/pull/607). [\#3959](https://github.com/ClickHouse/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Added the functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [\#4097](https://github.com/ClickHouse/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea))
-- Added hashing functions `xxHash64` and `xxHash32`. [\#3905](https://github.com/ClickHouse/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov))
-- Added the `gccMurmurHash` hashing function (GCC-flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). [\#4000](https://github.com/ClickHouse/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li))
-- Added hashing functions `javaHash`, `hiveHash`. [\#3811](https://github.com/ClickHouse/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365))
-- Added the table function `remoteSecure`. The function works like `remote`, but uses a secure connection. [\#4088](https://github.com/ClickHouse/ClickHouse/pull/4088) ([proller](https://github.com/proller))
-
-#### Experimental features {#experimental-features-3}
-
-- Added multiple JOINs emulation (the `allow_experimental_multiple_joins_emulation` setting). [\#3946](https://github.com/ClickHouse/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2))
-
-#### Bug Fixes {#bug-fixes-21}
-
-- Made the `compiled_expression_cache_size` setting limited by default to lower memory consumption. [\#4041](https://github.com/ClickHouse/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin))
-- Fixed a bug that led to hangs in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper.
[\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3891](https://github.com/ClickHouse/ClickHouse/issues/3891) [\#3934](https://github.com/ClickHouse/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [\#3904](https://github.com/ClickHouse/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a bug where `from_zk` config elements weren’t refreshed after a request to ZooKeeper timed out. [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3947](https://github.com/ClickHouse/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a bug with a wrong prefix for IPv4 subnet masks. [\#3945](https://github.com/ClickHouse/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin))
-- Fixed a crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [\#3956](https://github.com/ClickHouse/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [\#4009](https://github.com/ClickHouse/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin))
-- Fixed a leak of netlink sockets. They were placed in a pool where they were never deleted, and new sockets were created at the start of a new thread when all current sockets were in use. [\#4017](https://github.com/ClickHouse/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a bug with closing the `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking the `odbc-bridge` subprocess. [\#4120](https://github.com/ClickHouse/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin))
-- Fixed monotonic conversion of String to UInt in case of usage of String in the primary key. [\#3870](https://github.com/ClickHouse/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed an error in the calculation of integer conversion function monotonicity. [\#3921](https://github.com/ClickHouse/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a segfault in the `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [\#3909](https://github.com/ClickHouse/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed UB in StorageMerge. [\#3910](https://github.com/ClickHouse/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird))
-- Fixed a segfault in functions `addDays`, `subtractDays`. [\#3913](https://github.com/ClickHouse/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error: functions `round`, `floor`, `trunc`, `ceil` could return a bogus result when executed on an integer argument with a large negative scale. [\#3914](https://github.com/ClickHouse/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug induced by ‘kill query sync’ which led to a core dump. [\#3916](https://github.com/ClickHouse/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx))
-- Fixed a bug with a long delay after an empty replication queue.
[\#3928](https://github.com/ClickHouse/ClickHouse/pull/3928) [\#3932](https://github.com/ClickHouse/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin))
-- Fixed excessive memory usage in case of inserting into a table with a `LowCardinality` primary key. [\#3955](https://github.com/ClickHouse/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed `LowCardinality` serialization for the `Native` format in case of empty arrays. [\#3907](https://github.com/ClickHouse/ClickHouse/issues/3907) [\#4011](https://github.com/ClickHouse/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed an incorrect result while using DISTINCT by a single LowCardinality numeric column. [\#3895](https://github.com/ClickHouse/ClickHouse/issues/3895) [\#4012](https://github.com/ClickHouse/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed specialized aggregation with a LowCardinality key (in case the `compile` setting is enabled). [\#3886](https://github.com/ClickHouse/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed user and password forwarding for replicated table queries. [\#3957](https://github.com/ClickHouse/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu))
-- Fixed a very rare race condition that could happen when listing tables in the Dictionary database while reloading dictionaries. [\#3970](https://github.com/ClickHouse/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an incorrect result when HAVING was used with ROLLUP or CUBE. [\#3756](https://github.com/ClickHouse/ClickHouse/issues/3756) [\#3837](https://github.com/ClickHouse/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection))
-- Fixed column aliases for queries with `JOIN ON` syntax and distributed tables. [\#3980](https://github.com/ClickHouse/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed an error in the internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use the ClickHouse codebase as a library directly. [\#3935](https://github.com/ClickHouse/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Improvements {#improvements-6}
-
-- Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [\#3900](https://github.com/ClickHouse/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea))
-- Function `parseDateTimeBestEffort`: support for formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [\#3922](https://github.com/ClickHouse/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `CapnProtoInputStream` now supports jagged structures. [\#4063](https://github.com/ClickHouse/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer))
-- Usability improvement: added a check that the server process is started by the data directory’s owner. Do not allow starting the server from root if the data belongs to a non-root user. [\#3785](https://github.com/ClickHouse/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev))
-- Better logic for checking required columns during analysis of queries with JOINs.
[\#3930](https://github.com/ClickHouse/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2))
-- Decreased the number of connections in case of a large number of Distributed tables in a single server. [\#3726](https://github.com/ClickHouse/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014))
-- Supported the totals row for `WITH TOTALS` queries for the ODBC driver. [\#3836](https://github.com/ClickHouse/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb))
-- Allowed using `Enum`s as integers inside the `if` function. [\#3875](https://github.com/ClickHouse/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7))
-- Added the `low_cardinality_allow_in_native_format` setting. If disabled, do not use the `LowCardinality` type in the `Native` format. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Removed some redundant objects from the compiled expressions cache to lower memory usage. [\#4042](https://github.com/ClickHouse/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin))
-- Added a check that the `SET send_logs_level = 'value'` query accepts an appropriate value. [\#3873](https://github.com/ClickHouse/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx))
-- Fixed the data type check in type conversion functions. [\#3896](https://github.com/ClickHouse/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014))
-
-#### Performance Improvements {#performance-improvements-5}
-
-- Added a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn’t support it. [\#3960](https://github.com/ClickHouse/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn))
-- Added a DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case the pattern doesn’t contain time. [\#4004](https://github.com/ClickHouse/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
-- Performance improvement for integer number serialization. [\#3968](https://github.com/ClickHouse/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird))
-- Zero left padding of PODArray so that the -1 element is always valid and zeroed. It’s used for branchless calculation of offsets. [\#3920](https://github.com/ClickHouse/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird))
-- Reverted the `jemalloc` version which led to performance degradation. [\#4018](https://github.com/ClickHouse/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Backward Incompatible Changes {#backward-incompatible-changes-2}
-
-- Removed the undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [\#3887](https://github.com/ClickHouse/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn))
-- Removed the function `shardByHash`. [\#3833](https://github.com/ClickHouse/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Forbid using scalar subqueries with a result of type `AggregateFunction`.
[\#3865](https://github.com/ClickHouse/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7))
-
-#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-6}
-
-- Added support for the PowerPC (`ppc64le`) build. [\#4132](https://github.com/ClickHouse/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1))
-- Stateful functional tests are run on a publicly available dataset. [\#3969](https://github.com/ClickHouse/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [\#4136](https://github.com/ClickHouse/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Updated the `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of the raw C interface. [\#4025](https://github.com/ClickHouse/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7))
-- Updated the `mariadb-client` library. Fixed one of the issues found by UBSan. [\#3924](https://github.com/ClickHouse/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Some fixes for UBSan builds. [\#3926](https://github.com/ClickHouse/ClickHouse/pull/3926) [\#3021](https://github.com/ClickHouse/ClickHouse/pull/3021) [\#3948](https://github.com/ClickHouse/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added per-commit runs of tests with the UBSan build.
-- Added per-commit runs of the PVS-Studio static analyzer.
-- Fixed bugs found by PVS-Studio. [\#4013](https://github.com/ClickHouse/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed glibc compatibility issues. [\#4100](https://github.com/ClickHouse/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Moved Docker images to 18.10 and added a compatibility file for glibc \>= 2.28. [\#3965](https://github.com/ClickHouse/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin))
-- Added an env variable for users who don’t want to chown directories in the server Docker image. [\#3967](https://github.com/ClickHouse/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin))
-- Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [\#3986](https://github.com/ClickHouse/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a few more warnings that are available only in clang 8. [\#3993](https://github.com/ClickHouse/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [\#3989](https://github.com/ClickHouse/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej))
-- Added sanitizer variables for test images. [\#4072](https://github.com/ClickHouse/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin))
-- The `clickhouse-server` Debian package will recommend the `libcap2-bin` package to use the `setcap` tool for setting capabilities. This is optional. [\#4093](https://github.com/ClickHouse/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Improved compilation time, fixed includes. [\#3898](https://github.com/ClickHouse/ClickHouse/pull/3898) ([proller](https://github.com/proller))
-- Added performance tests for hash functions.
[\#3918](https://github.com/ClickHouse/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov))
-- Fixed cyclic library dependencies. [\#3958](https://github.com/ClickHouse/ClickHouse/pull/3958) ([proller](https://github.com/proller))
-- Improved compilation with low available memory. [\#4030](https://github.com/ClickHouse/ClickHouse/pull/4030) ([proller](https://github.com/proller))
-- Added a test script to reproduce performance degradation in `jemalloc`. [\#4036](https://github.com/ClickHouse/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed misspellings in comments and string literals under `dbms`. [\#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha))
-- Fixed typos in comments. [\#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty))
-
-## [Changelog for 2018](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2018.md)
diff --git a/docs/ru/changelog/index.md b/docs/ru/changelog/index.md
deleted file mode 120000
index 79b747aee1b..00000000000
--- a/docs/ru/changelog/index.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../CHANGELOG.md
\ No newline at end of file
diff --git a/docs/ru/commercial/cloud.md b/docs/ru/commercial/cloud.md
index f096bdb92cf..9716f4f1cd2 100644
--- a/docs/ru/commercial/cloud.md
+++ b/docs/ru/commercial/cloud.md
@@ -1,20 +1,16 @@
----
-en_copy: true
----
+# Поставщики облачных услуг ClickHouse {#clickhouse-cloud-service-providers}
-
-# ClickHouse Cloud Service Providers {#clickhouse-cloud-service-providers}
-
-!!! info "Info"
-    If you have launched a public cloud with managed ClickHouse service, feel free to [open a pull-request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/cloud.md) adding it to the following list.
+!!! info "Инфо"
+    Если вы запустили публичный облачный сервис с управляемым ClickHouse, не стесняйтесь [открыть pull request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/cloud.md) с добавлением его в приведённый ниже список.
## Yandex Cloud {#yandex-cloud}
-[Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) provides the following key features:
+[Yandex Managed Service for ClickHouse](https://cloud.yandex.ru/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) предоставляет следующие ключевые возможности:
-
-- Fully managed ZooKeeper service for [ClickHouse replication](../operations/table_engines/replication.md)
-- Multiple storage type choices
-- Replicas in different availability zones
-- Encryption and isolation
-- Automated maintenance
+- Полностью управляемый сервис ZooKeeper для [репликации ClickHouse](../engines/table_engines/mergetree_family/replication.md)
+- Несколько типов хранилища на выбор
+- Реплики в разных зонах доступности
+- Шифрование и изоляция
+- Автоматизированное техническое обслуживание
-{## [Original article](https://clickhouse.tech/docs/en/commercial/cloud/) ##}
+{## [Оригинальная статья](https://clickhouse.tech/docs/ru/commercial/cloud/) ##}
diff --git a/docs/ru/commercial/index.md b/docs/ru/commercial/index.md
new file mode 100644
index 00000000000..f9065c7cd50
--- /dev/null
+++ b/docs/ru/commercial/index.md
@@ -0,0 +1,7 @@
+---
+toc_folder_title: Commercial
+toc_priority: 70
+toc_title: Commercial
+---
+
+
diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md
index dc92d425d37..f5f57179ece 100644
--- a/docs/ru/development/architecture.md
+++ b/docs/ru/development/architecture.md
@@ -1,200 +1,201 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
 ---
-# Overview of ClickHouse Architecture {#overview-of-clickhouse-architecture}
+# Обзор архитектуры ClickHouse {#overview-of-clickhouse-architecture}
-ClickHouse is a true column-oriented DBMS. Data is stored by columns, and is processed during query execution by arrays (vectors or chunks of columns). Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called “vectorized query execution,” and it helps lower the cost of actual data processing.
+ClickHouse является настоящей столбцовой СУБД. Данные хранятся по столбцам и обрабатываются во время выполнения запросов массивами (векторами или фрагментами столбцов). Когда это возможно, операции выполняются над массивами, а не над отдельными значениями. Это называется «векторизованным выполнением запросов» (vectorized query execution), и это помогает снизить стоимость фактической обработки данных.
-> This idea is nothing new. It dates back to the `APL` programming language and its descendants: `A +`, `J`, `K`, and `Q`. Array programming is used in scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `Vectorwise` system.
+> В этой идее нет ничего нового. Она восходит к языку программирования `APL` и его потомкам: `A +`, `J`, `K` и `Q`. Программирование на массивах используется в научной обработке данных. Эта идея также не является чем-то новым для реляционных баз данных: например, она используется в системе `Vectorwise`.
-There are two different approaches for speeding up query processing: vectorized query execution and runtime code generation. The latter removes all indirection and dynamic dispatch. Neither of these approaches is strictly better than the other. Runtime code generation can be better when it fuses many operations, thus fully utilizing CPU execution units and the pipeline. Vectorized query execution can be less practical because it involves temporary vectors that must be written to the cache and read back. If the temporary data does not fit in the L2 cache, this becomes an issue. But vectorized query execution more easily utilizes the SIMD capabilities of the CPU. A [research paper](http://15721.courses.cs.cmu.edu/spring2016/papers/p5-sompolski.pdf) written by our friends shows that it is better to combine both approaches. ClickHouse uses vectorized query execution and has limited initial support for runtime code generation.
+Существует два различных подхода к ускорению обработки запросов: векторизованное выполнение запросов и генерация кода во время выполнения. Последняя устраняет всю косвенность и динамическую диспетчеризацию. Ни один из этих подходов не является строго лучшим, чем другой. Генерация кода во время выполнения может быть лучше, когда она объединяет множество операций, полностью используя исполнительные блоки процессора и конвейер. Векторизованное выполнение запросов может быть менее практичным, поскольку оно включает временные векторы, которые должны быть записаны в кэш и считаны обратно. Если временные данные не помещаются в кэш L2, это становится проблемой. Зато векторизованное выполнение запросов легче использует возможности SIMD центрального процессора. [Научная статья](http://15721.courses.cs.cmu.edu/spring2016/papers/p5-sompolski.pdf), написанная нашими друзьями, показывает, что лучше сочетать оба подхода. ClickHouse использует векторизованное выполнение запросов и имеет ограниченную начальную поддержку генерации кода во время выполнения.
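To make the contrast above concrete, here is a minimal, hypothetical C++ sketch (not ClickHouse code) of per-value work versus a vectorized loop over whole arrays, which the compiler can turn into SIMD instructions:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

/// Scalar building block: one call (or interpreter step) per value.
inline int64_t addScalar(int64_t a, int64_t b) { return a + b; }

/// Vectorized approach: dispatch once per array, then run a tight loop.
/// The loop body is trivial enough for the compiler to auto-vectorize.
void addVectors(const std::vector<int64_t> & a, const std::vector<int64_t> & b, std::vector<int64_t> & out)
{
    out.resize(a.size());
    for (size_t i = 0; i < a.size(); ++i)
        out[i] = addScalar(a[i], b[i]);  /// inlined; no per-value indirection
}
```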
-## Columns {#columns}
+## Столбцы {#columns}
-`IColumn` interface is used to represent columns in memory (actually, chunks of columns). This interface provides helper methods for the implementation of various relational operators. Almost all operations are immutable: they do not modify the original column, but create a new modified one. For example, the `IColumn :: filter` method accepts a filter byte mask. It is used for the `WHERE` and `HAVING` relational operators. Additional examples: the `IColumn :: permute` method to support `ORDER BY`, the `IColumn :: cut` method to support `LIMIT`.
+Интерфейс `IColumn` используется для представления столбцов в памяти (фактически, фрагментов столбцов). Этот интерфейс предоставляет вспомогательные методы для реализации различных реляционных операторов. Почти все операции неизменяемы: они не изменяют исходный столбец, а создают новый, изменённый. Например, метод `IColumn :: filter` принимает байтовую маску фильтра. Он используется для реляционных операторов `WHERE` и `HAVING`. Дополнительные примеры: метод `IColumn :: permute` для поддержки `ORDER BY`, метод `IColumn :: cut` для поддержки `LIMIT`.
-Various `IColumn` implementations (`ColumnUInt8`, `ColumnString`, and so on) are responsible for the memory layout of columns. The memory layout is usually a contiguous array. For the integer type of columns, it is just one contiguous array, like `std :: vector`. For `String` and `Array` columns, it is two vectors: one for all array elements, placed contiguously, and a second one for offsets to the beginning of each array. There is also `ColumnConst` that stores just one value in memory, but looks like a column.
+Различные реализации `IColumn` (`ColumnUInt8`, `ColumnString` и так далее) отвечают за размещение столбцов в памяти. Обычно это непрерывный массив. Для столбцов целочисленного типа это всего лишь один непрерывный массив, наподобие `std :: vector`. Для столбцов `String` и `Array` это два вектора: один для всех элементов массива, расположенных последовательно, и второй для смещений к началу каждого массива. Существует также `ColumnConst`, который хранит только одно значение в памяти, но выглядит как столбец.
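A heavily reduced, hypothetical sketch of what such a column interface might look like (the real `IColumn` has many more methods; this only illustrates immutable filtering over contiguous storage):

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

using Filter = std::vector<uint8_t>;  /// byte mask: 1 = keep the row

/// Hypothetical, heavily reduced column interface.
class IColumn
{
public:
    virtual ~IColumn() = default;
    virtual size_t size() const = 0;
    /// Immutable operation: returns a new column with only the selected rows.
    virtual std::shared_ptr<IColumn> filter(const Filter & mask) const = 0;
};

class ColumnUInt64 : public IColumn
{
public:
    std::vector<uint64_t> data;  /// contiguous storage, like std::vector

    size_t size() const override { return data.size(); }

    std::shared_ptr<IColumn> filter(const Filter & mask) const override
    {
        auto res = std::make_shared<ColumnUInt64>();
        for (size_t i = 0; i < data.size(); ++i)
            if (mask[i])
                res->data.push_back(data[i]);  /// the original column is untouched
        return res;
    }
};
```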
-## Field {#field}
+## Поле {#field}
-Nevertheless, it is possible to work with individual values as well. To represent an individual value, the `Field` is used. `Field` is just a discriminated union of `UInt64`, `Int64`, `Float64`, `String` and `Array`. `IColumn` has the `operator[]` method to get the n-th value as a `Field` and the `insert` method to append a `Field` to the end of a column. These methods are not very efficient, because they require dealing with temporary `Field` objects representing an individual value. There are more efficient methods, such as `insertFrom`, `insertRangeFrom`, and so on.
+Тем не менее, можно работать и с отдельными значениями. Для представления отдельного значения используется `Field`. `Field` представляет собой просто размеченное объединение (discriminated union) из `UInt64`, `Int64`, `Float64`, `String` и `Array`. У `IColumn` есть метод `operator[]` для получения n-го значения в виде `Field` и метод `insert` для добавления `Field` в конец столбца. Эти методы не очень эффективны, потому что требуют работы с временными объектами `Field`, представляющими отдельные значения. Существуют более эффективные методы, такие как `insertFrom`, `insertRangeFrom` и так далее.
-`Field` doesn’t have enough information about a specific data type for a table. For example, `UInt8`, `UInt16`, `UInt32`, and `UInt64` are all represented as `UInt64` in a `Field`.
+У `Field` недостаточно информации о конкретном типе данных таблицы. Например, `UInt8`, `UInt16`, `UInt32` и `UInt64` все представляются как `UInt64` в `Field`.
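A minimal sketch of such a discriminated union, assuming C++17 `std::variant` (the real `Field` is hand-rolled and also supports `Array`; this is only an illustration of the type coarsening described above):

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <variant>

/// Toy discriminated union for individual values (Array omitted for brevity).
using Field = std::variant<uint64_t, int64_t, double, std::string>;

int main()
{
    /// A UInt8 value 42 would be stored exactly the same way as a UInt64,
    /// so a Field alone cannot tell the two table types apart.
    Field f = uint64_t{42};
    std::printf("%llu\n", static_cast<unsigned long long>(std::get<uint64_t>(f)));
}
```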
-## Leaky Abstractions {#leaky-abstractions}
+## Дырявые абстракции {#leaky-abstractions}
-`IColumn` has methods for common relational transformations of data, but they don’t meet all needs. For example, `ColumnUInt64` doesn’t have a method to calculate the sum of two columns, and `ColumnString` doesn’t have a method to run a substring search. These countless routines are implemented outside of `IColumn`.
+У `IColumn` есть методы для общих реляционных преобразований данных, но они не удовлетворяют всем потребностям. Например, у `ColumnUInt64` нет метода для вычисления суммы двух столбцов, а у `ColumnString` нет метода для поиска по подстрокам. Эти бесчисленные процедуры реализуются за пределами `IColumn`.
-Various functions on columns can be implemented in a generic, non-efficient way using `IColumn` methods to extract `Field` values, or in a specialized way using knowledge of the inner memory layout of data in a specific `IColumn` implementation. This is done by casting functions to a specific `IColumn` type so that they deal with the internal representation directly. For example, `ColumnUInt64` has the `getData` method that returns a reference to an internal array, then a separate routine reads or fills that array directly. We have “leaky abstractions” to allow efficient specializations of various routines.
+Различные функции над столбцами могут быть реализованы общим, неэффективным способом с использованием методов `IColumn` для извлечения значений `Field`, либо специализированным способом, использующим знание внутреннего размещения данных в памяти в конкретной реализации `IColumn`. Для этого функции приводятся к конкретному типу `IColumn` и работают с внутренним представлением напрямую. Например, у `ColumnUInt64` есть метод `getData`, который возвращает ссылку на внутренний массив, а затем отдельная процедура читает или заполняет этот массив напрямую. У нас есть «дырявые абстракции» («leaky abstractions»), чтобы обеспечить эффективные специализации различных процедур.
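Continuing the hypothetical column sketch above, a specialized routine might look like this: it downcasts to the concrete column type and works on the raw array directly, which is exactly the "leak" in the abstraction:

```cpp
#include <cstdint>
#include <typeinfo>

/// Specialized sum over the raw array of the hypothetical ColumnUInt64 above.
uint64_t sumColumn(const IColumn & col)
{
    if (const auto * concrete = dynamic_cast<const ColumnUInt64 *>(&col))
    {
        uint64_t sum = 0;
        for (uint64_t x : concrete->data)  /// direct access to internal storage
            sum += x;
        return sum;
    }
    /// A generic (slow) Field-based fallback could go here instead.
    throw std::bad_cast();
}
```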
-## Data Types {#data_types}
+## Типы данных {#data_types}
-`IDataType` is responsible for serialization and deserialization: for reading and writing chunks of columns or individual values in binary or text form. `IDataType` directly corresponds to data types in tables. For example, there are `DataTypeUInt32`, `DataTypeDateTime`, `DataTypeString` and so on.
+`IDataType` отвечает за сериализацию и десериализацию: за чтение и запись фрагментов столбцов или отдельных значений в двоичной или текстовой форме. `IDataType` непосредственно соответствует типам данных в таблицах. Например, существуют `DataTypeUInt32`, `DataTypeDateTime`, `DataTypeString` и так далее.
-`IDataType` and `IColumn` are only loosely related to each other. Different data types can be represented in memory by the same `IColumn` implementations. For example, `DataTypeUInt32` and `DataTypeDateTime` are both represented by `ColumnUInt32` or `ColumnConstUInt32`. In addition, the same data type can be represented by different `IColumn` implementations. For example, `DataTypeUInt8` can be represented by `ColumnUInt8` or `ColumnConstUInt8`.
+`IDataType` и `IColumn` лишь слабо связаны друг с другом. Различные типы данных могут быть представлены в памяти одними и теми же реализациями `IColumn`. Например, `DataTypeUInt32` и `DataTypeDateTime` оба представляются `ColumnUInt32` или `ColumnConstUInt32`. Кроме того, один и тот же тип данных может быть представлен разными реализациями `IColumn`. Например, `DataTypeUInt8` может быть представлен `ColumnUInt8` или `ColumnConstUInt8`.
-`IDataType` only stores metadata. For instance, `DataTypeUInt8` doesn’t store anything at all (except vptr) and `DataTypeFixedString` stores just `N` (the size of fixed-size strings).
+`IDataType` хранит только метаданные. Например, `DataTypeUInt8` не хранит вообще ничего (кроме vptr), а `DataTypeFixedString` хранит только `N` (размер строк фиксированной длины).
-`IDataType` has helper methods for various data formats. Examples are methods to serialize a value with possible quoting, to serialize a value for JSON, and to serialize a value as part of the XML format. There is no direct correspondence to data formats. For example, the different data formats `Pretty` and `TabSeparated` can use the same `serializeTextEscaped` helper method from the `IDataType` interface.
+`IDataType` имеет вспомогательные методы для различных форматов данных. Примерами являются методы сериализации значения с возможным заключением в кавычки, сериализации значения для JSON и сериализации значения как части формата XML. Прямого соответствия форматам данных нет. Например, разные форматы данных `Pretty` и `TabSeparated` могут использовать один и тот же вспомогательный метод `serializeTextEscaped` из интерфейса `IDataType`.
-## Block {#block}
+## Блок {#block}
-A `Block` is a container that represents a subset (chunk) of a table in memory. It is just a set of triples: `(IColumn, IDataType, column name)`. During query execution, data is processed by `Block`s. If we have a `Block`, we have data (in the `IColumn` object), we have information about its type (in `IDataType`) that tells us how to deal with that column, and we have the column name. It could be either the original column name from the table or some artificial name assigned for getting temporary results of calculations.
+`Block` представляет собой контейнер, содержащий подмножество (фрагмент) таблицы в памяти. Это просто набор троек: `(IColumn, IDataType, column name)`. Во время выполнения запроса данные обрабатываются блоками (`Block`). Если у нас есть `Block`, у нас есть данные (в объекте `IColumn`), есть информация о его типе (в `IDataType`), которая говорит нам, как обращаться с этим столбцом, и есть имя столбца. Это может быть либо исходное имя столбца из таблицы, либо какое-то искусственное имя, назначенное для получения временных результатов вычислений.
-When we calculate some function over columns in a block, we add another column with its result to the block, and we don’t touch columns for arguments of the function because operations are immutable. Later, unneeded columns can be removed from the block, but not modified. It is convenient for the elimination of common subexpressions.
+Когда мы вычисляем некоторую функцию над столбцами в блоке, мы добавляем в блок ещё один столбец с её результатом и не трогаем столбцы-аргументы функции, потому что операции неизменяемы. Позже ненужные столбцы могут быть удалены из блока, но не изменены. Это удобно для исключения общих подвыражений.
-Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header because small block sizes have a high overhead of temporary strings for copying shared\_ptrs and column names.
+Блоки создаются для каждого обработанного фрагмента данных. Обратите внимание, что для одного и того же типа вычислений имена и типы столбцов остаются одинаковыми для разных блоков, и меняются только данные столбцов. Лучше отделять данные блока от заголовка блока, потому что при небольших размерах блоков велики накладные расходы на временные строки при копировании shared\_ptr и имён столбцов.
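A sketch of such a container of (column, type, name) triples, reusing the hypothetical `IColumn` from the earlier sketches (the type interface here is a bare stand-in, not the real `IDataType`):

```cpp
#include <memory>
#include <string>
#include <vector>

/// Bare stand-in for the type-metadata interface described above.
struct IDataType { std::string name; };

/// One (column, type, name) triple.
struct ColumnWithTypeAndName
{
    std::shared_ptr<IColumn> column;   /// the data
    std::shared_ptr<IDataType> type;   /// how to interpret it
    std::string name;                  /// original or artificial column name
};

/// A Block is just an ordered set of such triples.
struct Block
{
    std::vector<ColumnWithTypeAndName> columns;
};
```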
## Block {#block}

A `Block` is a container that represents a subset (chunk) of a table in memory. It is just a set of triples: `(IColumn, IDataType, column name)`. During query execution, data is processed by `Block`s. If we have a `Block`, we have data (in the `IColumn` object), we have information about its type (in `IDataType`) that tells us how to deal with that column, and we have the column name. It could be either the original column name from the table or some artificial name assigned for getting temporary results of calculations.

When we calculate some function over columns in a block, we add another column with its result to the block, and we don’t touch the columns for the arguments of the function because operations are immutable. Later, unneeded columns can be removed from the block, but not modified. This is convenient for the elimination of common subexpressions.

Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only the column data changes. It is better to split the block data from the block header because small block sizes have a high overhead of temporary strings for copying shared\_ptrs and column names.
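As a rough illustration of those triples, here is how a block could be assembled against the internal headers (a sketch under stated assumptions, not the canonical production path; the column name `x` and the values are arbitrary):

``` cpp
#include <Columns/ColumnsNumber.h>
#include <Core/Block.h>
#include <DataTypes/DataTypesNumber.h>

/// A Block is just a set of (column, type, name) triples.
DB::Block makeBlock()
{
    auto column = DB::ColumnUInt64::create();
    for (size_t i = 0; i < 3; ++i)
        column->getData().push_back(i);

    DB::Block block;
    block.insert({std::move(column), std::make_shared<DB::DataTypeUInt64>(), "x"});
    return block;
}
```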
## Block Streams {#block-streams}

Block streams are for processing data. We use streams of blocks to read data from somewhere, perform data transformations, or write data to somewhere. `IBlockInputStream` has the `read` method to fetch the next block while available. `IBlockOutputStream` has the `write` method to push the block somewhere.

Streams are responsible for:

1. Reading or writing to a table. The table just returns a stream for reading or writing blocks.
2. Implementing data formats. For example, if you want to output data to a terminal in `Pretty` format, you create a block output stream where you push blocks, and it formats them.
3. Performing data transformations. Let’s say you have `IBlockInputStream` and want to create a filtered stream. You create `FilterBlockInputStream` and initialize it with your stream. Then when you pull a block from `FilterBlockInputStream`, it pulls a block from your stream, filters it, and returns the filtered block to you. Query execution pipelines are represented this way.

There are more sophisticated transformations. For example, when you pull from `AggregatingBlockInputStream`, it reads all data from its source, aggregates it, and then returns a stream of aggregated data for you. Another example: `UnionBlockInputStream` accepts many input sources in the constructor and also a number of threads. It launches multiple threads and reads from multiple sources in parallel.

> Block streams use the “pull” approach to control flow: when you pull a block from the first stream, it consequently pulls the required blocks from nested streams, and the entire execution pipeline will work. Neither “pull” nor “push” is the best solution, because control flow is implicit, and that limits the implementation of various features like simultaneous execution of multiple queries (merging many pipelines together). This limitation could be overcome with coroutines or just running extra threads that wait for each other. We may have more possibilities if we make control flow explicit: if we locate the logic for passing data from one calculation unit to another outside of those calculation units. Read this [article](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) for more thoughts.
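The pull model boils down to a simple drain loop. A sketch against the stream interface, assuming the usual convention that an empty block signals the end of data (the helper function itself is hypothetical):

``` cpp
#include <DataStreams/IBlockInputStream.h>

/// Drain a stream block by block. `in` may itself wrap nested streams
/// (filtering, aggregation, ...), so this one loop drives the whole pipeline.
void drain(const DB::BlockInputStreamPtr & in)
{
    in->readPrefix();
    while (DB::Block block = in->read())
    {
        /// process `block` here; an empty block means the stream is exhausted
    }
    in->readSuffix();
}
```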
We should note that the query execution pipeline creates temporary data at each step. We try to keep the block size small enough so that temporary data fits in the CPU cache. With that assumption, writing and reading temporary data is almost free in comparison with other calculations. We could consider an alternative, which is to fuse many operations in the pipeline together. It could make the pipeline as short as possible and remove much of the temporary data, which could be an advantage, but it also has drawbacks. For example, a split pipeline makes it easy to implement caching intermediate data, stealing intermediate data from similar queries running at the same time, and merging pipelines for similar queries.

## Formats {#formats}

Data formats are implemented with block streams. There are “presentational” formats only suitable for the output of data to the client, such as the `Pretty` format, which provides only `IBlockOutputStream`. And there are input/output formats, such as `TabSeparated` or `JSONEachRow`.

There are also row streams: `IRowInputStream` and `IRowOutputStream`. They allow you to pull/push data by individual rows, not by blocks. And they are only needed to simplify the implementation of row-oriented formats. The wrappers `BlockInputStreamFromRowInputStream` and `BlockOutputStreamFromRowOutputStream` allow you to convert row-oriented streams to regular block-oriented streams.

## I/O {#io}

For byte-oriented input/output, there are `ReadBuffer` and `WriteBuffer` abstract classes. They are used instead of C++ `iostream`s. Don’t worry: every mature C++ project is using something other than `iostream`s for good reasons.

`ReadBuffer` and `WriteBuffer` are just a contiguous buffer and a cursor pointing to the position in that buffer. Implementations may own or not own the memory for the buffer. There is a virtual method to fill the buffer with the following data (for `ReadBuffer`) or to flush the buffer somewhere (for `WriteBuffer`). The virtual methods are rarely called.

Implementations of `ReadBuffer`/`WriteBuffer` are used for working with files and file descriptors and network sockets, for implementing compression (`CompressedWriteBuffer` is initialized with another `WriteBuffer` and performs compression before writing data to it), and for other purposes – the names `ConcatReadBuffer`, `LimitReadBuffer`, and `HashingWriteBuffer` speak for themselves.

Read/WriteBuffers only deal with bytes. There are functions from the `ReadHelpers` and `WriteHelpers` header files to help with formatting input/output. For example, there are helpers to write a number in decimal format.

Let’s look at what happens when you want to write a result set in `JSON` format to stdout. You have a result set ready to be fetched from `IBlockInputStream`. You create `WriteBufferFromFileDescriptor(STDOUT_FILENO)` to write bytes to stdout. You create `JSONRowOutputStream`, initialized with that `WriteBuffer`, to write rows in `JSON` to stdout. You create `BlockOutputStreamFromRowOutputStream` on top of it, to represent it as `IBlockOutputStream`. Then you call `copyData` to transfer data from `IBlockInputStream` to `IBlockOutputStream`, and everything works. Internally, `JSONRowOutputStream` will write various JSON delimiters and call the `IDataType::serializeTextJSON` method with a reference to `IColumn` and the row number as arguments. Consequently, `IDataType::serializeTextJSON` will call a method from `WriteHelpers.h`: for example, `writeText` for numeric types and `writeJSONString` for `DataTypeString`.
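The byte-level half of that walkthrough in miniature; a sketch using `WriteBuffer` together with helpers from `WriteHelpers.h` (the exact header paths and overloads shown are assumptions that may differ between versions of the codebase):

``` cpp
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/WriteHelpers.h>
#include <unistd.h>

int main()
{
    DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);
    DB::writeText(42, out);   /// decimal-format helper from WriteHelpers.h
    DB::writeChar('\n', out);
    out.next();               /// flush buffered bytes to the descriptor
}
```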
## Tables {#tables}

The `IStorage` interface represents tables. Different implementations of that interface are different table engines. Examples are `StorageMergeTree`, `StorageMemory`, and so on. Instances of these classes are just tables.

The key `IStorage` methods are `read` and `write`. There are also `alter`, `rename`, `drop`, and so on. The `read` method accepts the following arguments: the set of columns to read from a table, the `AST` query to consider, and the desired number of streams to return. It returns one or multiple `IBlockInputStream` objects and information about the stage of data processing that was completed inside a table engine during query execution.

In most cases, the read method is only responsible for reading the specified columns from a table, not for any further data processing. All further data processing is done by the query interpreter and is outside the responsibility of `IStorage`.

But there are notable exceptions:

- The AST query is passed to the `read` method, and the table engine can use it to derive index usage and to read less data from a table.
- Sometimes the table engine can process data itself to a specific stage. For example, `StorageDistributed` can send a query to remote servers, ask them to process data to a stage where data from different remote servers can be merged, and return that preprocessed data. The query interpreter then finishes processing the data.

The table’s `read` method can return multiple `IBlockInputStream` objects to allow parallel data processing. These multiple block input streams can read from a table in parallel. Then you can wrap these streams with various transformations (such as expression evaluation or filtering) that can be calculated independently and create a `UnionBlockInputStream` on top of them, to read from multiple streams in parallel.

There are also `TableFunction`s. These are functions that return a temporary `IStorage` object to use in the `FROM` clause of a query.

To get a quick idea of how to implement your table engine, look at something simple, like `StorageMemory` or `StorageTinyLog`.

> As the result of the `read` method, `IStorage` returns `QueryProcessingStage` – information about what parts of the query were already calculated inside storage.
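Roughly, the reading entry point described above has the following shape (a simplified sketch for orientation only, not the exact declaration in the codebase, which has more parameters and changes between versions):

``` cpp
/// Simplified shape of the IStorage reading entry point.
class IStorage
{
public:
    virtual BlockInputStreams read(
        const Names & column_names,          /// which columns to fetch from the table
        const SelectQueryInfo & query_info,  /// carries the AST, e.g. for index analysis
        const Context & context,
        QueryProcessingStage::Enum processed_stage, /// how far the engine has processed data
        size_t max_block_size,
        unsigned num_streams) = 0;           /// desired read parallelism
};
```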
## Parsers {#parsers}

A hand-written recursive descent parser parses a query. For example, `ParserSelectQuery` just recursively calls the underlying parsers for various parts of the query. Parsers create an `AST`. The `AST` is represented by nodes, which are instances of `IAST`.

> Parser generators are not used for historical reasons.
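To make “recursive descent” concrete, here is a self-contained toy parser for sums of integers; the real parsers follow the same shape but produce `IAST` nodes instead of evaluating on the fly:

``` cpp
#include <cctype>

/// Toy grammar: sum := number ('+' number)*
struct Pos { const char * p; };

static bool parseNumber(Pos & pos, long & out)
{
    if (!std::isdigit(static_cast<unsigned char>(*pos.p)))
        return false;
    out = 0;
    while (std::isdigit(static_cast<unsigned char>(*pos.p)))
        out = out * 10 + (*pos.p++ - '0');
    return true;
}

static bool parseSum(Pos & pos, long & out)
{
    if (!parseNumber(pos, out))   /// delegate to the sub-parser, as ParserSelectQuery does
        return false;
    while (*pos.p == '+')
    {
        ++pos.p;
        long rhs;
        if (!parseNumber(pos, rhs))
            return false;
        out += rhs;
    }
    return true;
}
```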
## Interpreters {#interpreters}

Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the `INSERT` query is the `IBlockOutputStream` to write data for insertion to; and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but copies data from `SELECT` to `INSERT` at the same time.

`InterpreterSelectQuery` uses the `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted into separate classes to allow modular transformations of the query.

## Functions {#functions}

There are ordinary functions and aggregate functions. For aggregate functions, see the next section.

Ordinary functions don’t change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`s of data to implement vectorized query execution.

There are some miscellaneous functions, like [blockSize](../sql_reference/functions/other_functions.md#function-blocksize), [rowNumberInBlock](../sql_reference/functions/other_functions.md#function-rownumberinblock), and [runningAccumulate](../sql_reference/functions/other_functions.md#function-runningaccumulate), that exploit block processing and violate the independence of rows.

ClickHouse has strong typing, so there’s no implicit type conversion. If a function doesn’t support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function.

Implementing a function may be slightly inconvenient because a function explicitly dispatches supported data types and supported `IColumns`. For example, the `plus` function has code generated by instantiation of a C++ template for each combination of numeric types, and for constant or non-constant left and right arguments.

It is an excellent place to implement runtime code generation to avoid template code bloat. Also, it makes it possible to add fused functions like fused multiply-add or to make multiple comparisons in one loop iteration.

Due to vectorized query execution, functions are not short-circuited. For example, if you write `WHERE f(x) AND g(y)`, both sides are calculated, even for rows where `f(x)` is zero (except when `f(x)` is a zero constant expression). But if the selectivity of the `f(x)` condition is high, and calculation of `f(x)` is much cheaper than `g(y)`, it’s better to implement multi-pass calculation. It would first calculate `f(x)`, then filter the columns by the result, and then calculate `g(y)` only for the smaller, filtered chunks of data.
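A self-contained sketch of what one instantiation of such a kernel amounts to, with plain `std::vector` standing in for the internal column containers:

``` cpp
#include <cstddef>
#include <vector>

/// One type combination of a `plus`-style kernel: the function is invoked
/// once per block and runs a tight loop over whole columns, not single rows.
template <typename A, typename B, typename ResultType>
void plusKernel(const std::vector<A> & a, const std::vector<B> & b,
                std::vector<ResultType> & c)
{
    const size_t size = a.size();
    c.resize(size);
    for (size_t i = 0; i < size; ++i)
        c[i] = static_cast<ResultType>(a[i]) + static_cast<ResultType>(b[i]);
}
```

Because the loop body is free of virtual calls and branches, compilers can auto-vectorize it, which is the point of processing whole blocks at once.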
## Aggregate Functions {#aggregate-functions}

Aggregate functions are stateful functions. They accumulate passed values into some state and allow you to get results from that state. They are managed with the `IAggregateFunction` interface. States can be rather simple (the state for `AggregateFunctionCount` is just a single `UInt64` value) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table, and a `HyperLogLog` probabilistic data structure).

States are allocated in an `Arena` (a memory pool) to deal with multiple states while executing a high-cardinality `GROUP BY` query. States can have a non-trivial constructor and destructor: for example, complicated aggregation states can allocate additional memory themselves. This requires some attention to creating and destroying states and properly passing their ownership and destruction order.

Aggregation states can be serialized and deserialized to pass over the network during distributed query execution or to write them to disk where there is not enough RAM. They can even be stored in a table with the `DataTypeAggregateFunction` to allow incremental aggregation of data.

> The serialized data format for aggregate function states is not versioned right now. It is ok if aggregate states are only stored temporarily. But we have the `AggregatingMergeTree` table engine for incremental aggregation, and people are already using it in production. This is the reason why backward compatibility is required when changing the serialized format for any aggregate function in the future.
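In miniature, such a state is an accumulator with `add` and `merge` operations. A self-contained sketch in the spirit of `AggregateFunctionCount` (the struct and its method names are illustrative, not the real `IAggregateFunction` interface):

``` cpp
#include <cstdint>
#include <iostream>

/// Minimal aggregate state: accumulate rows, merge partial states
/// (e.g. from different threads or servers), then report the result.
struct CountState
{
    uint64_t count = 0;

    void add() { ++count; }                                    /// consume one row
    void merge(const CountState & rhs) { count += rhs.count; } /// combine partial aggregates
    uint64_t result() const { return count; }
};

int main()
{
    CountState a, b;
    a.add(); a.add(); b.add();
    a.merge(b);                      /// as when merging per-thread states
    std::cout << a.result() << '\n'; /// prints 3
}
```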
## Server {#server}

The server implements several different interfaces:

- An HTTP interface for any foreign clients.
- A TCP interface for the native ClickHouse client and for cross-server communication during distributed query execution.
- An interface for transferring data for replication.

Internally, it is just a primitive multithreaded server without coroutines or fibers. Since the server is not designed to process a high rate of simple queries but to process a relatively low rate of complex queries, each of them can process a vast amount of data for analytics.

The server initializes the `Context` class with the necessary environment for query execution: the list of available databases, users and access rights, settings, clusters, the process list, the query log, and so on. Interpreters use this environment.

We maintain full backward and forward compatibility for the server TCP protocol: old clients can talk to new servers, and new clients can talk to old servers. But we don’t want to maintain it eternally, and we are removing support for old versions after about one year.

!!! note "Note"
    For most external applications, we recommend using the HTTP interface because it is simple and easy to use. The TCP protocol is more tightly linked to internal data structures: it uses an internal format for passing blocks of data, and it uses custom framing for compressed data. We haven’t released a C library for that protocol because it requires linking most of the ClickHouse codebase, which is not practical.
## Distributed Query Execution {#distributed-query-execution}

Servers in a cluster setup are mostly independent. You can create a `Distributed` table on one or all servers in a cluster. The `Distributed` table does not store data itself – it only provides a “view” to all local tables on multiple nodes of a cluster. When you SELECT from a `Distributed` table, it rewrites that query, chooses remote nodes according to load-balancing settings, and sends the query to them. The `Distributed` table asks remote servers to process a query just up to a stage where intermediate results from different servers can be merged. Then it receives the intermediate results and merges them. The distributed table tries to distribute as much work as possible to remote servers and does not send much intermediate data over the network.

Things become more complicated when you have subqueries in IN or JOIN clauses, and each of them uses a `Distributed` table. We have different strategies for the execution of these queries.

There is no global query plan for distributed query execution. Each node has its local query plan for its part of the job. We only have simple one-pass distributed query execution: we send queries to remote nodes and then merge the results. But this is not feasible for complicated queries with high-cardinality GROUP BYs or with a large amount of temporary data for JOIN. In such cases, we need to “reshuffle” data between servers, which requires additional coordination. ClickHouse does not support that kind of query execution, and we need to work on it.
## Merge Tree {#merge-tree}

`MergeTree` is a family of storage engines that supports indexing by primary key. The primary key can be an arbitrary tuple of columns or expressions. Data in a `MergeTree` table is stored in “parts”. Each part stores data in primary key order, so data is ordered lexicographically by the primary key tuple. All the table columns are stored in separate `column.bin` files in these parts. The files consist of compressed blocks. Each block is usually from 64 KB to 1 MB of uncompressed data, depending on the average value size. The blocks consist of column values placed contiguously one after the other. Column values are in the same order for each column (the primary key defines the order), so when you iterate over many columns, you get values for the corresponding rows.

The primary key itself is “sparse”. It doesn’t address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks”, which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.

When we are going to read something from a part in `MergeTree`, we look at `primary.idx` data and locate ranges that could contain the requested data, then look at `column.mrk` data and calculate offsets for where to start reading those ranges. Because of sparseness, excess data may be read. ClickHouse is not suitable for a high load of simple point queries, because the entire range with `index_granularity` rows must be read for each key, and the entire compressed block must be decompressed for each column. We made the index sparse because we must be able to maintain trillions of rows per single server without noticeable memory consumption for the index. Also, because the primary key is sparse, it is not unique: it cannot check the existence of a key in the table at INSERT time. You could have many rows with the same key in a table.
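A self-contained sketch of that lookup, simplified to a single integer key (the real index stores tuples of key columns and handles equal keys spanning several granules):

``` cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

/// `index` holds the primary-key value of every N-th row (N = index_granularity).
/// A point lookup narrows the scan to one granule of N rows instead of
/// addressing the exact row, hence "excess data may be read".
std::pair<size_t, size_t> candidateRowRange(
    const std::vector<uint64_t> & index, uint64_t key, size_t index_granularity)
{
    auto it = std::upper_bound(index.begin(), index.end(), key);
    if (it == index.begin())
        return {0, 0}; /// key is below the smallest indexed value: empty range
    size_t mark = static_cast<size_t>(it - index.begin()) - 1;
    return {mark * index_granularity, (mark + 1) * index_granularity};
}
```

Every row in the returned range must then be read and filtered, which is why point queries pay the cost of a whole granule.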
When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted in primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. That’s why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make recovery after a failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts.

`MergeTree` is not an LSM tree because it doesn’t contain a “memtable” and a “log”: inserted data is written directly to the filesystem. This makes it suitable only for INSERTing data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications.

> MergeTree tables can only have one (primary) index: there aren’t any secondary indices. It would be nice to allow multiple physical representations under one logical table, for example, to store data in more than one physical order or even to allow representations with pre-aggregated data along with original data.

There are MergeTree engines that do additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as special support for updates. Keep in mind that these are not real updates, because users usually have no control over the time when background merges are executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form.
## Replication {#replication}

Replication in ClickHouse can be configured on a per-table basis. You could have some replicated and some non-replicated tables on the same server. You could also have tables replicated in different ways, such as one table with two-factor replication and another with three-factor.

Replication is implemented in the `ReplicatedMergeTree` storage engine. The path in `ZooKeeper` is specified as a parameter for the storage engine. All tables with the same path in `ZooKeeper` become replicas of each other: they synchronize their data and maintain consistency. Replicas can be added and removed dynamically simply by creating or dropping a table.

Replication uses an asynchronous multi-master scheme. You can insert data into any replica that has a session with `ZooKeeper`, and data is replicated to all other replicas asynchronously. Because ClickHouse doesn’t support UPDATEs, replication is conflict-free. As there is no quorum acknowledgment of inserts, just-inserted data might be lost if one node fails.

Metadata for replication is stored in ZooKeeper. There is a replication log that lists what actions to do. Actions are: get a part; merge parts; drop a partition; and so on. Each replica copies the replication log to its queue and then executes the actions from the queue. For example, on insertion, the “get the part” action is created in the log, and every replica downloads that part. Merges are coordinated between replicas to get byte-identical results. All parts are merged in the same way on all replicas. This is achieved by electing one replica as the leader, and that replica initiates merges and writes “merge parts” actions to the log.
Replication is physical: only compressed parts are transferred between nodes, not queries. Merges are processed on each replica independently in most cases to lower network costs by avoiding network amplification. Large merged parts are sent over the network only in cases of significant replication lag.

Besides, each replica stores its state in ZooKeeper as the set of parts and its checksums. When the state on the local filesystem diverges from the reference state in ZooKeeper, the replica restores its consistency by downloading missing and broken parts from other replicas. When there is some unexpected or broken data in the local filesystem, ClickHouse does not remove it, but moves it to a separate directory and forgets it.
!!! note "Note"
    The ClickHouse cluster consists of independent shards, and each shard consists of replicas. The cluster is **not elastic**, so after adding a new shard, data is not rebalanced between shards automatically. Instead, the cluster load is supposed to be adjusted to be uneven. This implementation gives you more control, and it is ok for relatively small clusters, such as tens of nodes. But for clusters with hundreds of nodes that we are using in production, this approach becomes a significant drawback. We should implement a table engine that spans across the cluster with dynamically replicated regions that could be split and balanced between clusters automatically.
{## [Original article](https://clickhouse.tech/docs/en/development/architecture/) ##}
diff --git a/docs/ru/development/build.md b/docs/ru/development/build.md
index 32042a4128e..3e0c3763be6 100644
--- a/docs/ru/development/build.md
+++ b/docs/ru/development/build.md
@@ -1,26 +1,27 @@
---
machine_translated: true
machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---

# How to Build ClickHouse for Development {#how-to-build-clickhouse-for-development}

The following tutorial is based on the Ubuntu Linux system.
With appropriate changes, it should also work on any other Linux distribution.
Supported platforms: x86\_64 and AArch64. Support for Power9 is experimental.

## Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}

``` bash
$ sudo apt-get install git cmake python ninja-build
```

Or cmake3 instead of cmake on older systems.

## Install GCC 9 {#install-gcc-9}

There are several ways to do this.

### Install from a PPA Package {#install-from-a-ppa-package}

``` bash
$ sudo apt-get install software-properties-common
@@ -29,30 +30,30 @@
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
```

### Install from Sources {#install-from-sources}

Look at [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh)

## Use GCC 9 for Builds {#use-gcc-9-for-builds}

``` bash
$ export CC=gcc-9
$ export CXX=g++-9
```

## Checkout ClickHouse Sources {#checkout-clickhouse-sources}

``` bash
$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
```

or

``` bash
$ git clone --recursive https://github.com/ClickHouse/ClickHouse.git
```

## Build ClickHouse {#build-clickhouse}

``` bash
$ cd ClickHouse
@@ -63,23 +64,23 @@
$ ninja
$ cd ..
```

To create an executable, run `ninja clickhouse`.
This will create the `programs/clickhouse` executable, which can be used with the `client` or `server` arguments.
# How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux}

The build requires the following components:

- Git (used only to check out the sources; not needed for the build)
- CMake 3.10 or newer
- Ninja (recommended) or Make
- C++ compiler: gcc 9 or clang 8 or newer
- Linker: lld or gold (the classic GNU ld won’t work)
- Python (only used inside the LLVM build, and optional there)

If all the components are installed, you may build in the same way as the steps above.

Example for Ubuntu Eoan:

    sudo apt update
    sudo apt install git cmake ninja-build g++ python
@@ -88,7 +89,7 @@
    cmake ../ClickHouse
    ninja

Example for OpenSUSE Tumbleweed:

    sudo zypper install git cmake ninja gcc-c++ python lld
    git clone --recursive https://github.com/ClickHouse/ClickHouse.git
@@ -96,7 +97,7 @@
    cmake ../ClickHouse
    ninja

Example for Fedora Rawhide:

    sudo yum update
    yum --nogpg install git cmake make gcc-c++ python2
@@ -105,34 +106,34 @@
    cmake ../ClickHouse
    make -j $(nproc)

# You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}

ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.

They are built for stable, prestable, and testing releases, as well as for every commit to master and for every pull request.

To find the freshest build from `master`, go to the [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green check mark or red cross near a commit, and click the “Details” link right after “ClickHouse Build Check”.
# How to Build ClickHouse Debian Package {#how-to-build-clickhouse-debian-package}

## Install Git and Pbuilder {#install-git-and-pbuilder}

``` bash
$ sudo apt-get update
$ sudo apt-get install git python pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring
```

## Checkout ClickHouse Sources {#checkout-clickhouse-sources-1}

``` bash
$ git clone --recursive --branch master https://github.com/ClickHouse/ClickHouse.git
$ cd ClickHouse
```

## Run Release Script {#run-release-script}

``` bash
$ ./release
```

[Original article](https://clickhouse.tech/docs/en/development/build/)
diff --git a/docs/ru/development/build_cross_arm.md b/docs/ru/development/build_cross_arm.md
index 0936a3133b2..27e2d73c759 100644
--- a/docs/ru/development/build_cross_arm.md
+++ b/docs/ru/development/build_cross_arm.md
@@ -1,17 +1,18 @@
---
machine_translated: true
machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---

# How to Build ClickHouse on Linux for the AARCH64 (ARM64) Architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture}

This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on another Linux machine with the AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.

The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first.

# Install Clang-8 {#install-clang-8}

Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, in Ubuntu Bionic you can use the following commands:

``` bash
echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" | sudo tee /etc/apt/sources.list.d/llvm.list
@@ -19,7 +20,7 @@
sudo apt-get update
sudo apt-get install clang-8
```

# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}

``` bash
cd ClickHouse
@@ -28,7 +29,7 @@
wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1
```

# Build ClickHouse {#build-clickhouse}

``` bash
cd ClickHouse
@@ -37,4 +38,4 @@
-Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linu ninja -C build-arm64 ``` -The resulting binary will run only on Linux with the AARCH64 CPU architecture. +Полученный двоичный файл будет работать только в Linux с архитектурой процессора AARCH64. diff --git a/docs/ru/development/build_cross_osx.md b/docs/ru/development/build_cross_osx.md index a708dc4d4f3..04d505f1a83 100644 --- a/docs/ru/development/build_cross_osx.md +++ b/docs/ru/development/build_cross_osx.md @@ -1,26 +1,27 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 --- -# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x} +# Как построить ClickHouse на Linux для Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x} -This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build_osx.md). +Это для случая, когда у вас есть Linux-машина и вы хотите использовать ее для сборки `clickhouse` двоичный файл, который будет работать на OS X. Это предназначено для непрерывной проверки интеграции, которая выполняется на серверах Linux. Если вы хотите построить ClickHouse непосредственно на Mac OS X, то продолжайте [еще одна инструкция](build_osx.md). -The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first. +Кросс-сборка для Mac OS X основана на следующих принципах: [Инструкции по сборке](build.md)- сначала следуйте за ними. -# Install Clang-8 {#install-clang-8} +# Установка Clang-8 {#install-clang-8} -Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup. -For example the commands for Bionic are like: +Следуйте инструкциям от https://apt.llvm.org/ для вашей установки Ubuntu или Debian. +Например команды для Bionic выглядят так: ``` bash sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list sudo apt-get install clang-8 ``` -# Install Cross-Compilation Toolset {#install-cross-compilation-toolset} +# Установка Набора Инструментов Перекрестной Компиляции {#install-cross-compilation-toolset} -Let’s remember the path where we install `cctools` as ${CCTOOLS} +Давайте вспомним путь, по которому мы устанавливаем `cctools` как ${CCTOOLS} ``` bash mkdir ${CCTOOLS} @@ -37,7 +38,7 @@ cd cctools-port/cctools make install ``` -Also, we need to download macOS X SDK into the working tree. +Кроме того, нам нужно загрузить MacOS X SDK в рабочее дерево. ``` bash cd ClickHouse @@ -46,7 +47,7 @@ mkdir -p build-darwin/cmake/toolchain/darwin-x86_64 tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1 ``` -# Build ClickHouse {#build-clickhouse} +# Построить ClickHouse {#build-clickhouse} ``` bash cd ClickHouse @@ -58,4 +59,4 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_TOOLCHAIN_FILE=cmake/darwin ninja -C build-osx ``` -The resulting binary will have a Mach-O executable format and can’t be run on Linux. +Полученный двоичный файл будет иметь исполняемый формат Mach-O и не может быть запущен в Linux. 
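Since the cross-compiled binary cannot be executed on the Linux build host, one simple check is to inspect the produced file type. A hedged sketch; the output path under `build-osx/programs/` is an assumption based on the build directory used above.

``` bash
# Sketch: confirm the artifact is a Mach-O executable rather than ELF.
file build-osx/programs/clickhouse
# the output should mention "Mach-O 64-bit executable x86_64"
```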
diff --git a/docs/ru/development/build_osx.md b/docs/ru/development/build_osx.md
index 6b1839aaf7f..b218304d9d1 100644
--- a/docs/ru/development/build_osx.md
+++ b/docs/ru/development/build_osx.md
@@ -1,30 +1,31 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---
-# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
+# Как собрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
-Build should work on Mac OS X 10.15 (Catalina)
+Сборка должна работать на Mac OS X 10.15 (Catalina)
-## Install Homebrew {#install-homebrew}
+## Установите Homebrew {#install-homebrew}
``` bash
$ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
```
-## Install Required Compilers, Tools, and Libraries {#install-required-compilers-tools-and-libraries}
+## Установите необходимые компиляторы, инструменты и библиотеки {#install-required-compilers-tools-and-libraries}
``` bash
$ brew install cmake ninja libtool gettext
```
-## Checkout ClickHouse Sources {#checkout-clickhouse-sources}
+## Получение исходников ClickHouse {#checkout-clickhouse-sources}
``` bash
$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
```
-or
+или
``` bash
$ git clone --recursive https://github.com/ClickHouse/ClickHouse.git
@@ -32,7 +33,7 @@ $ git clone --recursive https://github.com/ClickHouse/ClickHouse.git
$ cd ClickHouse
```
-## Build ClickHouse {#build-clickhouse}
+## Сборка ClickHouse {#build-clickhouse}
``` bash
$ mkdir build
@@ -42,16 +43,16 @@ $ ninja
$ cd ..
```
-## Caveats {#caveats}
+## Предостережения {#caveats}
-If you intend to run clickhouse-server, make sure to increase the system’s maxfiles variable.
+Если вы собираетесь запускать clickhouse-server, обязательно увеличьте системную переменную maxfiles.
-!!! info "Note"
-    You’ll need to use sudo.
+!!! info "Примечание"
+    Вам потребуется использовать sudo.
-To do so, create the following file:
+Для этого создайте следующий файл:
-/Library/LaunchDaemons/limit.maxfiles.plist:
+/Library/LaunchDaemons/limit.maxfiles.plist:
``` xml
@@ -77,14 +78,14 @@
```
-Execute the following command:
+Выполните следующую команду:
``` bash
$ sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist
```
-Reboot.
+Перезагрузитесь.
-To check if it’s working, you can use `ulimit -n` command.
+Чтобы проверить, что это работает, используйте команду `ulimit -n`.
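To apply the new limit without a full reboot, the daemon definition can also be loaded explicitly. A minimal sketch using the standard launchctl workflow; rebooting, as described above, achieves the same effect.

``` bash
# Sketch: load the LaunchDaemon so the raised open-files limit takes effect.
sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist
sudo launchctl load -w /Library/LaunchDaemons/limit.maxfiles.plist
# then, in a new terminal session, verify the limit:
ulimit -n
```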
-[Original article](https://clickhouse.tech/docs/en/development/build_osx/)
+[Оригинальная статья](https://clickhouse.tech/docs/en/development/build_osx/)
diff --git a/docs/ru/development/index.md b/docs/ru/development/index.md
index 727e89ca891..8bf31ed0d3f 100644
--- a/docs/ru/development/index.md
+++ b/docs/ru/development/index.md
@@ -1,7 +1,8 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---
-# ClickHouse Development {#clickhouse-development}
+# Разработка ClickHouse {#clickhouse-development}
-[Original article](https://clickhouse.tech/docs/en/development/)
+[Оригинальная статья](https://clickhouse.tech/docs/en/development/)
diff --git a/docs/ru/development/tests.md b/docs/ru/development/tests.md
index c703d6cd5b3..630ceecf2b2 100644
--- a/docs/ru/development/tests.md
+++ b/docs/ru/development/tests.md
@@ -1,87 +1,88 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---
-# ClickHouse Testing {#clickhouse-testing}
+# Тестирование ClickHouse {#clickhouse-testing}
-## Functional Tests {#functional-tests}
+## Функциональные тесты {#functional-tests}
-Functional tests are the most simple and convenient to use. Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
+Функциональные тесты являются наиболее простыми и удобными в использовании. Большинство функций ClickHouse можно протестировать с помощью функциональных тестов, и они обязательны для каждого изменения кода ClickHouse, которое может быть протестировано таким образом.
-Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference.
+Каждый функциональный тест отправляет один или несколько запросов на запущенный сервер ClickHouse и сравнивает результат с эталонным.
-Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests.
+Тесты расположены в каталоге `queries`. Там есть два подкаталога: `stateless` и `stateful`. Тесты без состояния выполняют запросы без предварительно загруженных тестовых данных: они часто создают небольшие синтетические наборы данных на лету, прямо в тесте. Тесты с состоянием требуют предварительно загруженных тестовых данных из Яндекс.Метрики, недоступных широкой публике. Мы стараемся использовать только `stateless` тесты и избегаем добавления новых `stateful` тестов.
-Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself.
+Каждый тест может быть одного из двух типов: `.sql` и `.sh`. `.sql` тест представляет собой простой SQL-скрипт, который передается в `clickhouse-client --multiquery --testmode`. `.sh` тест представляет собой скрипт, который запускается сам по себе.
-To run all tests, use `testskhouse-test` tool. Look `--help` for the list of possible options. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`.
+Чтобы выполнить все тесты, используйте инструмент `clickhouse-test`. Смотрите `--help` для списка возможных опций. Вы можете просто запустить все тесты или запустить подмножество тестов, отфильтрованных по подстроке в имени теста: `./clickhouse-test substring`.
-The most simple way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server` and then run `./clickhouse-test` from its own directory.
+Самый простой способ вызвать функциональные тесты: скопировать `clickhouse-client` в `/usr/bin/`, запустить `clickhouse-server`, а затем запустить `./clickhouse-test` из его собственного каталога.
-To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
+Чтобы добавить новый тест, создайте файл `.sql` или `.sh` в каталоге `queries/0_stateless`, проверьте его вручную, а затем сгенерируйте файл `.reference` следующим образом: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` или `./00000_test.sh > ./00000_test.reference`.
-Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
+Тесты должны использовать (создавать, удалять и т. д.) только таблицы в базе данных `test`, которая считается созданной заранее; также тесты могут использовать временные таблицы.
-If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`.
+Если вы хотите использовать распределенные запросы в функциональных тестах, вы можете воспользоваться табличной функцией `remote` с адресами `127.0.0.{1..2}`, чтобы сервер запрашивал сам себя; или использовать предопределенные тестовые кластеры из файла конфигурации сервера, например `test_shard_localhost`.
-Some tests are marked with `zookeeper`, `shard` or `long` in their names.
-`zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that
-requires server to listen `127.0.0.*`; `distributed` or `global` have the same
-meaning. `long` is for tests that run slightly longer that one second. You can
-disable these groups of tests using `--no-zookeeper`, `--no-shard` and
-`--no-long` options, respectively.
+Некоторые тесты помечены `zookeeper`, `shard` или `long` в своих именах.
+`zookeeper` относится к тестам, использующим ZooKeeper. `shard` относится к тестам,
+которым требуется, чтобы сервер слушал `127.0.0.*`; `distributed` или `global` имеют то же
+значение. `long` относится к тестам, которые работают чуть дольше одной секунды. Вы можете
+отключить эти группы тестов с помощью опций `--no-zookeeper`, `--no-shard` и
+`--no-long` соответственно.
-## Known bugs {#known-bugs}
+## Известные ошибки {#known-bugs}
-If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `queries/bugs` directory. These tests will be moved to `teststests_stateless` when bugs are fixed.
+Если мы знаем об ошибках, которые легко воспроизводятся функциональными тестами, мы помещаем подготовленные функциональные тесты в каталог `queries/bugs`. Эти тесты будут перенесены в `teststests_stateless`, когда ошибки будут исправлены.
-## Integration Tests {#integration-tests}
+## Интеграционные тесты {#integration-tests}
-Integration tests allow to test ClickHouse in clustered configuration and ClickHouse interaction with other servers like MySQL, Postgres, MongoDB. They are useful to emulate network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software.
+Интеграционные тесты позволяют тестировать ClickHouse в кластерной конфигурации, а также взаимодействие ClickHouse с другими серверами, такими как MySQL, Postgres, MongoDB. Они полезны для эмуляции разделения сети, отбрасывания пакетов и т. д. Эти тесты выполняются под Docker и создают несколько контейнеров с различным программным обеспечением.
-See `testsgration/README.md` on how to run these tests.
+Смотрите `testsgration/README.md` о том, как запускать эти тесты.
-Note that integration of ClickHouse with third-party drivers is not tested. Also we currently don’t have integration tests with our JDBC and ODBC drivers.
+Обратите внимание, что интеграция ClickHouse со сторонними драйверами не тестируется. Кроме того, в настоящее время у нас нет интеграционных тестов с нашими драйверами JDBC и ODBC.
-## Unit Tests {#unit-tests}
+## Модульные тесты {#unit-tests}
-Unit tests are useful when you want to test not the ClickHouse as a whole, but a single isolated library or class. You can enable or disable build of tests with `ENABLE_TESTS` CMake option. Unit tests (and other test programs) are located in `tests` subdirectories across the code. To run unit tests, type `ninja test`. Some tests use `gtest`, but some are just programs that return non-zero exit code on test failure.
+Модульные тесты полезны, когда вы хотите протестировать не ClickHouse в целом, а отдельную изолированную библиотеку или класс. Вы можете включить или отключить сборку тестов с помощью опции CMake `ENABLE_TESTS`. Модульные тесты (и другие тестовые программы) расположены в подкаталогах `tests` по всему коду. Чтобы запустить модульные тесты, введите `ninja test`. Некоторые тесты используют `gtest`, а некоторые представляют собой просто программы, которые возвращают ненулевой код выхода при сбое теста.
-It’s not necessarily to have unit tests if the code is already covered by functional tests (and functional tests are usually much more simple to use).
+Не обязательно иметь модульные тесты, если код уже покрыт функциональными тестами (а функциональные тесты обычно гораздо проще в использовании).
-## Performance Tests {#performance-tests}
+## Тесты производительности {#performance-tests}
-Performance tests allow to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Tests are located at `tests/performance`. Each test is represented by `.xml` file with description of test case. Tests are run with `clickhouse performance-test` tool (that is embedded in `clickhouse` binary). See `--help` for invocation.
+Тесты производительности позволяют измерять и сравнивать производительность некоторой изолированной части ClickHouse на синтетических запросах. Тесты расположены в `tests/performance`. Каждый тест представлен файлом `.xml` с описанием тестового случая. Тесты выполняются с помощью инструмента `clickhouse performance-test` (встроенного в двоичный файл `clickhouse`). Смотрите `--help` для вариантов вызова.
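For orientation, an invocation might look like the sketch below; the test file name is hypothetical, and the authoritative list of flags comes from `--help`.

``` bash
# Sketch: inspect the options, then run a single performance test case (hypothetical file name).
clickhouse performance-test --help
clickhouse performance-test tests/performance/example_test.xml
```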
-Each test run one or miltiple queries (possibly with combinations of parameters) in a loop with some conditions for stop (like “maximum execution speed is not changing in three seconds”) and measure some metrics about query performance (like “maximum execution speed”). Some tests can contain preconditions on preloaded test dataset.
+Каждый тест запускает один или несколько запросов (возможно, с комбинациями параметров) в цикле с некоторыми условиями остановки (например «maximum execution speed is not changing in three seconds») и измеряет некоторые показатели производительности запросов (например, «maximum execution speed»). Некоторые тесты могут содержать предварительные условия на предварительно загруженный тестовый набор данных.
-If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other perf tools during your tests.
+Если вы хотите улучшить производительность ClickHouse в каком-то сценарии, и если улучшения видны на простых запросах, настоятельно рекомендуется написать тест производительности. Всегда имеет смысл использовать `perf top` или другие инструменты perf во время ваших тестов.
-## Test Tools And Scripts {#test-tools-and-scripts}
+## Инструменты и скрипты тестирования {#test-tools-and-scripts}
-Some programs in `tests` directory are not prepared tests, but are test tools. For example, for `Lexer` there is a tool `dbms/Parsers/tests/lexer` that just do tokenization of stdin and writes colorized result to stdout. You can use these kind of tools as a code examples and for exploration and manual testing.
+Некоторые программы в каталоге `tests` представляют собой не подготовленные тесты, а инструменты тестирования. Например, для `Lexer` есть инструмент `dbms/Parsers/tests/lexer`, который просто выполняет токенизацию stdin и записывает раскрашенный результат в stdout. Вы можете использовать такие инструменты как примеры кода, а также для исследования и ручного тестирования.
-You can also place pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then script result can be compared to `.reference` file. These kind of tests are not automated.
+Вы также можете разместить пару файлов `.sh` и `.reference` рядом с инструментом, чтобы запускать его на некотором заранее заданном входе: тогда результат скрипта можно сравнить с файлом `.reference`. Такого рода тесты не автоматизированы.
-## Miscellanous Tests {#miscellanous-tests}
+## Прочие тесты {#miscellanous-tests}
-There are tests for external dictionaries located at `tests/external_dictionaries` and for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests.
+Существуют тесты для внешних словарей, расположенные в `tests/external_dictionaries`, и для моделей машинного обучения в `tests/external_models`. Эти тесты не обновляются и должны быть перенесены в интеграционные тесты.
-There is separate test for quorum inserts. This test run ClickHouse cluster on separate servers and emulate various failure cases: network split, packet drop (between ClickHouse nodes, between ClickHouse and ZooKeeper, between ClickHouse server and client, etc.), `kill -9`, `kill -STOP` and `kill -CONT` , like [Jepsen](https://aphyr.com/tags/Jepsen). Then the test checks that all acknowledged inserts was written and all rejected inserts was not.
+Существует отдельный тест для кворумных вставок. Этот тест запускает кластер ClickHouse на отдельных серверах и эмулирует различные случаи сбоя: разделение сети, отбрасывание пакетов (между узлами ClickHouse, между ClickHouse и ZooKeeper, между сервером ClickHouse и клиентом и т. д.), `kill -9`, `kill -STOP` и `kill -CONT`, как [Jepsen](https://aphyr.com/tags/Jepsen). Затем тест проверяет, что все подтвержденные вставки были записаны, а все отклоненные не были записаны.
-Quorum test was written by separate team before ClickHouse was open-sourced. This team no longer work with ClickHouse. Test was accidentially written in Java. For these reasons, quorum test must be rewritten and moved to integration tests.
+Тест кворума был написан отдельной командой еще до того, как ClickHouse стал проектом с открытым исходным кодом. Эта команда больше не работает с ClickHouse. Тест был по случайности написан на Java. По этим причинам тест кворума должен быть переписан и перенесен в интеграционные тесты.
-## Manual Testing {#manual-testing}
+## Ручное тестирование {#manual-testing}
-When you develop a new feature, it is reasonable to also test it manually. You can do it with the following steps:
+Когда вы разрабатываете новую функцию, разумно также протестировать ее вручную. Вы можете сделать это с помощью следующих шагов:
-Build ClickHouse. Run ClickHouse from the terminal: change directory to `programs/clickhouse-server` and run it with `./clickhouse-server`. It will use configuration (`config.xml`, `users.xml` and files within `config.d` and `users.d` directories) from the current directory by default. To connect to ClickHouse server, run `programs/clickhouse-client/clickhouse-client`.
+Соберите ClickHouse. Запустите ClickHouse из терминала: перейдите в каталог `programs/clickhouse-server` и запустите его командой `./clickhouse-server`. По умолчанию он будет использовать конфигурацию (`config.xml`, `users.xml` и файлы в каталогах `config.d` и `users.d`) из текущего каталога. Чтобы подключиться к серверу ClickHouse, выполните `programs/clickhouse-client/clickhouse-client`.
-Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`.
+Обратите внимание, что все инструменты clickhouse (сервер, клиент и т. д.) являются просто символическими ссылками на один двоичный файл с именем `clickhouse`. Вы можете найти этот двоичный файл в `programs/clickhouse`. Все инструменты также могут быть вызваны как `clickhouse tool` вместо `clickhouse-tool`.
-Alternatively you can install ClickHouse package: either stable release from Yandex repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo service clickhouse-server start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`.
+В качестве альтернативы вы можете установить пакет ClickHouse: либо стабильный релиз из репозитория Яндекса, либо соберите пакет самостоятельно с помощью `./release` в корне исходников ClickHouse. Затем запустите сервер командой `sudo service clickhouse-server start` (или `stop`, чтобы остановить сервер). Журналы ищите в `/etc/clickhouse-server/clickhouse-server.log`.
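Put together, the manual-testing loop described above might look like this minimal sketch (paths are relative to the ClickHouse source tree):

``` bash
# Sketch: run a freshly built server from its directory and connect a client to it.
cd programs/clickhouse-server
./clickhouse-server    # picks up config.xml, users.xml, config.d and users.d from the current directory
# from the sources root, in a second terminal:
programs/clickhouse-client/clickhouse-client    # connects to localhost by default
```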
-When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary:
+Когда ClickHouse уже установлен в вашей системе, вы можете собрать новый двоичный файл `clickhouse` и заменить существующий:
``` bash
$ sudo service clickhouse-server stop
@@ -89,161 +90,161 @@ $ sudo cp ./clickhouse /usr/bin/
$ sudo service clickhouse-server start
```
-Also you can stop system clickhouse-server and run your own with the same configuration but with logging to terminal:
+Также вы можете остановить системный clickhouse-server и запустить свой собственный с той же конфигурацией, но с выводом журнала в терминал:
``` bash
$ sudo service clickhouse-server stop
$ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
-Example with gdb:
+Пример с gdb:
``` bash
$ sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
-If the system clickhouse-server is already running and you don’t want to stop it, you can change port numbers in your `config.xml` (or override them in a file in `config.d` directory), provide appropriate data path, and run it.
+Если системный clickhouse-server уже запущен и вы не хотите его останавливать, вы можете изменить номера портов в вашем `config.xml` (или переопределить их в файле в каталоге `config.d`), указать соответствующий путь к данным и запустить его.
-`clickhouse` binary has almost no dependencies and works across wide range of Linux distributions. To quick and dirty test your changes on a server, you can simply `scp` your fresh built `clickhouse` binary to your server and then run it as in examples above.
+Двоичный файл `clickhouse` почти не имеет зависимостей и работает на широком наборе дистрибутивов Linux. Чтобы быстро, без лишних приготовлений, проверить свои изменения на сервере, вы можете просто скопировать свежесобранный двоичный файл `clickhouse` на сервер через `scp`, а затем запустить его, как в приведенных выше примерах.
-## Testing Environment {#testing-environment}
+## Тестовая среда {#testing-environment}
-Before publishing release as stable we deploy it on testing environment. Testing environment is a cluster that process 1/39 part of [Yandex.Metrica](https://metrica.yandex.com/) data. We share our testing environment with Yandex.Metrica team. ClickHouse is upgraded without downtime on top of existing data. We look at first that data is processed successfully without lagging from realtime, the replication continue to work and there is no issues visible to Yandex.Metrica team. First check can be done in the following way:
+Перед публикацией релиза как стабильного мы развертываем его в тестовой среде. Тестовая среда представляет собой кластер, который обрабатывает 1/39 часть данных [Яндекс.Метрики](https://metrica.yandex.com/). Мы делим нашу тестовую среду с командой Яндекс.Метрики. ClickHouse обновляется без простоя поверх существующих данных. Мы сначала смотрим, что данные обрабатываются успешно, без отставания от реального времени, репликация продолжает работать и нет проблем, заметных команде Яндекс.Метрики. Первую проверку можно провести следующим образом:
``` sql
SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
```
-In some cases we also deploy to testing environment of our friend teams in Yandex: Market, Cloud, etc. Also we have some hardware servers that are used for development purposes.
+В некоторых случаях мы также развертываем релиз в тестовых средах дружественных команд Яндекса: Маркета, Облака и т. д. Кроме того, у нас есть несколько аппаратных серверов, которые используются для целей разработки.
-## Load Testing {#load-testing}
+## Нагрузочное тестирование {#load-testing}
-After deploying to testing environment we run load testing with queries from production cluster. This is done manually.
+После развертывания в тестовой среде мы запускаем нагрузочное тестирование с запросами из производственного кластера. Это делается вручную.
-Make sure you have enabled `query_log` on your production cluster.
+Убедитесь, что вы включили `query_log` на вашем производственном кластере.
-Collect query log for a day or more:
+Соберите журнал запросов за день или больше:
``` bash
$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
```
-This is a way complicated example. `type = 2` will filter queries that are executed successfully. `query LIKE '%ym:%'` is to select relevant queries from Yandex.Metrica. `is_initial_query` is to select only queries that are initiated by client, not by ClickHouse itself (as parts of distributed query processing).
+Это довольно замысловатый пример. `type = 2` отфильтровывает запросы, которые выполнились успешно. `query LIKE '%ym:%'` отбирает релевантные запросы Яндекс.Метрики. `is_initial_query` отбирает только запросы, инициированные клиентом, а не самим ClickHouse (как части распределенной обработки запроса).
-`scp` this log to your testing cluster and run it as following:
+Скопируйте этот журнал через `scp` на свой тестовый кластер и запустите его следующим образом:
``` bash
$ clickhouse benchmark --concurrency 16 < queries.tsv
```
-(probably you also want to specify a `--user`)
+(вероятно, вы также захотите указать `--user`)
-Then leave it for a night or weekend and go take a rest.
+Затем оставьте его на ночь или на выходные и идите отдыхать.
-You should check that `clickhouse-server` doesn’t crash, memory footprint is bounded and performance not degrading over time.
+Вы должны проверить, что `clickhouse-server` не падает, потребление памяти ограничено, а производительность не деградирует со временем.
-Precise query execution timings are not recorded and not compared due to high variability of queries and environment.
+Точные тайминги выполнения запросов не регистрируются и не сравниваются из-за высокой вариативности запросов и окружения.
-## Build Tests {#build-tests}
+## Тесты сборки {#build-tests}
-Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. Tests are located at `ci` directory. They run build from source inside Docker, Vagrant, and sometimes with `qemu-user-static` inside Docker. These tests are under development and test runs are not automated.
+Тесты сборки позволяют проверить, что сборка не ломается на различных альтернативных конфигурациях и на некоторых сторонних системах. Тесты расположены в каталоге `ci`. Они запускают сборку из исходного кода внутри Docker, Vagrant, а иногда с `qemu-user-static` внутри Docker. Эти тесты находятся в стадии разработки, и их запуски не автоматизированы.
-Motivation:
+Мотивация:
-Normally we release and run all tests on a single variant of ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
+Обычно мы выпускаем релизы и запускаем все тесты на одном варианте сборки ClickHouse. Но есть альтернативные варианты сборки, которые не проверяются столь тщательно. Примеры:
-- build on FreeBSD;
-- build on Debian with libraries from system packages;
-- build with shared linking of libraries;
-- build on AArch64 platform;
-- build on PowerPc platform.
+- сборка на FreeBSD;
+- сборка на Debian с библиотеками из системных пакетов;
+- сборка с разделяемой линковкой библиотек;
+- сборка на платформе AArch64;
+- сборка на платформе PowerPC.
-For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.
+Например, сборка с системными пакетами считается плохой практикой, потому что мы не можем гарантировать, какая именно версия пакетов будет в системе. Но это действительно нужно сопровождающим Debian. По этой причине мы как минимум должны поддерживать этот вариант сборки. Другой пример: разделяемая линковка является частым источником проблем, но она нужна некоторым энтузиастам.
-Though we cannot run all tests on all variant of builds, we want to check at least that various build variants are not broken. For this purpose we use build tests.
+Хотя мы не можем выполнить все тесты на всех вариантах сборки, мы хотим проверить хотя бы, что различные варианты сборки не сломаны. Для этого мы используем тесты сборки.
-## Testing For Protocol Compatibility {#testing-for-protocol-compatibility}
+## Тестирование совместимости протокола {#testing-for-protocol-compatibility}
-When we extend ClickHouse network protocol, we test manually that old clickhouse-client works with new clickhouse-server and new clickhouse-client works with old clickhouse-server (simply by running binaries from corresponding packages).
+Когда мы расширяем сетевой протокол ClickHouse, мы вручную проверяем, что старый clickhouse-client работает с новым clickhouse-server, а новый clickhouse-client работает со старым clickhouse-server (просто запуская двоичные файлы из соответствующих пакетов).
-## Help From The Compiler {#help-from-the-compiler}
+## Помощь от компилятора {#help-from-the-compiler}
-Main ClickHouse code (that is located in `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional enabled warnings. Although these options are not enabled for third-party libraries.
+Основной код ClickHouse (расположенный в каталоге `dbms`) собирается с `-Wall -Wextra -Werror` и с некоторыми дополнительными включенными предупреждениями. Однако для сторонних библиотек эти параметры не включены.
-Clang has even more useful warnings - you can look for them with `-Weverything` and pick something to default build.
+У Clang есть еще более полезные предупреждения: вы можете поискать их с помощью `-Weverything` и выбрать что-то для сборки по умолчанию.
-For production builds, gcc is used (it still generates slightly more efficient code than clang). For development, clang is usually more convenient to use. You can build on your own machine with debug mode (to save battery of your laptop), but please note that compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedure analysis. When building with clang, `libc++` is used instead of `libstdc++` and when building with debug mode, debug version of `libc++` is used that allows to catch more errors at runtime.
+Для производственных сборок используется gcc (он все еще генерирует немного более эффективный код, чем clang). Для разработки обычно удобнее использовать clang. Вы можете собирать на собственной машине в режиме отладки (чтобы экономить батарею ноутбука), но обратите внимание, что компилятор способен выдавать больше предупреждений с `-O3` благодаря лучшему анализу потока управления и межпроцедурному анализу. При сборке с clang вместо `libstdc++` используется `libc++`, а при сборке в режиме отладки используется отладочная версия `libc++`, которая позволяет ловить больше ошибок во время выполнения.
-## Sanitizers {#sanitizers}
+## Санитайзеры {#sanitizers}
-**Address sanitizer**.
-We run functional and integration tests under ASan on per-commit basis.
+**Address sanitizer**.
+Мы запускаем функциональные и интеграционные тесты под ASan для каждого коммита.
-**Valgrind (Memcheck)**.
-We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
+**Valgrind (Memcheck)**.
+Мы запускаем функциональные тесты под Valgrind по ночам. Это занимает несколько часов. В настоящее время известен один ложноположительный результат в библиотеке `re2`, см. [эту статью](https://research.swtch.com/sparse).
-**Undefined behaviour sanitizer.**
-We run functional and integration tests under ASan on per-commit basis.
+**Undefined behaviour sanitizer.**
+Мы запускаем функциональные и интеграционные тесты под ASan для каждого коммита.
-**Thread sanitizer**.
-We run functional tests under TSan on per-commit basis. We still don’t run integration tests under TSan on per-commit basis.
+**Thread sanitizer**.
+Мы запускаем функциональные тесты под TSan для каждого коммита. Мы все еще не запускаем интеграционные тесты под TSan для каждого коммита.
-**Memory sanitizer**.
-Currently we still don’t use MSan.
+**Memory sanitizer**.
+В настоящее время мы все еще не используем MSan.
-**Debug allocator.**
-Debug version of `jemalloc` is used for debug build.
+**Отладочный аллокатор.**
+Для отладочной сборки используется отладочная версия `jemalloc`.
-## Fuzzing {#fuzzing}
+## Фаззинг {#fuzzing}
-We use simple fuzz test to generate random SQL queries and to check that the server doesn’t die. Fuzz testing is performed with Address sanitizer. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
+Мы используем простой fuzz-тест для генерации случайных SQL-запросов и проверки того, что сервер от них не умирает. Fuzz-тестирование выполняется с Address sanitizer. Вы можете найти его в `00746_sql_fuzzy.pl`. Этот тест следует запускать непрерывно (ночью и дольше).
-As of December 2018, we still don’t use isolated fuzz testing of library code.
+По состоянию на декабрь 2018 года мы все еще не используем изолированный фаззинг библиотечного кода.
-## Security Audit {#security-audit}
+## Аудит безопасности {#security-audit}
-People from Yandex Cloud department do some basic overview of ClickHouse capabilities from the security standpoint.
+Люди из отдела Яндекс.Облака делают базовый обзор возможностей ClickHouse с точки зрения безопасности.
-## Static Analyzers {#static-analyzers}
+## Статические анализаторы {#static-analyzers}
-We run `PVS-Studio` on per-commit basis. We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/).
+Мы запускаем `PVS-Studio` для каждого коммита. Мы оценили `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`. Инструкции по использованию вы найдете в каталоге `tests/instructions/`. Также вы можете прочитать [статью на русском языке](https://habr.com/company/yandex/blog/342018/).
-If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.
+Если вы используете `CLion` в качестве IDE, вы можете использовать некоторые проверки `clang-tidy` из коробки.
-## Hardening {#hardening}
+## Hardening {#hardening}
-`FORTIFY_SOURCE` is used by default. It is almost useless, but still makes sense in rare cases and we don’t disable it.
+`FORTIFY_SOURCE` используется по умолчанию. Это почти бесполезно, но в редких случаях все же имеет смысл, и мы не отключаем его.
-## Code Style {#code-style}
+## Стиль кода {#code-style}
-Code style rules are described [here](https://clickhouse.tech/docs/en/development/style/).
+Правила стиля кода описаны [здесь](https://clickhouse.tech/docs/en/development/style/).
-To check for some common style violations, you can use `utils/check-style` script.
+Чтобы проверить некоторые распространенные нарушения стиля, вы можете использовать скрипт `utils/check-style`.
-To force proper style of your code, you can use `clang-format`. File `.clang-format` is located at the sources root. It mostly corresponding with our actual code style. But it’s not recommended to apply `clang-format` to existing files because it makes formatting worse. You can use `clang-format-diff` tool that you can find in clang source repository.
+Чтобы принудительно привести код к правильному стилю, вы можете использовать `clang-format`. Файл `.clang-format` находится в корне исходников. Он в основном соответствует нашему фактическому стилю кода. Но не рекомендуется применять `clang-format` к существующим файлам, потому что это ухудшает форматирование. Вы можете использовать инструмент `clang-format-diff`, который можно найти в репозитории исходного кода clang.
-Alternatively you can try `uncrustify` tool to reformat your code. Configuration is in `uncrustify.cfg` in the sources root. It is less tested than `clang-format`.
+В качестве альтернативы вы можете попробовать инструмент `uncrustify` для переформатирования кода. Конфигурация находится в `uncrustify.cfg` в корне исходников. Он протестирован хуже, чем `clang-format`.
-`CLion` has its own code formatter that has to be tuned for our code style.
+`CLion` имеет собственный форматировщик кода, который нужно настроить под наш стиль кода.
-## Metrica B2B Tests {#metrica-b2b-tests}
+## B2B-тесты Метрики {#metrica-b2b-tests}
-Each ClickHouse release is tested with Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of Metrica engine that is processing fixed sample of input data. Then results of two instances of Metrica engine are compared together.
+Каждый релиз ClickHouse тестируется с движками Yandex Metrica и AppMetrica. Тестовые и стабильные версии ClickHouse развертываются на виртуальных машинах и запускаются с небольшой копией движка Метрики, который обрабатывает фиксированную выборку входных данных. Затем результаты двух экземпляров движка Метрики сравниваются между собой.
-These tests are automated by separate team. Due to high number of moving parts, tests are fail most of the time by completely unrelated reasons, that are very difficult to figure out. Most likely these tests have negative value for us. Nevertheless these tests was proved to be useful in about one or two times out of hundreds.
+Эти тесты автоматизированы отдельной командой. Из-за большого количества движущихся частей тесты большую часть времени падают по совершенно несвязанным причинам, которые очень трудно выяснить. Скорее всего, эти тесты имеют для нас отрицательную ценность. Тем не менее эти тесты оказались полезными примерно в одном или двух случаях из сотен.
-## Test Coverage {#test-coverage}
+## Тестовое покрытие {#test-coverage}
-As of July 2018 we don’t track test coverage.
+По состоянию на июль 2018 года мы не отслеживаем покрытие тестов.
-## Test Automation {#test-automation}
+## Автоматизация тестирования {#test-automation}
-We run tests with Yandex internal CI and job automation system named “Sandbox”.
+Мы запускаем тесты с помощью внутренней системы CI и автоматизации заданий Яндекса под названием «Sandbox».
-Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored eternally. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you.
+Задания сборки и тесты выполняются в Sandbox для каждого коммита. Полученные пакеты и результаты тестов публикуются на GitHub и могут быть скачаны по прямым ссылкам. Артефакты хранятся вечно. Когда вы отправляете pull request на GitHub, мы помечаем его как «can be tested», и наша система CI соберет для вас пакеты ClickHouse (release, debug, с address sanitizer и т. д.).
-We don’t use Travis CI due to the limit on time and computational power.
-We don’t use Jenkins. It was used before and now we are happy we are not using Jenkins.
+Мы не используем Travis CI из-за ограничений по времени и вычислительной мощности.
+Мы не используем Jenkins. Он использовался раньше, и теперь мы рады, что не используем Jenkins.
-[Original article](https://clickhouse.tech/docs/en/development/tests/)
-velopment/tests/)
+[Оригинальная статья](https://clickhouse.tech/docs/en/development/tests/)
+разработка / испытания/)
diff --git a/docs/ru/database_engines/index.md b/docs/ru/engines/database_engines/index.md
similarity index 78%
rename from docs/ru/database_engines/index.md
rename to docs/ru/engines/database_engines/index.md
index 982324e0408..75086eaf678 100644
--- a/docs/ru/database_engines/index.md
+++ b/docs/ru/engines/database_engines/index.md
@@ -2,7 +2,7 @@
 Движки баз данных обеспечивают работу с таблицами.
-По умолчанию ClickHouse использует собственный движок баз данных, который поддерживает конфигурируемые [движки таблиц](../operations/table_engines/index.md) и [диалект SQL](../query_language/syntax.md).
+По умолчанию ClickHouse использует собственный движок баз данных, который поддерживает конфигурируемые [движки таблиц](../../engines/database_engines/index.md) и [диалект SQL](../../engines/database_engines/index.md).
Также можно использовать следующие движки баз данных: diff --git a/docs/ru/database_engines/lazy.md b/docs/ru/engines/database_engines/lazy.md similarity index 100% rename from docs/ru/database_engines/lazy.md rename to docs/ru/engines/database_engines/lazy.md diff --git a/docs/ru/database_engines/mysql.md b/docs/ru/engines/database_engines/mysql.md similarity index 68% rename from docs/ru/database_engines/mysql.md rename to docs/ru/engines/database_engines/mysql.md index 45547407be6..1dbcb67e8f1 100644 --- a/docs/ru/database_engines/mysql.md +++ b/docs/ru/engines/database_engines/mysql.md @@ -28,23 +28,23 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') | MySQL | ClickHouse | |----------------------------------|---------------------------------------------| -| UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) | -| TINYINT | [Int8](../data_types/int_uint.md) | -| UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) | -| SMALLINT | [Int16](../data_types/int_uint.md) | -| UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) | -| INT, MEDIUMINT | [Int32](../data_types/int_uint.md) | -| UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) | -| BIGINT | [Int64](../data_types/int_uint.md) | -| FLOAT | [Float32](../data_types/float.md) | -| DOUBLE | [Float64](../data_types/float.md) | -| DATE | [Date](../data_types/date.md) | -| DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) | -| BINARY | [FixedString](../data_types/fixedstring.md) | +| UNSIGNED TINYINT | [UInt8](../../engines/database_engines/mysql.md) | +| TINYINT | [Int8](../../engines/database_engines/mysql.md) | +| UNSIGNED SMALLINT | [UInt16](../../engines/database_engines/mysql.md) | +| SMALLINT | [Int16](../../engines/database_engines/mysql.md) | +| UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../../engines/database_engines/mysql.md) | +| INT, MEDIUMINT | [Int32](../../engines/database_engines/mysql.md) | +| UNSIGNED BIGINT | [UInt64](../../engines/database_engines/mysql.md) | +| BIGINT | [Int64](../../engines/database_engines/mysql.md) | +| FLOAT | [Float32](../../engines/database_engines/mysql.md) | +| DOUBLE | [Float64](../../engines/database_engines/mysql.md) | +| DATE | [Date](../../engines/database_engines/mysql.md) | +| DATETIME, TIMESTAMP | [DateTime](../../engines/database_engines/mysql.md) | +| BINARY | [FixedString](../../engines/database_engines/mysql.md) | -Все прочие типы данных преобразуются в [String](../data_types/string.md). +Все прочие типы данных преобразуются в [String](../../engines/database_engines/mysql.md). -[Nullable](../data_types/nullable.md) поддержан. +[Nullable](../../engines/database_engines/mysql.md) поддержан. ## Примеры использования {#primery-ispolzovaniia} diff --git a/docs/ru/engines/index.md b/docs/ru/engines/index.md new file mode 100644 index 00000000000..c4b0b299858 --- /dev/null +++ b/docs/ru/engines/index.md @@ -0,0 +1,6 @@ +--- +toc_folder_title: Engines +toc_priority: 25 +--- + + diff --git a/docs/ru/operations/table_engines/index.md b/docs/ru/engines/table_engines/index.md similarity index 64% rename from docs/ru/operations/table_engines/index.md rename to docs/ru/engines/table_engines/index.md index 775164ccb52..fdf9cd50bf6 100644 --- a/docs/ru/operations/table_engines/index.md +++ b/docs/ru/engines/table_engines/index.md @@ -13,27 +13,27 @@ ### MergeTree {#mergetree} -Наиболее универсальные и функциональные движки таблиц для задач с высокой загрузкой. 
Общим свойством этих движков является быстрая вставка данных с последующей фоновой обработкой данных. Движки `*MergeTree` поддерживают репликацию данных (в [Replicated\*](replication.md) версиях движков), партиционирование, и другие возможности не поддержанные для других движков. +Наиболее универсальные и функциональные движки таблиц для задач с высокой загрузкой. Общим свойством этих движков является быстрая вставка данных с последующей фоновой обработкой данных. Движки `*MergeTree` поддерживают репликацию данных (в [Replicated\*](mergetree_family/replication.md) версиях движков), партиционирование, и другие возможности не поддержанные для других движков. Движки семейства: -- [MergeTree](mergetree.md) -- [ReplacingMergeTree](replacingmergetree.md) -- [SummingMergeTree](summingmergetree.md) -- [AggregatingMergeTree](aggregatingmergetree.md) -- [CollapsingMergeTree](collapsingmergetree.md) -- [VersionedCollapsingMergeTree](versionedcollapsingmergetree.md) -- [GraphiteMergeTree](graphitemergetree.md) +- [MergeTree](mergetree_family/mergetree.md) +- [ReplacingMergeTree](mergetree_family/replacingmergetree.md) +- [SummingMergeTree](mergetree_family/summingmergetree.md) +- [AggregatingMergeTree](mergetree_family/aggregatingmergetree.md) +- [CollapsingMergeTree](mergetree_family/collapsingmergetree.md) +- [VersionedCollapsingMergeTree](mergetree_family/versionedcollapsingmergetree.md) +- [GraphiteMergeTree](mergetree_family/graphitemergetree.md) ### Log {#log} -Простые [движки](log_family.md) с минимальной функциональностью. Они наиболее эффективны, когда вам нужно быстро записать много небольших таблиц (до примерно 1 миллиона строк) и прочитать их позже целиком. +Простые [движки](log_family/index.md) с минимальной функциональностью. Они наиболее эффективны, когда вам нужно быстро записать много небольших таблиц (до примерно 1 миллиона строк) и прочитать их позже целиком. 
Движки семейства: -- [TinyLog](tinylog.md) -- [StripeLog](stripelog.md) -- [Log](log.md) +- [TinyLog](log_family/tinylog.md) +- [StripeLog](log_family/stripelog.md) +- [Log](log_family/log.md) ### Движки для интеграции {#dvizhki-dlia-integratsii} @@ -41,27 +41,27 @@ Движки семейства: -- [Kafka](kafka.md) -- [MySQL](mysql.md) -- [ODBC](odbc.md) -- [JDBC](jdbc.md) +- [Kafka](integrations/kafka.md) +- [MySQL](integrations/mysql.md) +- [ODBC](integrations/odbc.md) +- [JDBC](integrations/jdbc.md) ### Специальные движки {#spetsialnye-dvizhki} Движки семейства: -- [Distributed](distributed.md) -- [MaterializedView](materializedview.md) -- [Dictionary](dictionary.md) -- [Merge](merge.md) -- [File](file.md) -- [Null](null.md) -- [Set](set.md) -- [Join](join.md) -- [URL](url.md) -- [View](view.md) -- [Memory](memory.md) -- [Buffer](buffer.md) +- [Distributed](special/distributed.md) +- [MaterializedView](special/materializedview.md) +- [Dictionary](special/dictionary.md) +- [Merge](special/merge.md) +- [File](special/file.md) +- [Null](special/null.md) +- [Set](special/set.md) +- [Join](special/join.md) +- [URL](special/url.md) +- [View](special/view.md) +- [Memory](special/memory.md) +- [Buffer](special/buffer.md) ## Виртуальные столбцы {#table_engines-virtual-columns} diff --git a/docs/ru/operations/table_engines/hdfs.md b/docs/ru/engines/table_engines/integrations/hdfs.md similarity index 94% rename from docs/ru/operations/table_engines/hdfs.md rename to docs/ru/engines/table_engines/integrations/hdfs.md index 4f892b1e492..26b97a99f77 100644 --- a/docs/ru/operations/table_engines/hdfs.md +++ b/docs/ru/engines/table_engines/integrations/hdfs.md @@ -1,6 +1,6 @@ # HDFS {#table_engines-hdfs} -Управляет данными в HDFS. Данный движок похож на движки [File](file.md) и [URL](url.md). +Управляет данными в HDFS. Данный движок похож на движки [File](../special/file.md) и [URL](../special/url.md). ## Использование движка {#ispolzovanie-dvizhka} @@ -9,7 +9,7 @@ ENGINE = HDFS(URI, format) ``` В параметр `URI` нужно передавать полный URI файла в HDFS. -Параметр `format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT`, и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../interfaces/formats.md#formats). +Параметр `format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT`, и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../../interfaces/formats.md#formats). Часть URI с путем файла может содержать шаблоны. В этом случае таблица может использоваться только для чтения. **Пример:** @@ -56,7 +56,7 @@ SELECT * FROM hdfs_engine_table LIMIT 2 - `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). -Конструкция с `{}` аналогична табличной функции [remote](../../query_language/table_functions/remote.md). +Конструкция с `{}` аналогична табличной функции [remote](../../../engines/table_engines/integrations/hdfs.md). 
**Пример** diff --git a/docs/ru/engines/table_engines/integrations/index.md b/docs/ru/engines/table_engines/integrations/index.md new file mode 100644 index 00000000000..716d00cdd98 --- /dev/null +++ b/docs/ru/engines/table_engines/integrations/index.md @@ -0,0 +1,5 @@ +--- +toc_folder_title: Integrations +toc_priority: 30 +--- + diff --git a/docs/ru/operations/table_engines/jdbc.md b/docs/ru/engines/table_engines/integrations/jdbc.md similarity index 95% rename from docs/ru/operations/table_engines/jdbc.md rename to docs/ru/engines/table_engines/integrations/jdbc.md index d9a66244849..ae461a539be 100644 --- a/docs/ru/operations/table_engines/jdbc.md +++ b/docs/ru/engines/table_engines/integrations/jdbc.md @@ -4,7 +4,7 @@ Для реализации соединения по JDBC ClickHouse использует отдельную программу [clickhouse-jdbc-bridge](https://github.com/alex-krash/clickhouse-jdbc-bridge), которая должна запускаться как демон. -Движок поддерживает тип данных [Nullable](../../data_types/nullable.md). +Движок поддерживает тип данных [Nullable](../../../engines/table_engines/integrations/jdbc.md). ## Создание таблицы {#sozdanie-tablitsy} @@ -82,6 +82,6 @@ FROM jdbc_table ## Смотрите также {#smotrite-takzhe} -- [Табличная функция JDBC](../../query_language/table_functions/jdbc.md). +- [Табличная функция JDBC](../../../engines/table_engines/integrations/jdbc.md). [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/jdbc/) diff --git a/docs/ru/operations/table_engines/kafka.md b/docs/ru/engines/table_engines/integrations/kafka.md similarity index 95% rename from docs/ru/operations/table_engines/kafka.md rename to docs/ru/engines/table_engines/integrations/kafka.md index 960eecf49d0..c43a7b5d37d 100644 --- a/docs/ru/operations/table_engines/kafka.md +++ b/docs/ru/engines/table_engines/integrations/kafka.md @@ -33,7 +33,7 @@ SETTINGS - `kafka_broker_list` – перечень брокеров, разделенный запятыми (`localhost:9092`). - `kafka_topic_list` – перечень необходимых топиков Kafka. - `kafka_group_name` – группа потребителя Kafka. Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы. -- `kafka_format` – формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../interfaces/formats.md). +- `kafka_format` – формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../../interfaces/formats.md). Опциональные параметры: @@ -123,7 +123,7 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format SELECT level, sum(total) FROM daily GROUP BY level; ``` -Для улучшения производительности полученные сообщения группируются в блоки размера [max\_insert\_block\_size](../settings/settings.md#settings-max_insert_block_size). Если блок не удалось сформировать за [stream\_flush\_interval\_ms](../settings/settings.md) миллисекунд, то данные будут сброшены в таблицу независимо от полноты блока. +Для улучшения производительности полученные сообщения группируются в блоки размера [max\_insert\_block\_size](../../../operations/settings/settings.md#settings-max_insert_block_size). Если блок не удалось сформировать за [stream\_flush\_interval\_ms](../../../operations/settings/settings.md) миллисекунд, то данные будут сброшены в таблицу независимо от полноты блока. 
Чтобы остановить получение данных топика или изменить логику преобразования, отсоедините материализованное представление: diff --git a/docs/ru/operations/table_engines/mysql.md b/docs/ru/engines/table_engines/integrations/mysql.md similarity index 89% rename from docs/ru/operations/table_engines/mysql.md rename to docs/ru/engines/table_engines/integrations/mysql.md index 09ca9077c2c..7260b182c6d 100644 --- a/docs/ru/operations/table_engines/mysql.md +++ b/docs/ru/engines/table_engines/integrations/mysql.md @@ -13,12 +13,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']); ``` -Смотрите подробное описание запроса [CREATE TABLE](../../query_language/create.md#create-table-query). +Смотрите подробное описание запроса [CREATE TABLE](../../../engines/table_engines/integrations/mysql.md#create-table-query). Структура таблицы может отличаться от исходной структуры таблицы MySQL: - Имена столбцов должны быть такими же, как в исходной таблице MySQL, но вы можете использовать только некоторые из этих столбцов и в любом порядке. -- Типы столбцов могут отличаться от типов в исходной таблице MySQL. ClickHouse пытается [приводить](../../query_language/functions/type_conversion_functions.md#type_conversion_function-cast) значения к типам данных ClickHouse. +- Типы столбцов могут отличаться от типов в исходной таблице MySQL. ClickHouse пытается [приводить](../../../engines/table_engines/integrations/mysql.md#type_conversion_function-cast) значения к типам данных ClickHouse. **Параметры движка** @@ -92,7 +92,7 @@ SELECT * FROM mysql_table ## Смотрите также {#smotrite-takzhe} -- [Табличная функция ‘mysql’](../../query_language/table_functions/mysql.md) -- [Использование MySQL в качестве источника для внешнего словаря](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-mysql) +- [Табличная функция ‘mysql’](../../../engines/table_engines/integrations/mysql.md) +- [Использование MySQL в качестве источника для внешнего словаря](../../../engines/table_engines/integrations/mysql.md#dicts-external_dicts_dict_sources-mysql) [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/mysql/) diff --git a/docs/ru/operations/table_engines/odbc.md b/docs/ru/engines/table_engines/integrations/odbc.md similarity index 90% rename from docs/ru/operations/table_engines/odbc.md rename to docs/ru/engines/table_engines/integrations/odbc.md index b3dde77890c..6124a928315 100644 --- a/docs/ru/operations/table_engines/odbc.md +++ b/docs/ru/engines/table_engines/integrations/odbc.md @@ -4,7 +4,7 @@ Чтобы использование ODBC было безопасным, ClickHouse использует отдельную программу `clickhouse-odbc-bridge`. Если драйвер ODBC подгружать непосредственно из `clickhouse-server`, то проблемы с драйвером могут привести к аварийной остановке сервера ClickHouse. ClickHouse автоматически запускает `clickhouse-odbc-bridge` по мере необходимости. Программа устанавливается из того же пакета, что и `clickhouse-server`. -Движок поддерживает тип данных [Nullable](../../data_types/nullable.md). +Движок поддерживает тип данных [Nullable](../../../engines/table_engines/integrations/odbc.md). 
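Before pointing ClickHouse at an ODBC DSN, it can save time to verify the DSN itself with the unixODBC tooling. A hedged sketch, where `mysqlconn` is a hypothetical DSN name defined in `/etc/odbc.ini`:

``` bash
# Sketch: check that the DSN resolves and accepts connections before using it from ClickHouse.
isql -v mysqlconn
```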
## Создание таблицы {#sozdanie-tablitsy} @@ -18,12 +18,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ENGINE = ODBC(connection_settings, external_database, external_table) ``` -Смотрите подробное описание запроса [CREATE TABLE](../../query_language/create.md#create-table-query). +Смотрите подробное описание запроса [CREATE TABLE](../../../engines/table_engines/integrations/odbc.md#create-table-query). Структура таблицы может отличаться от структуры исходной таблицы в удалённой СУБД: - Имена столбцов должны быть такими же, как в исходной таблице, но вы можете использовать только некоторые из этих столбцов и в любом порядке. -- Типы столбцов могут отличаться от типов аналогичных столбцов в исходной таблице. ClickHouse пытается [приводить](../../query_language/functions/type_conversion_functions.md#type_conversion_function-cast) значения к типам данных ClickHouse. +- Типы столбцов могут отличаться от типов аналогичных столбцов в исходной таблице. ClickHouse пытается [приводить](../../../engines/table_engines/integrations/odbc.md#type_conversion_function-cast) значения к типам данных ClickHouse. **Параметры движка** @@ -119,7 +119,7 @@ SELECT * FROM odbc_t ## Смотрите также {#smotrite-takzhe} -- [Внешние словари ODBC](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc) -- [Табличная функция odbc](../../query_language/table_functions/odbc.md) +- [Внешние словари ODBC](../../../engines/table_engines/integrations/odbc.md#dicts-external_dicts_dict_sources-odbc) +- [Табличная функция odbc](../../../engines/table_engines/integrations/odbc.md) [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/odbc/) diff --git a/docs/ru/engines/table_engines/log_family/index.md b/docs/ru/engines/table_engines/log_family/index.md new file mode 100644 index 00000000000..a64371200a6 --- /dev/null +++ b/docs/ru/engines/table_engines/log_family/index.md @@ -0,0 +1,5 @@ +--- +toc_folder_title: Log Family +toc_priority: 29 +--- + diff --git a/docs/ru/operations/table_engines/log.md b/docs/ru/engines/table_engines/log_family/log.md similarity index 100% rename from docs/ru/operations/table_engines/log.md rename to docs/ru/engines/table_engines/log_family/log.md diff --git a/docs/ru/operations/table_engines/log_family.md b/docs/ru/engines/table_engines/log_family/log_family.md similarity index 97% rename from docs/ru/operations/table_engines/log_family.md rename to docs/ru/engines/table_engines/log_family/log_family.md index 597d331981c..f132c2e8d33 100644 --- a/docs/ru/operations/table_engines/log_family.md +++ b/docs/ru/engines/table_engines/log_family/log_family.md @@ -20,7 +20,7 @@ Во время запросов `INSERT` таблица блокируется, а другие запросы на чтение и запись ожидают разблокировки таблицы. Если запросов на запись данных нет, то можно выполнять любое количество конкуретных запросов на чтение. -- Не поддерживают операции [мутации](../../query_language/alter.md#alter-mutations). +- Не поддерживают операции [мутации](../../../engines/table_engines/log_family/log_family.md#alter-mutations). - Не поддерживают индексы. 
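As a sketch of what the log_family.md properties above mean in practice (append-only inserts, no mutations, no indexes; the table name is hypothetical):

``` sql
CREATE TABLE log_example (message String) ENGINE = TinyLog;

-- The whole table is locked for the duration of the INSERT.
INSERT INTO log_example VALUES ('first row');

-- Any number of concurrent reads is fine while no write is running.
SELECT * FROM log_example;

-- Not supported by the Log family (would fail):
-- ALTER TABLE log_example DELETE WHERE message = 'first row';
```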
diff --git a/docs/ru/operations/table_engines/stripelog.md b/docs/ru/engines/table_engines/log_family/stripelog.md similarity index 98% rename from docs/ru/operations/table_engines/stripelog.md rename to docs/ru/engines/table_engines/log_family/stripelog.md index 3f69e1bdd73..4eb4d4620de 100644 --- a/docs/ru/operations/table_engines/stripelog.md +++ b/docs/ru/engines/table_engines/log_family/stripelog.md @@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ) ENGINE = StripeLog ``` -Смотрите подробное описание запроса [CREATE TABLE](../../query_language/create.md#create-table-query). +Смотрите подробное описание запроса [CREATE TABLE](../../../engines/table_engines/log_family/stripelog.md#create-table-query). ## Запись данных {#table_engines-stripelog-writing-the-data} diff --git a/docs/ru/operations/table_engines/tinylog.md b/docs/ru/engines/table_engines/log_family/tinylog.md similarity index 100% rename from docs/ru/operations/table_engines/tinylog.md rename to docs/ru/engines/table_engines/log_family/tinylog.md diff --git a/docs/ru/operations/table_engines/aggregatingmergetree.md b/docs/ru/engines/table_engines/mergetree_family/aggregatingmergetree.md similarity index 95% rename from docs/ru/operations/table_engines/aggregatingmergetree.md rename to docs/ru/engines/table_engines/mergetree_family/aggregatingmergetree.md index 64ae3aa037c..8fdf063f569 100644 --- a/docs/ru/operations/table_engines/aggregatingmergetree.md +++ b/docs/ru/engines/table_engines/mergetree_family/aggregatingmergetree.md @@ -4,7 +4,7 @@ Таблицы типа `AggregatingMergeTree` могут использоваться для инкрементальной агрегации данных, в том числе, для агрегирующих материализованных представлений. -Движок обрабатывает все столбцы типа [AggregateFunction](../../data_types/nested_data_structures/aggregatefunction.md). +Движок обрабатывает все столбцы типа [AggregateFunction](../../../engines/table_engines/mergetree_family/aggregatingmergetree.md). Использование `AggregatingMergeTree` оправдано только в том случае, когда это уменьшает количество строк на порядки. @@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -Описание параметров запроса смотрите в [описании запроса](../../query_language/create.md). +Описание параметров запроса смотрите в [описании запроса](../../../engines/table_engines/mergetree_family/aggregatingmergetree.md). **Секции запроса** diff --git a/docs/ru/operations/table_engines/collapsingmergetree.md b/docs/ru/engines/table_engines/mergetree_family/collapsingmergetree.md similarity index 99% rename from docs/ru/operations/table_engines/collapsingmergetree.md rename to docs/ru/engines/table_engines/mergetree_family/collapsingmergetree.md index 38d4d475e07..5179ac06fa5 100644 --- a/docs/ru/operations/table_engines/collapsingmergetree.md +++ b/docs/ru/engines/table_engines/mergetree_family/collapsingmergetree.md @@ -21,7 +21,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -Подробности про `CREATE TABLE` смотрите в [описании запроса](../../query_language/create.md). +Подробности про `CREATE TABLE` смотрите в [описании запроса](../../../engines/table_engines/mergetree_family/collapsingmergetree.md). 
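Returning to the aggregatingmergetree.md hunk above: it mentions `AggregateFunction` columns only in prose, so a minimal round trip with the `-State` and `-Merge` combinators may help; `source_table` and the other names here are hypothetical:

``` sql
CREATE TABLE agg_example (
    key UInt32,
    total AggregateFunction(sum, UInt64)
) ENGINE = AggregatingMergeTree()
ORDER BY key;

-- Store partial aggregation states with the -State combinator...
INSERT INTO agg_example
SELECT key, sumState(value)
FROM source_table
GROUP BY key;

-- ...and finalize them with -Merge at query time.
SELECT key, sumMerge(total)
FROM agg_example
GROUP BY key;
```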
**Параметры CollapsingMergeTree** diff --git a/docs/ru/operations/table_engines/custom_partitioning_key.md b/docs/ru/engines/table_engines/mergetree_family/custom_partitioning_key.md similarity index 91% rename from docs/ru/operations/table_engines/custom_partitioning_key.md rename to docs/ru/engines/table_engines/mergetree_family/custom_partitioning_key.md index c2b846ef3c1..039ab1ba0cf 100644 --- a/docs/ru/operations/table_engines/custom_partitioning_key.md +++ b/docs/ru/engines/table_engines/mergetree_family/custom_partitioning_key.md @@ -1,6 +1,6 @@ # Произвольный ключ партиционирования {#proizvolnyi-kliuch-partitsionirovaniia} -Партиционирование данных доступно для таблиц семейства [MergeTree](mergetree.md) (включая [реплицированные таблицы](replication.md)). Таблицы [MaterializedView](materializedview.md), созданные на основе таблиц MergeTree, также поддерживают партиционирование. +Партиционирование данных доступно для таблиц семейства [MergeTree](mergetree.md) (включая [реплицированные таблицы](replication.md)). Таблицы [MaterializedView](../special/materializedview.md), созданные на основе таблиц MergeTree, также поддерживают партиционирование. Партиция – это набор записей в таблице, объединенных по какому-либо критерию. Например, партиция может быть по месяцу, по дню или по типу события. Данные для разных партиций хранятся отдельно. Это позволяет оптимизировать работу с данными, так как при обработке запросов будет использоваться только необходимое подмножество из всевозможных данных. Например, при получении данных за определенный месяц, ClickHouse будет считывать данные только за этот месяц. @@ -33,7 +33,7 @@ ORDER BY (CounterID, StartDate, intHash32(UserID)); !!! info "Info" Не рекомендуется делать слишком гранулированное партиционирование – то есть задавать партиции по столбцу, в котором будет слишком большой разброс значений (речь идет о порядке более тысячи партиций). Это приведет к скоплению большого числа файлов и файловых дескрипторов в системе, что может значительно снизить производительность запросов `SELECT`. -Чтобы получить набор кусков и партиций таблицы, можно воспользоваться системной таблицей [system.parts](../system_tables.md#system_tables-parts). В качестве примера рассмотрим таблицу `visits`, в которой задано партиционирование по месяцам. Выполним `SELECT` для таблицы `system.parts`: +Чтобы получить набор кусков и партиций таблицы, можно воспользоваться системной таблицей [system.parts](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#system_tables-parts). В качестве примера рассмотрим таблицу `visits`, в которой задано партиционирование по месяцам. Выполним `SELECT` для таблицы `system.parts`: ``` sql SELECT @@ -74,7 +74,7 @@ WHERE table = 'visits' Как видно из примера выше, таблица содержит несколько отдельных кусков для одной и той же партиции (например, куски `201901_1_3_1` и `201901_1_9_2` принадлежат партиции `201901`). Это означает, что эти куски еще не были объединены – в файловой системе они хранятся отдельно. После того как будет выполнено автоматическое слияние данных (выполняется примерно спустя 10 минут после вставки данных), исходные куски будут объединены в один более крупный кусок и помечены как неактивные. -Вы можете запустить внеочередное слияние данных с помощью запроса [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize). 
Пример: +Вы можете запустить внеочередное слияние данных с помощью запроса [OPTIMIZE](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#misc_operations-optimize). Пример: ``` sql OPTIMIZE TABLE visits PARTITION 201902; @@ -115,12 +115,12 @@ drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached Директория `detached` содержит куски, отсоединенные от таблицы с помощью запроса [DETACH](#alter_detach-partition). Поврежденные куски также попадают в эту директорию – они не удаляются с сервера. -Сервер не использует куски из директории `detached`. Вы можете в любое время добавлять, удалять, модифицировать данные в директории detached - сервер не будет об этом знать, пока вы не сделаете запрос [ATTACH](../../query_language/alter.md#alter_attach-partition). +Сервер не использует куски из директории `detached`. Вы можете в любое время добавлять, удалять, модифицировать данные в директории detached - сервер не будет об этом знать, пока вы не сделаете запрос [ATTACH](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#alter_attach-partition). Следует иметь в виду, что при работающем сервере нельзя вручную изменять набор кусков на файловой системе, так как сервер не будет знать об этом. Для нереплицируемых таблиц, вы можете это делать при остановленном сервере, однако это не рекомендуется. Для реплицируемых таблиц, набор кусков нельзя менять в любом случае. -ClickHouse позволяет производить различные манипуляции с кусками: удалять, копировать из одной таблицы в другую или создавать их резервные копии. Подробнее см. в разделе [Манипуляции с партициями и кусками](../../query_language/alter.md#alter_manipulations-with-partitions). +ClickHouse позволяет производить различные манипуляции с кусками: удалять, копировать из одной таблицы в другую или создавать их резервные копии. Подробнее см. в разделе [Манипуляции с партициями и кусками](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#alter_manipulations-with-partitions). [Оригинальная статья:](https://clickhouse.tech/docs/ru/operations/table_engines/custom_partitioning_key/) diff --git a/docs/ru/operations/table_engines/graphitemergetree.md b/docs/ru/engines/table_engines/mergetree_family/graphitemergetree.md similarity index 93% rename from docs/ru/operations/table_engines/graphitemergetree.md rename to docs/ru/engines/table_engines/mergetree_family/graphitemergetree.md index c128da7ac02..305300fc9a5 100644 --- a/docs/ru/operations/table_engines/graphitemergetree.md +++ b/docs/ru/engines/table_engines/mergetree_family/graphitemergetree.md @@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -Смотрите описание запроса [CREATE TABLE](../../query_language/create.md#create-table-query). +Смотрите описание запроса [CREATE TABLE](../../../engines/table_engines/mergetree_family/graphitemergetree.md#create-table-query). В таблице должны быть столбцы для следующих данных: @@ -74,7 +74,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ## Конфигурация rollup {#rollup-configuration} -Настройки прореживания данных задаются параметром [graphite\_rollup](../server_settings/settings.md#server_settings-graphite_rollup) в конфигурации сервера . Имя параметра может быть любым. Можно создать несколько конфигураций и использовать их для разных таблиц. 
+Настройки прореживания данных задаются параметром [graphite\_rollup](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-graphite_rollup) в конфигурации сервера . Имя параметра может быть любым. Можно создать несколько конфигураций и использовать их для разных таблиц. Структура конфигурации rollup: diff --git a/docs/ru/engines/table_engines/mergetree_family/index.md b/docs/ru/engines/table_engines/mergetree_family/index.md new file mode 100644 index 00000000000..6a23ef23a8e --- /dev/null +++ b/docs/ru/engines/table_engines/mergetree_family/index.md @@ -0,0 +1,5 @@ +--- +toc_folder_title: MergeTree Family +toc_priority: 28 +--- + diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/engines/table_engines/mergetree_family/mergetree.md similarity index 87% rename from docs/ru/operations/table_engines/mergetree.md rename to docs/ru/engines/table_engines/mergetree_family/mergetree.md index f8bbc983b74..e0ab59c8a05 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/engines/table_engines/mergetree_family/mergetree.md @@ -23,7 +23,7 @@ При необходимости можно задать способ сэмплирования данных в таблице. !!! info "Info" - Движок [Merge](merge.md) не относится к семейству `*MergeTree`. + Движок [Merge](../special/merge.md) не относится к семейству `*MergeTree`. ## Создание таблицы {#table_engine-mergetree-creating-a-table} @@ -44,7 +44,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -Описание параметров смотрите в [описании запроса CREATE](../../query_language/create.md). +Описание параметров смотрите в [описании запроса CREATE](../../../engines/table_engines/mergetree_family/mergetree.md). !!! note "Note" `INDEX` — экспериментальная возможность, смотрите [Индексы пропуска данных](#table_engine-mergetree-data_skipping-indexes). @@ -55,7 +55,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `PARTITION BY` — [ключ партиционирования](custom_partitioning_key.md). - Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../data_types/date.md). В этом случае имена партиций имеют формат `"YYYYMM"`. + Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../../engines/table_engines/mergetree_family/mergetree.md). В этом случае имена партиций имеют формат `"YYYYMM"`. - `ORDER BY` — ключ сортировки. @@ -84,7 +84,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage). - `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage). - `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. До версии 19.11, размер гранул ограничивался только настройкой `index_granularity`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`. 
- - `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../server_settings/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
+ - `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server_configuration_parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
 - `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
 - `merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
@@ -100,7 +100,7 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa
 В примере мы устанавливаем партиционирование по месяцам.
-Также мы задаем выражение для сэмплирования в виде хэша по идентификатору посетителя. Это позволяет псевдослучайным образом перемешать данные в таблице для каждого `CounterID` и `EventDate`. Если при выборке данных задать секцию [SAMPLE](../../query_language/select.md#select-sample-clause), то ClickHouse вернёт равномерно-псевдослучайную выборку данных для подмножества посетителей.
+Также мы задаем выражение для сэмплирования в виде хэша по идентификатору посетителя. Это позволяет псевдослучайным образом перемешать данные в таблице для каждого `CounterID` и `EventDate`. Если при выборке данных задать секцию [SAMPLE](../../../query_language/select.md#select-sample-clause), то ClickHouse вернёт равномерно-псевдослучайную выборку данных для подмножества посетителей.
 `index_granularity` можно было не указывать, поскольку 8192 — это значение по умолчанию.
@@ -122,9 +122,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 **Параметры MergeTree()**
-- `date-column` — имя столбца с типом [Date](../../data_types/date.md). На основе этого столбца ClickHouse автоматически создаёт партиции по месяцам. Имена партиций имеют формат `"YYYYMM"`.
+- `date-column` — имя столбца с типом [Date](../../../data_types/date.md). На основе этого столбца ClickHouse автоматически создаёт партиции по месяцам. Имена партиций имеют формат `"YYYYMM"`.
- `sampling_expression` — выражение для сэмплирования.
-- `(primary, key)` — первичный ключ. Тип — [Tuple()](../../data_types/tuple.md)
+- `(primary, key)` — первичный ключ. Тип — [Tuple()](../../../data_types/tuple.md)
- `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. Для большинства задач подходит значение 8192.
**Пример** @@ -214,7 +214,7 @@ ClickHouse не требует уникального первичного кл В этом сценарии имеет смысл оставить в первичном ключе всего несколько столбцов, которые обеспечат эффективную фильтрацию по индексу, а остальные столбцы-измерения добавить в выражение ключа сортировки. -[ALTER ключа сортировки](../../query_language/alter.md) — лёгкая операция, так как при одновременном добавлении нового столбца в таблицу и ключ сортировки не нужно изменять данные кусков (они остаются упорядоченными и по новому выражению ключа). +[ALTER ключа сортировки](../../../engines/table_engines/mergetree_family/mergetree.md) — лёгкая операция, так как при одновременном добавлении нового столбца в таблицу и ключ сортировки не нужно изменять данные кусков (они остаются упорядоченными и по новому выражению ключа). ### Использование индексов и партиций в запросах {#ispolzovanie-indeksov-i-partitsii-v-zaprosakh} @@ -246,7 +246,7 @@ ClickHouse будет использовать индекс по первичн SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' ``` -Чтобы проверить, сможет ли ClickHouse использовать индекс при выполнении запроса, используйте настройки [force\_index\_by\_date](../settings/settings.md#settings-force_index_by_date) и [force\_primary\_key](../settings/settings.md#settings-force_primary_key). +Чтобы проверить, сможет ли ClickHouse использовать индекс при выполнении запроса, используйте настройки [force\_index\_by\_date](../../../operations/settings/settings.md#settings-force_index_by_date) и [force\_primary\_key](../../../operations/settings/settings.md#settings-force_primary_key). Ключ партиционирования по месяцам обеспечивает чтение только тех блоков данных, которые содержат даты из нужного диапазона. При этом блок данных может содержать данные за многие даты (до целого месяца). В пределах одного блока данные упорядочены по первичному ключу, который может не содержать дату в качестве первого столбца. В связи с этим, при использовании запроса с указанием условия только на дату, но не на префикс первичного ключа, будет читаться данных больше, чем за одну дату. @@ -304,7 +304,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 Поддержанные типы данных: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`. - Фильтром могут пользоваться функции: [equals](../../query_language/functions/comparison_functions.md), [notEquals](../../query_language/functions/comparison_functions.md), [in](../../query_language/functions/in_functions.md), [notIn](../../query_language/functions/in_functions.md). + Фильтром могут пользоваться функции: [equals](../../../engines/table_engines/mergetree_family/mergetree.md), [notEquals](../../../engines/table_engines/mergetree_family/mergetree.md), [in](../../../engines/table_engines/mergetree_family/mergetree.md), [notIn](../../../engines/table_engines/mergetree_family/mergetree.md). 
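Before the declaration examples below, one note on the query side: only predicates built from the functions just listed can be served by a given index type. A sketch with hypothetical table and index names:

``` sql
-- Assuming a declaration such as:  INDEX idx_s s TYPE bloom_filter GRANULARITY 4
SELECT count() FROM t WHERE s = 'needle';     -- equals: the index can be used
SELECT count() FROM t WHERE s IN ('a', 'b');  -- in: the index can be used
SELECT count() FROM t WHERE s < 'needle';     -- less: bloom_filter cannot help here
```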
**Примеры** @@ -321,21 +321,21 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT | Function (operator) / Index | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter | |----------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| -| [equals (=, ==)](../../query_language/functions/comparison_functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, \<\>)](../../query_language/functions/comparison_functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](../../query_language/functions/string_search_functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ | -| [notLike](../../query_language/functions/string_search_functions.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [startsWith](../../query_language/functions/string_functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [endsWith](../../query_language/functions/string_functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [multiSearchAny](../../query_language/functions/string_search_functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [in](../../query_language/functions/in_functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](../../query_language/functions/in_functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (\<)](../../query_language/functions/comparison_functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greater (\>)](../../query_language/functions/comparison_functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [lessOrEquals (\<=)](../../query_language/functions/comparison_functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greaterOrEquals (\>=)](../../query_language/functions/comparison_functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](../../query_language/functions/array_functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](../../query_language/functions/array_functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [equals (=, ==)](../../../engines/table_engines/mergetree_family/mergetree.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, \<\>)](../../../engines/table_engines/mergetree_family/mergetree.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](../../../engines/table_engines/mergetree_family/mergetree.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ | +| [notLike](../../../engines/table_engines/mergetree_family/mergetree.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [startsWith](../../../engines/table_engines/mergetree_family/mergetree.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [endsWith](../../../engines/table_engines/mergetree_family/mergetree.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | +| [multiSearchAny](../../../engines/table_engines/mergetree_family/mergetree.md#function-multisearchany) | ✗ | ✗ | ✔ | ✔ | ✗ | +| [in](../../../engines/table_engines/mergetree_family/mergetree.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](../../../engines/table_engines/mergetree_family/mergetree.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (\<)](../../../engines/table_engines/mergetree_family/mergetree.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greater (\>)](../../../engines/table_engines/mergetree_family/mergetree.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [lessOrEquals (\<=)](../../../engines/table_engines/mergetree_family/mergetree.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greaterOrEquals (\>=)](../../../engines/table_engines/mergetree_family/mergetree.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | 
+| [empty](../../../engines/table_engines/mergetree_family/mergetree.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](../../../engines/table_engines/mergetree_family/mergetree.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | Функции с постоянным агрументом, который меньше, чем размер ngram не могут использовать индекс `ngrambf_v1` для оптимизации запроса. @@ -367,7 +367,7 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT Секция `TTL` может быть установлена как для всей таблицы, так и для каждого отдельного столбца. Правила `TTL` для таблицы позволяют указать целевые диски или тома для фонового перемещения на них частей данных. -Выражения должны возвращать тип [Date](../../data_types/date.md) или [DateTime](../../data_types/datetime.md). +Выражения должны возвращать тип [Date](../../../engines/table_engines/mergetree_family/mergetree.md) или [DateTime](../../../engines/table_engines/mergetree_family/mergetree.md). Для задания времени жизни столбца, например: @@ -376,7 +376,7 @@ TTL time_column TTL time_column + interval ``` -Чтобы задать `interval`, используйте операторы [интервала времени](../../query_language/operators.md#operators-datetime). +Чтобы задать `interval`, используйте операторы [интервала времени](../../../engines/table_engines/mergetree_family/mergetree.md#operators-datetime). ``` sql TTL date_time + INTERVAL 1 MONTH @@ -465,7 +465,7 @@ ALTER TABLE example_table Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управление частотой подобных мёржей, можно задать настройку [merge\_with\_ttl\_timeout](#mergetree_setting-merge_with_ttl_timeout). Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера. -Если вы выполните запрос `SELECT` между слияниями вы можете получить устаревшие данные. Чтобы избежать этого используйте запрос [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) перед `SELECT`. +Если вы выполните запрос `SELECT` между слияниями вы можете получить устаревшие данные. Чтобы избежать этого используйте запрос [OPTIMIZE](../../../engines/table_engines/mergetree_family/mergetree.md#misc_operations-optimize) перед `SELECT`. ## Хранение данных таблицы на нескольких блочных устройствах {#table_engine-mergetree-multiple-volumes} @@ -473,16 +473,16 @@ ALTER TABLE example_table Движки таблиц семейства `MergeTree` могут хранить данные на нескольких блочных устройствах. Это может оказаться полезным, например, при неявном разделении данных одной таблицы на «горячие» и «холодные». Наиболее свежая часть занимает малый объём и запрашивается регулярно, а большой хвост исторических данных запрашивается редко. При наличии в системе нескольких дисков, «горячая» часть данных может быть размещена на быстрых дисках (например, на NVMe SSD или в памяти), а холодная на более медленных (например, HDD). -Минимальной перемещаемой единицей для `MergeTree` является кусок данных (data part). Данные одного куска могут находится только на одном диске. Куски могут перемещаться между дисками в фоне, согласно пользовательским настройкам, а также с помощью запросов [ALTER](../../query_language/alter.md#alter_move-partition). +Минимальной перемещаемой единицей для `MergeTree` является кусок данных (data part). Данные одного куска могут находится только на одном диске. 
Куски могут перемещаться между дисками в фоне, согласно пользовательским настройкам, а также с помощью запросов [ALTER](../../../engines/table_engines/mergetree_family/mergetree.md#alter_move-partition). ### Термины {#terminy} - Диск — примонтированное в файловой системе блочное устройство. -- Диск по умолчанию — диск, на котором находится путь, указанный в конфигурационной настройке сервера [path](../server_settings/settings.md#server_settings-path). +- Диск по умолчанию — диск, на котором находится путь, указанный в конфигурационной настройке сервера [path](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-path). - Том (Volume) — упорядоченный набор равноценных дисков (схоже с [JBOD](https://ru.wikipedia.org/wiki/JBOD)) - Политика хранения (StoragePolicy) — множество томов с правилами перемещения данных между ними. -У всех описанных сущностей при создании указываются имена, можно найти в системных таблицах [system.storage\_policies](../system_tables.md#system_tables-storage_policies) и [system.disks](../system_tables.md#system_tables-disks). Имя политики хранения можно указать в настройке `storage_policy` движков таблиц семейства `MergeTree`. +У всех описанных сущностей при создании указываются имена, можно найти в системных таблицах [system.storage\_policies](../../../engines/table_engines/mergetree_family/mergetree.md#system_tables-storage_policies) и [system.disks](../../../engines/table_engines/mergetree_family/mergetree.md#system_tables-disks). Имя политики хранения можно указать в настройке `storage_policy` движков таблиц семейства `MergeTree`. ### Конфигурация {#table_engine-mergetree-multiple-volumes-configure} @@ -616,9 +616,9 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' В таблицах `MergeTree` данные попадают на диск несколькими способами: - В результате вставки (запрос `INSERT`). -- В фоновых операциях слияний и [мутаций](../../query_language/alter.md#alter-mutations). +- В фоновых операциях слияний и [мутаций](../../../engines/table_engines/mergetree_family/mergetree.md#alter-mutations). - При скачивании данных с другой реплики. -- В результате заморозки партиций [ALTER TABLE … FREEZE PARTITION](../../query_language/alter.md#alter_freeze-partition). +- В результате заморозки партиций [ALTER TABLE … FREEZE PARTITION](../../../engines/table_engines/mergetree_family/mergetree.md#alter_freeze-partition). Во всех случаях, кроме мутаций и заморозки партиций, при записи куска выбирается том и диск в соответствии с указанной конфигурацией хранилища: @@ -627,9 +627,8 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Мутации и запросы заморозки партиций в реализации используют [жесткие ссылки](https://ru.wikipedia.org/wiki/%D0%96%D1%91%D1%81%D1%82%D0%BA%D0%B0%D1%8F_%D1%81%D1%81%D1%8B%D0%BB%D0%BA%D0%B0). Жесткие ссылки между различными дисками не поддерживаются, поэтому в случае таких операций куски размещаются на тех же дисках, что и исходные. -В фоне куски перемещаются между томами на основе информации о занятом месте (настройка `move_factor`) по порядку, в котором указаны тома в конфигурации. Данные никогда не перемещаются с последнего тома и на первый том. Следить за фоновыми перемещениями можно с помощью системных таблиц [system.part\_log](../system_tables.md#system_tables-part-log) (поле `type = MOVE_PART`) и [system.parts](../system_tables.md#system_tables-parts) (поля `path` и `disk`). Также подробная информация о перемещениях доступна в логах сервера. 
- -С помощью запроса [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../query_language/alter.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. +В фоне куски перемещаются между томами на основе информации о занятом месте (настройка `move_factor`) по порядку, в котором указаны тома в конфигурации. Данные никогда не перемещаются с последнего тома и на первый том. Следить за фоновыми перемещениями можно с помощью системных таблиц [system.part\_log](../../../engines/table_engines/mergetree_family/mergetree.md#system_tables-part-log) (поле `type = MOVE_PART`) и [system.parts](../../../engines/table_engines/mergetree_family/mergetree.md#system_tables-parts) (поля `path` и `disk`). Также подробная информация о перемещениях доступна в логах сервера. +С помощью запроса [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../engines/table_engines/mergetree_family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. Перемещения данных не взаимодействуют с репликацией данных, поэтому на разных репликах одной и той же таблицы могут быть указаны разные политики хранения. diff --git a/docs/ru/operations/table_engines/replacingmergetree.md b/docs/ru/engines/table_engines/mergetree_family/replacingmergetree.md similarity index 96% rename from docs/ru/operations/table_engines/replacingmergetree.md rename to docs/ru/engines/table_engines/mergetree_family/replacingmergetree.md index b403e485741..40a1eb1a9c6 100644 --- a/docs/ru/operations/table_engines/replacingmergetree.md +++ b/docs/ru/engines/table_engines/mergetree_family/replacingmergetree.md @@ -21,7 +21,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -Описание параметров запроса смотрите в [описании запроса](../../query_language/create.md). +Описание параметров запроса смотрите в [описании запроса](../../../engines/table_engines/mergetree_family/replacingmergetree.md). **Параметры ReplacingMergeTree** diff --git a/docs/ru/operations/table_engines/replication.md b/docs/ru/engines/table_engines/mergetree_family/replication.md similarity index 97% rename from docs/ru/operations/table_engines/replication.md rename to docs/ru/engines/table_engines/mergetree_family/replication.md index 67f8d5f0161..14a50a2b94b 100644 --- a/docs/ru/operations/table_engines/replication.md +++ b/docs/ru/engines/table_engines/mergetree_family/replication.md @@ -14,7 +14,7 @@ Репликация не зависит от шардирования. На каждом шарде репликация работает независимо. -Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../query_language/alter.md#query_language_queries_alter)). +Реплицируются сжатые данные запросов `INSERT`, `ALTER` (см. подробности в описании запроса [ALTER](../../../engines/table_engines/mergetree_family/replication.md#query_language_queries_alter)). 
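Since replication.md enters the patch at this point, a reminder of what a replicated table declaration looks like; the ZooKeeper path convention and the `{shard}`/`{replica}` macros are assumed to be configured on each server:

``` sql
CREATE TABLE table_name (
    EventDate DateTime,
    CounterID UInt32,
    UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);
```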
Запросы `CREATE`, `DROP`, `ATTACH`, `DETACH` и `RENAME` выполняются на одном сервере и не реплицируются: @@ -24,7 +24,7 @@ ClickHouse хранит метаинформацию о репликах в [Apache ZooKeeper](https://zookeeper.apache.org). Используйте ZooKeeper 3.4.5 или новее. -Для использовании репликации, установите параметры в секции [zookeeper](../server_settings/settings.md#server-settings_zookeeper) конфигурации сервера. +Для использовании репликации, установите параметры в секции [zookeeper](../../../operations/server_configuration_parameters/settings.md#server-settings_zookeeper) конфигурации сервера. !!! attention "Внимание" Не пренебрегайте настройками безопасности. ClickHouse поддерживает [ACL схему](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) `digest` подсистемы безопасности ZooKeeper. @@ -52,7 +52,7 @@ ClickHouse хранит метаинформацию о репликах в [Apa Если в конфигурационном файле не настроен ZooKeeper, то вы не сможете создать реплицируемые таблицы, а уже имеющиеся реплицируемые таблицы будут доступны в режиме только на чтение. -При запросах `SELECT`, ZooKeeper не используется, т.е. репликация не влияет на производительность `SELECT` и запросы работают так же быстро, как и для нереплицируемых таблиц. При запросах к распределенным реплицированным таблицам поведение ClickHouse регулируется настройками [max\_replica\_delay\_for\_distributed\_queries](../settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback\_to\_stale\_replicas\_for\_distributed\_queries](../settings/settings.md). +При запросах `SELECT`, ZooKeeper не используется, т.е. репликация не влияет на производительность `SELECT` и запросы работают так же быстро, как и для нереплицируемых таблиц. При запросах к распределенным реплицированным таблицам поведение ClickHouse регулируется настройками [max\_replica\_delay\_for\_distributed\_queries](../../../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback\_to\_stale\_replicas\_for\_distributed\_queries](../../../operations/settings/settings.md). При каждом запросе `INSERT`, делается около десятка записей в ZooKeeper в рамках нескольких транзакций. (Чтобы быть более точным, это для каждого вставленного блока данных; запрос INSERT содержит один блок или один блок на `max_insert_block_size = 1048576` строк.) Это приводит к некоторому увеличению задержек при `INSERT`, по сравнению с нереплицируемыми таблицами. Но если придерживаться обычных рекомендаций - вставлять данные пачками не более одного `INSERT` в секунду, то это не составляет проблем. На всём кластере ClickHouse, использующим для координации один кластер ZooKeeper, может быть в совокупности несколько сотен `INSERT` в секунду. Пропускная способность при вставке данных (количество строчек в секунду) такая же высокая, как для нереплицируемых таблиц. @@ -64,7 +64,7 @@ ClickHouse хранит метаинформацию о репликах в [Apa Каждый блок данных записывается атомарно. Запрос INSERT разбивается на блоки данных размером до `max_insert_block_size = 1048576` строк. То есть, если в запросе `INSERT` менее 1048576 строк, то он делается атомарно. -Блоки данных дедуплицируются. При многократной записи одного и того же блока данных (блоков данных одинакового размера, содержащих одни и те же строчки в одном и том же порядке), блок будет записан только один раз. Это сделано для того, чтобы в случае сбоя в сети, когда клиентское приложение не может понять, были ли данные записаны в БД, можно было просто повторить запрос `INSERT`. 
При этом не имеет значения, на какую реплику будут отправлены INSERT-ы с одинаковыми данными. Запрос `INSERT` идемпотентный. Параметры дедуплицирования регулируются настройками сервера [merge\_tree](../server_settings/settings.md#server_settings-merge_tree) +Блоки данных дедуплицируются. При многократной записи одного и того же блока данных (блоков данных одинакового размера, содержащих одни и те же строчки в одном и том же порядке), блок будет записан только один раз. Это сделано для того, чтобы в случае сбоя в сети, когда клиентское приложение не может понять, были ли данные записаны в БД, можно было просто повторить запрос `INSERT`. При этом не имеет значения, на какую реплику будут отправлены INSERT-ы с одинаковыми данными. Запрос `INSERT` идемпотентный. Параметры дедуплицирования регулируются настройками сервера [merge\_tree](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-merge_tree) При репликации, по сети передаются только исходные вставляемые данные. Дальнейшие преобразования данных (слияния) координируются и делаются на всех репликах одинаковым образом. За счёт этого минимизируется использование сети, и благодаря этому, репликация хорошо работает при расположении реплик в разных дата-центрах. (Стоит заметить, что дублирование данных в разных дата-центрах, по сути, является основной задачей репликации). diff --git a/docs/ru/operations/table_engines/summingmergetree.md b/docs/ru/engines/table_engines/mergetree_family/summingmergetree.md similarity index 91% rename from docs/ru/operations/table_engines/summingmergetree.md rename to docs/ru/engines/table_engines/mergetree_family/summingmergetree.md index 6b1a41384c0..b69f58c3dac 100644 --- a/docs/ru/operations/table_engines/summingmergetree.md +++ b/docs/ru/engines/table_engines/mergetree_family/summingmergetree.md @@ -19,7 +19,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -Описание параметров запроса смотрите в [описании запроса](../../query_language/create.md). +Описание параметров запроса смотрите в [описании запроса](../../../engines/table_engines/mergetree_family/summingmergetree.md). **Параметры SummingMergeTree** @@ -91,7 +91,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key При вставке данных в таблицу они сохраняются как есть. Периодически ClickHouse выполняет слияние вставленных кусков данных и именно в этот момент производится суммирование и замена многих строк с одинаковым первичным ключом на одну для каждого результирующего куска данных. -ClickHouse может слить куски данных таким образом, что не все строки с одинаковым первичным ключом окажутся в одном финальном куске, т.е. суммирование будет не полным. Поэтому, при выборке данных (`SELECT`) необходимо использовать агрегатную функцию [sum()](../../query_language/agg_functions/reference.md#agg_function-sum) и секцию `GROUP BY` как описано в примере выше. +ClickHouse может слить куски данных таким образом, что не все строки с одинаковым первичным ключом окажутся в одном финальном куске, т.е. суммирование будет не полным. Поэтому, при выборке данных (`SELECT`) необходимо использовать агрегатную функцию [sum()](../../../engines/table_engines/mergetree_family/summingmergetree.md#agg_function-sum) и секцию `GROUP BY` как описано в примере выше. 
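The summingmergetree.md hunk above quotes only the query side of its `summtt` example; reconstructed in full (and therefore approximate), the round trip is:

``` sql
CREATE TABLE summtt (key UInt32, value UInt32)
ENGINE = SummingMergeTree()
ORDER BY key;

INSERT INTO summtt VALUES (1, 1), (1, 2), (2, 1);

-- Rows with equal keys may not have been merged yet, hence sum() + GROUP BY:
SELECT key, sum(value) FROM summtt GROUP BY key;
```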
### Общие правила суммирования {#obshchie-pravila-summirovaniia} @@ -105,7 +105,7 @@ ClickHouse может слить куски данных таким образо ### Суммирование в столбцах AggregateFunction {#summirovanie-v-stolbtsakh-aggregatefunction} -Для столбцов типа [AggregateFunction](../../data_types/nested_data_structures/aggregatefunction.md#data_type-aggregatefunction) ClickHouse выполняет агрегацию согласно заданной функции, повторяя поведение движка [AggregatingMergeTree](aggregatingmergetree.md). +Для столбцов типа [AggregateFunction](../../../engines/table_engines/mergetree_family/summingmergetree.md#data_type-aggregatefunction) ClickHouse выполняет агрегацию согласно заданной функции, повторяя поведение движка [AggregatingMergeTree](aggregatingmergetree.md). ### Вложенные структуры {#vlozhennye-struktury} @@ -127,7 +127,7 @@ ClickHouse может слить куски данных таким образо [(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)] ``` -При запросе данных используйте функцию [sumMap(key, value)](../../query_language/agg_functions/reference.md) для агрегации `Map`. +При запросе данных используйте функцию [sumMap(key, value)](../../../engines/table_engines/mergetree_family/summingmergetree.md) для агрегации `Map`. Для вложенной структуры данных не нужно указывать её столбцы в кортеже столбцов для суммирования. diff --git a/docs/ru/operations/table_engines/versionedcollapsingmergetree.md b/docs/ru/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md similarity index 99% rename from docs/ru/operations/table_engines/versionedcollapsingmergetree.md rename to docs/ru/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md index a93b56f3c52..90647edd0eb 100644 --- a/docs/ru/operations/table_engines/versionedcollapsingmergetree.md +++ b/docs/ru/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md @@ -24,7 +24,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -Подробности про `CREATE TABLE` смотрите в [описании запроса](../../query_language/create.md). +Подробности про `CREATE TABLE` смотрите в [описании запроса](../../../engines/table_engines/mergetree_family/versionedcollapsingmergetree.md). **Параметры движка** diff --git a/docs/ru/operations/table_engines/buffer.md b/docs/ru/engines/table_engines/special/buffer.md similarity index 100% rename from docs/ru/operations/table_engines/buffer.md rename to docs/ru/engines/table_engines/special/buffer.md diff --git a/docs/ru/operations/table_engines/dictionary.md b/docs/ru/engines/table_engines/special/dictionary.md similarity index 94% rename from docs/ru/operations/table_engines/dictionary.md rename to docs/ru/engines/table_engines/special/dictionary.md index 9f764a65ffa..fac22b5d2f2 100644 --- a/docs/ru/operations/table_engines/dictionary.md +++ b/docs/ru/engines/table_engines/special/dictionary.md @@ -1,6 +1,6 @@ # Dictionary {#dictionary} -Движок `Dictionary` отображает данные [словаря](../../query_language/dicts/external_dicts.md) как таблицу ClickHouse. +Движок `Dictionary` отображает данные [словаря](../../../engines/table_engines/special/dictionary.md) как таблицу ClickHouse. Рассмотрим для примера словарь `products` со следующей конфигурацией: @@ -57,7 +57,7 @@ WHERE name = 'products' └──────────┴──────┴────────┴─────────────────┴─────────────────┴─────────────────┴───────────────┴─────────────────┘ ``` -В таком виде данные из словаря можно получить при помощи функций [dictGet\*](../../query_language/functions/ext_dict_functions.md#ext_dict_functions). 
+В таком виде данные из словаря можно получить при помощи функций [dictGet\*](../../../engines/table_engines/special/dictionary.md#ext_dict_functions). Такое представление неудобно, когда нам необходимо получить данные в чистом виде, а также при выполнении операции `JOIN`. Для этих случаев можно использовать движок `Dictionary`, который отобразит данные словаря в таблицу. diff --git a/docs/ru/operations/table_engines/distributed.md b/docs/ru/engines/table_engines/special/distributed.md similarity index 92% rename from docs/ru/operations/table_engines/distributed.md rename to docs/ru/engines/table_engines/special/distributed.md index 73547e76692..9fc7b470f1d 100644 --- a/docs/ru/operations/table_engines/distributed.md +++ b/docs/ru/engines/table_engines/special/distributed.md @@ -61,12 +61,12 @@ logs - имя кластера в конфигурационном файле с В качестве параметров для каждого сервера указываются `host`, `port` и, не обязательно, `user`, `password`, `secure`, `compression`: - `host` - адрес удалённого сервера. Может быть указан домен, или IPv4 или IPv6 адрес. В случае указания домена, при старте сервера делается DNS запрос, и результат запоминается на всё время работы сервера. Если DNS запрос неуспешен, то сервер не запускается. Если вы изменяете DNS-запись, перезапустите сервер. - `port` - TCP-порт для межсерверного взаимодействия (в конфиге - tcp\_port, обычно 9000). Не перепутайте с http\_port. -- `user` - имя пользователя для соединения с удалённым сервером. по умолчанию - default. Этот пользователь должен иметь доступ для соединения с указанным сервером. Доступы настраиваются в файле users.xml, подробнее смотрите в разделе [Права доступа](../../operations/access_rights.md). +- `user` - имя пользователя для соединения с удалённым сервером. по умолчанию - default. Этот пользователь должен иметь доступ для соединения с указанным сервером. Доступы настраиваются в файле users.xml, подробнее смотрите в разделе [Права доступа](../../../operations/access_rights.md). - `password` - пароль для соединения с удалённым сервером, в открытом виде. по умолчанию - пустая строка. - `secure` - Использовать шифрованное соединение ssl, Обычно используется с портом `port` = 9440. Сервер должен слушать порт 9440 с корректными настройками сертификатов. - `compression` - Использовать сжатие данных. По умолчанию: true. -При указании реплик, для каждого из шардов, при чтении, будет выбрана одна из доступных реплик. Можно настроить алгоритм балансировки нагрузки (то есть, предпочтения, на какую из реплик идти) - см. настройку [load\_balancing](../settings/settings.md#settings-load_balancing). +При указании реплик, для каждого из шардов, при чтении, будет выбрана одна из доступных реплик. Можно настроить алгоритм балансировки нагрузки (то есть, предпочтения, на какую из реплик идти) - см. настройку [load\_balancing](../../../operations/settings/settings.md#settings-load_balancing). Если соединение с сервером не установлено, то будет произведена попытка соединения с небольшим таймаутом. Если соединиться не удалось, то будет выбрана следующая реплика, и так для всех реплик. Если попытка соединения для всех реплик не удалась, то будут снова произведены попытки соединения по кругу, и так несколько раз. Это работает в пользу отказоустойчивости, хотя и не обеспечивает полную отказоустойчивость: удалённый сервер может принять соединение, но не работать, или плохо работать. @@ -78,7 +78,7 @@ logs - имя кластера в конфигурационном файле с Движок Distributed позволяет работать с кластером, как с локальным сервером. 
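For the distributed.md hunk above, the canonical engine usage with the `logs` cluster from the configuration fragment; the database, table, and sharding key are illustrative:

``` sql
-- Reads fan out to default.hits on every shard of the `logs` cluster;
-- writes are distributed across shards by rand().
CREATE TABLE hits_all AS hits
ENGINE = Distributed(logs, default, hits, rand());

SELECT uniq(UserID) FROM hits_all;
```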
При этом, кластер является неэластичным: вы должны прописать его конфигурацию в конфигурационный файл сервера (лучше всех серверов кластера). -Как видно, движок Distributed требует прописывания кластера в конфигурационный файл; кластера из конфигурационного файла обновляются налету, без перезапуска сервера. Если вам необходимо каждый раз отправлять запрос на неизвестный набор шардов и реплик, вы можете не создавать Distributed таблицу, а воспользоваться табличной функцией remote. Смотрите раздел [Табличные функции](../../query_language/table_functions/index.md). +Как видно, движок Distributed требует прописывания кластера в конфигурационный файл; кластера из конфигурационного файла обновляются налету, без перезапуска сервера. Если вам необходимо каждый раз отправлять запрос на неизвестный набор шардов и реплик, вы можете не создавать Distributed таблицу, а воспользоваться табличной функцией remote. Смотрите раздел [Табличные функции](../../../engines/table_engines/special/distributed.md). Есть два способа записывать данные на кластер: @@ -107,10 +107,10 @@ logs - имя кластера в конфигурационном файле с - используются запросы, требующие соединение данных (IN, JOIN) по определённому ключу - тогда если данные шардированы по этому ключу, то можно использовать локальные IN, JOIN вместо GLOBAL IN, GLOBAL JOIN, что кардинально более эффективно. - используется большое количество серверов (сотни и больше) и большое количество маленьких запросов (запросы отдельных клиентов - сайтов, рекламодателей, партнёров) - тогда, для того, чтобы маленькие запросы не затрагивали весь кластер, имеет смысл располагать данные одного клиента на одном шарде, или (вариант, который используется в Яндекс.Метрике) сделать двухуровневое шардирование: разбить весь кластер на «слои», где слой может состоять из нескольких шардов; данные для одного клиента располагаются на одном слое, но в один слой можно по мере необходимости добавлять шарды, в рамках которых данные распределены произвольным образом; создаются распределённые таблицы на каждый слой и одна общая распределённая таблица для глобальных запросов. -Запись данных осуществляется полностью асинхронно. При вставке в таблицу, блок данных сначала записывается в файловую систему. Затем, в фоновом режиме отправляются на удалённые серверы при первой возможности. Период отправки регулируется настройками [distributed\_directory\_monitor\_sleep\_time\_ms](../settings/settings.md#distributed_directory_monitor_sleep_time_ms) и [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../settings/settings.md#distributed_directory_monitor_max_sleep_time_ms). Движок таблиц `Distributed` отправляет каждый файл со вставленными данными отдельно, но можно включить пакетную отправку данных настройкой [distributed\_directory\_monitor\_batch\_inserts](../settings/settings.md#distributed_directory_monitor_batch_inserts). Эта настройка улучшает производительность кластера за счет более оптимального использования ресурсов сервера-отправителя и сети. Необходимо проверять, что данные отправлены успешно, для этого проверьте список файлов (данных, ожидающих отправки) в каталоге таблицы `/var/lib/clickhouse/data/database/table/`. +Запись данных осуществляется полностью асинхронно. При вставке в таблицу, блок данных сначала записывается в файловую систему. Затем, в фоновом режиме отправляются на удалённые серверы при первой возможности. 
Период отправки регулируется настройками [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) и [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms). Движок таблиц `Distributed` отправляет каждый файл со вставленными данными отдельно, но можно включить пакетную отправку данных настройкой [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts). Эта настройка улучшает производительность кластера за счет более оптимального использования ресурсов сервера-отправителя и сети. Необходимо проверять, что данные отправлены успешно, для этого проверьте список файлов (данных, ожидающих отправки) в каталоге таблицы `/var/lib/clickhouse/data/database/table/`. Если после INSERT-а в Distributed таблицу, сервер перестал существовать или был грубо перезапущен (например, в следствие аппаратного сбоя), то записанные данные могут быть потеряны. Если в директории таблицы обнаружен повреждённый кусок данных, то он переносится в поддиректорию broken и больше не используется. -При выставлении опции max\_parallel\_replicas выполнение запроса распараллеливается по всем репликам внутри одного шарда. Подробнее смотрите раздел [max\_parallel\_replicas](../settings/settings.md#settings-max_parallel_replicas). +При выставлении опции max\_parallel\_replicas выполнение запроса распараллеливается по всем репликам внутри одного шарда. Подробнее смотрите раздел [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas). [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/distributed/) diff --git a/docs/ru/operations/table_engines/external_data.md b/docs/ru/engines/table_engines/special/external_data.md similarity index 100% rename from docs/ru/operations/table_engines/external_data.md rename to docs/ru/engines/table_engines/special/external_data.md diff --git a/docs/ru/operations/table_engines/file.md b/docs/ru/engines/table_engines/special/file.md similarity index 81% rename from docs/ru/operations/table_engines/file.md rename to docs/ru/engines/table_engines/special/file.md index 2d248c22081..138c2e47b89 100644 --- a/docs/ru/operations/table_engines/file.md +++ b/docs/ru/engines/table_engines/special/file.md @@ -14,13 +14,13 @@ File(Format) ``` -`Format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT` и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../interfaces/formats.md#formats). +`Format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT` и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../../interfaces/formats.md#formats). -Сервер ClickHouse не позволяет указать путь к файлу, с которым будет работать `File`. Используется путь к хранилищу, определенный параметром [path](../server_settings/settings.md) в конфигурации сервера. +Сервер ClickHouse не позволяет указать путь к файлу, с которым будет работать `File`. Используется путь к хранилищу, определенный параметром [path](../../../operations/server_configuration_parameters/settings.md) в конфигурации сервера. При создании таблицы с помощью `File(Format)` сервер ClickHouse создает в хранилище каталог с именем таблицы, а после добавления в таблицу данных помещает туда файл `data.Format`. 
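The file.md hunk truncates its usage example; restored in outline, with the column set the doc's `file_engine_table` uses:

``` sql
CREATE TABLE file_engine_table (name String, value UInt32)
ENGINE = File(TabSeparated);

-- Data lands in <path>/data/<database>/file_engine_table/data.TabSeparated.
INSERT INTO file_engine_table VALUES ('one', 1), ('two', 2);

SELECT * FROM file_engine_table;
```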
-Можно вручную создать в хранилище каталог таблицы, поместить туда файл, затем на сервере ClickHouse добавить ([ATTACH](../../query_language/misc.md)) информацию о таблице, соответствующей имени каталога и прочитать из файла данные.
+Можно вручную создать в хранилище каталог таблицы, поместить туда файл, затем на сервере ClickHouse добавить ([ATTACH](../../../sql_reference/statements/misc.md#attach)) информацию о таблице, соответствующей имени каталога, и прочитать из файла данные.

!!! warning "Warning"
    Будьте аккуратны с этой функциональностью, поскольку сервер ClickHouse не отслеживает внешние изменения данных. Если в файл будет производиться запись одновременно со стороны сервера ClickHouse и с внешней стороны, то результат непредсказуем.

@@ -58,7 +58,7 @@ SELECT * FROM file_engine_table

## Использование движка в clickhouse-local {#ispolzovanie-dvizhka-v-clickhouse-local}

-В [clickhouse-local](../utils/clickhouse-local.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`.
+В [clickhouse-local](../../../operations/utilities/clickhouse-local.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`.

**Пример:**

diff --git a/docs/ru/engines/table_engines/special/generate.md b/docs/ru/engines/table_engines/special/generate.md
new file mode 100644
index 00000000000..87004bfe5b1
--- /dev/null
+++ b/docs/ru/engines/table_engines/special/generate.md
@@ -0,0 +1,59 @@
+---
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
+---
+
+# GenerateRandom {#table_engines-generate}
+
+Движок GenerateRandom генерирует случайные данные для заданной схемы таблицы.
+
+Примеры применения:
+
+- Заполнение в тестах воспроизводимой большой таблицы.
+- Генерация случайных входных данных для фаззинг-тестов.
+
+## Использование в сервере ClickHouse {#usage-in-clickhouse-server}
+
+``` sql
+ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length)
+```
+
+Параметры `max_array_length` и `max_string_length` задают максимальную длину соответственно всех
+столбцов-массивов и строк в генерируемых данных.
+
+Движок GenerateRandom поддерживает только запросы `SELECT`.
+
+Он поддерживает все [типы данных](../../../sql_reference/data_types/index.md), которые могут храниться в таблице, за исключением `LowCardinality` и `AggregateFunction`.
+
+**Пример:**
+
+**1.** Создайте таблицу `generate_engine_table`:
+
+``` sql
+CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3)
+```
+
+**2.** Запросите данные:
+
+``` sql
+SELECT * FROM generate_engine_table LIMIT 3
+```
+
+``` text
+┌─name─┬──────value─┐
+│ c4xJ │ 1412771199 │
+│ r    │ 1791099446 │
+│ 7#$  │  124312908 │
+└──────┴────────────┘
+```
+
+## Детали реализации {#details-of-implementation}
+
+- Не поддерживается:
+  - `ALTER`
+  - `SELECT ... 
SAMPLE`
+  - `INSERT`
+  - Индексы
+  - Репликация
+
+[Оригинальная статья](https://clickhouse.tech/docs/en/operations/table_engines/generate/)

diff --git a/docs/ru/engines/table_engines/special/index.md b/docs/ru/engines/table_engines/special/index.md
new file mode 100644
index 00000000000..22cebf295c1
--- /dev/null
+++ b/docs/ru/engines/table_engines/special/index.md
@@ -0,0 +1,5 @@
+---
+toc_folder_title: Special
+toc_priority: 31
+---
+

diff --git a/docs/ru/operations/table_engines/join.md b/docs/ru/engines/table_engines/special/join.md
similarity index 70%
rename from docs/ru/operations/table_engines/join.md
rename to docs/ru/engines/table_engines/special/join.md
index a9c06d05ebf..0ca53f34acb 100644
--- a/docs/ru/operations/table_engines/join.md
+++ b/docs/ru/engines/table_engines/special/join.md
@@ -1,6 +1,6 @@
# Join {#join}

-Подготовленная структура данных для использования в операциях [JOIN](../../query_language/select.md#select-join).
+Подготовленная структура данных для использования в операциях [JOIN](../../../sql_reference/statements/select.md#select-join).

## Создание таблицы {#creating-a-table}

@@ -12,12 +12,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE = Join(join_strictness, join_type, k1[, k2, ...])
```

-Смотрите подробное описание запроса [CREATE TABLE](../../query_language/create.md#create-table-query).
+Смотрите подробное описание запроса [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query).

**Параметры движка**

-- `join_strictness` – [строгость JOIN](../../query_language/select.md#select-join-strictness).
-- `join_type` – [тип JOIN](../../query_language/select.md#select-join-types).
+- `join_strictness` – [строгость JOIN](../../../sql_reference/statements/select.md#select-join-strictness).
+- `join_type` – [тип JOIN](../../../sql_reference/statements/select.md#select-join-types).
- `k1[, k2, ...]` – ключевые столбцы секции `USING` с которыми выполняется операция `JOIN`.

Вводите параметры `join_strictness` и `join_type` без кавычек, например, `Join(ANY, LEFT, col1)`. Они должны быть такими же как и в той операции `JOIN`, в которой таблица будет использоваться. Если параметры не совпадают, ClickHouse не генерирует исключение и может возвращать неверные данные.

@@ -79,21 +79,21 @@ SELECT joinGet('id_val_join', 'val', toUInt32(1))

Из таблиц нельзя выбрать данные с помощью запроса `SELECT`. Вместо этого, используйте один из следующих методов:

- Используйте таблицу как правую в секции `JOIN`.
-- Используйте функцию [joinGet](../../query_language/functions/other_functions.md#joinget), которая позволяет извлекать данные из таблицы таким же образом как из словаря.
+- Используйте функцию [joinGet](../../../sql_reference/functions/other_functions.md#joinget), которая позволяет извлекать данные из таблицы таким же образом, как из словаря.
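Для наглядности приведём условный сквозной набросок, демонстрирующий оба способа (имя `id_val_join` взято из примера выше, таблица `id_val` и значения условные):

``` sql
-- Условный пример: Join-таблица как правая таблица в секции JOIN
-- и извлечение значения по ключу через joinGet.
CREATE TABLE id_val (id UInt32, val UInt8) ENGINE = TinyLog;
CREATE TABLE id_val_join (id UInt32, val UInt8) ENGINE = Join(ANY, LEFT, id);

INSERT INTO id_val VALUES (1, 11), (2, 12), (3, 13);
INSERT INTO id_val_join VALUES (1, 21), (3, 23);

-- Параметры Join(ANY, LEFT, id) совпадают с используемой операцией JOIN:
SELECT * FROM id_val ANY LEFT JOIN id_val_join USING (id);

-- Чтение из Join-таблицы по ключу, как из словаря:
SELECT joinGet('id_val_join', 'val', toUInt32(1));
```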
### Ограничения и настройки {#join-limitations-and-settings}

При создании таблицы, применяются следующие параметры :

-- [join\_use\_nulls](../settings/settings.md#join_use_nulls)
-- [max\_rows\_in\_join](../settings/query_complexity.md#settings-max_rows_in_join)
-- [max\_bytes\_in\_join](../settings/query_complexity.md#settings-max_bytes_in_join)
-- [join\_overflow\_mode](../settings/query_complexity.md#settings-join_overflow_mode)
-- [join\_any\_take\_last\_row](../settings/settings.md#settings-join_any_take_last_row)
+- [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls)
+- [max\_rows\_in\_join](../../../operations/settings/query_complexity.md#settings-max_rows_in_join)
+- [max\_bytes\_in\_join](../../../operations/settings/query_complexity.md#settings-max_bytes_in_join)
+- [join\_overflow\_mode](../../../operations/settings/query_complexity.md#settings-join_overflow_mode)
+- [join\_any\_take\_last\_row](../../../operations/settings/settings.md#settings-join_any_take_last_row)

Таблицы с движком `Join` нельзя использовать в операциях `GLOBAL JOIN`.

-Движок `Join` позволяет использовать параметр [join\_use\_nulls](../settings/settings.md#join_use_nulls) в запросе `CREATE TABLE`, который также можно использовать в запросе [SELECT](../../query_language/select.md). Если у вас разные настройки `join_use_nulls`, вы можете получить сообщение об ошибке при объединении таблиц. Это зависит от типа соединения. Когда вы используете функцию [joinGet](../../query_language/functions/other_functions.md#joinget), вам необходимо использовать один и тот же параметр `join_use_nulls` в запросах `CRATE TABLE` и `SELECT`.
+Движок `Join` позволяет использовать параметр [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls) в запросе `CREATE TABLE`, который также можно использовать в запросе [SELECT](../../../sql_reference/statements/select.md). Если у вас разные настройки `join_use_nulls`, вы можете получить сообщение об ошибке при объединении таблиц. Это зависит от типа соединения. Когда вы используете функцию [joinGet](../../../sql_reference/functions/other_functions.md#joinget), вам необходимо использовать один и тот же параметр `join_use_nulls` в запросах `CREATE TABLE` и `SELECT`.

## Хранение данных {#khranenie-dannykh}

diff --git a/docs/ru/engines/table_engines/special/materializedview.md b/docs/ru/engines/table_engines/special/materializedview.md
new file mode 100644
index 00000000000..2adcdb8df70
--- /dev/null
+++ b/docs/ru/engines/table_engines/special/materializedview.md
@@ -0,0 +1,5 @@
+# MaterializedView {#materializedview}
+
+Используется для реализации материализованных представлений (подробнее см. запрос [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query)). Для хранения данных использует другой движок, который был указан при создании представления. При чтении из таблицы просто использует этот движок. 
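Небольшой условный набросок для иллюстрации (имена таблиц вымышленные): данные представления хранит движок, указанный в `ENGINE`, а само представление наполняется при вставке в исходную таблицу.

``` sql
-- Условный пример: материализованное представление с агрегацией по дням;
-- данные хранятся в движке, указанном после ENGINE (здесь SummingMergeTree).
CREATE TABLE hits_src (date Date, user_id UInt64) ENGINE = MergeTree() ORDER BY date;

CREATE MATERIALIZED VIEW hits_daily
ENGINE = SummingMergeTree() ORDER BY date
AS SELECT date, count() AS cnt FROM hits_src GROUP BY date;

INSERT INTO hits_src VALUES ('2020-01-01', 1), ('2020-01-01', 2);

SELECT * FROM hits_daily;
```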
+
+[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/materializedview/)

diff --git a/docs/ru/operations/table_engines/memory.md b/docs/ru/engines/table_engines/special/memory.md
similarity index 100%
rename from docs/ru/operations/table_engines/memory.md
rename to docs/ru/engines/table_engines/special/memory.md

diff --git a/docs/ru/operations/table_engines/merge.md b/docs/ru/engines/table_engines/special/merge.md
similarity index 98%
rename from docs/ru/operations/table_engines/merge.md
rename to docs/ru/engines/table_engines/special/merge.md
index 4f11cf77352..65dd8dc7a2c 100644
--- a/docs/ru/operations/table_engines/merge.md
+++ b/docs/ru/engines/table_engines/special/merge.md
@@ -52,7 +52,7 @@ FROM WatchLog

## Виртуальные столбцы {#virtualnye-stolbtsy}

-- `_table` — содержит имя таблицы, из которой данные были прочитаны. Тип — [String](../../data_types/string.md).
+- `_table` — содержит имя таблицы, из которой данные были прочитаны. Тип — [String](../../../sql_reference/data_types/string.md).

В секции `WHERE/PREWHERE` можно установить константное условие на столбец `_table` (например, `WHERE _table='xyz'`). В этом случае операции чтения выполняются только для тех таблиц, для которых выполняется условие на значение `_table`, таким образом, столбец `_table` работает как индекс.

diff --git a/docs/ru/operations/table_engines/null.md b/docs/ru/engines/table_engines/special/null.md
similarity index 100%
rename from docs/ru/operations/table_engines/null.md
rename to docs/ru/engines/table_engines/special/null.md

diff --git a/docs/ru/operations/table_engines/set.md b/docs/ru/engines/table_engines/special/set.md
similarity index 100%
rename from docs/ru/operations/table_engines/set.md
rename to docs/ru/engines/table_engines/special/set.md

diff --git a/docs/ru/operations/table_engines/url.md b/docs/ru/engines/table_engines/special/url.md
similarity index 95%
rename from docs/ru/operations/table_engines/url.md
rename to docs/ru/engines/table_engines/special/url.md
index 6f9d22e887d..5f4696286d7 100644
--- a/docs/ru/operations/table_engines/url.md
+++ b/docs/ru/engines/table_engines/special/url.md
@@ -7,7 +7,7 @@

`Format` должен быть таким, который ClickHouse может использовать в запросах `SELECT` и, если есть необходимость, `INSERT`. Полный список поддерживаемых форматов смотрите в
-разделе [Форматы](../../interfaces/formats.md#formats).
+разделе [Форматы](../../../interfaces/formats.md#formats).

`URL` должен соответствовать структуре Uniform Resource Locator. По указанному URL должен находится сервер работающий по протоколу HTTP или HTTPS. При этом не должно требоваться никаких
@@ -17,7 +17,7 @@
соответственно. Для обработки `POST`-запросов удаленный сервер должен поддерживать [Chunked transfer encoding](https://ru.wikipedia.org/wiki/Chunked_transfer_encoding).

-Максимальное количество переходов по редиректам при выполнении HTTP-запроса методом GET можно ограничить с помощью настройки [max\_http\_get\_redirects](../settings/settings.md#setting-max_http_get_redirects).
+Максимальное количество переходов по редиректам при выполнении HTTP-запроса методом GET можно ограничить с помощью настройки [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects). 
**Пример:**

diff --git a/docs/ru/operations/table_engines/view.md b/docs/ru/engines/table_engines/special/view.md
similarity index 100%
rename from docs/ru/operations/table_engines/view.md
rename to docs/ru/engines/table_engines/special/view.md

diff --git a/docs/ru/faq/general.md b/docs/ru/faq/general.md
index e5a5e0c00fa..5bfe8ea8f2d 100644
--- a/docs/ru/faq/general.md
+++ b/docs/ru/faq/general.md
@@ -25,7 +25,7 @@ NLS_LANG=RUSSIAN_RUSSIA.UTF8

### Секция INTO OUTFILE {#sektsiia-into-outfile}

-Добавьте секцию [INTO OUTFILE](../query_language/select/#into-outfile-clause) к своему запросу.
+Добавьте секцию [INTO OUTFILE](../sql_reference/statements/select.md#into-outfile-clause) к своему запросу.

Например:

@@ -33,7 +33,7 @@ NLS_LANG=RUSSIAN_RUSSIA.UTF8
SELECT * FROM table INTO OUTFILE 'file'
```

-По умолчанию, для выдачи данных ClickHouse использует формат [TabSeparated](../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../interfaces/formats.md), используйте [секцию FORMAT](../query_language/select/#format-clause).
+По умолчанию, для выдачи данных ClickHouse использует формат [TabSeparated](../interfaces/formats.md#tabseparated). Чтобы выбрать [формат данных](../interfaces/formats.md), используйте [секцию FORMAT](../sql_reference/statements/select.md#format-clause).

Например:

@@ -43,7 +43,7 @@ SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV

### Таблица с движком File {#tablitsa-s-dvizhkom-file}

-Смотрите [File](../operations/table_engines/file.md).
+Смотрите [File](../engines/table_engines/special/file.md).

### Перенаправление в командой строке {#perenapravlenie-v-komandoi-stroke}

diff --git a/docs/ru/faq/index.md b/docs/ru/faq/index.md
new file mode 100644
index 00000000000..2ee9d51e83b
--- /dev/null
+++ b/docs/ru/faq/index.md
@@ -0,0 +1,6 @@
+---
+toc_folder_title: F.A.Q.
+toc_priority: 76
+---
+
+

diff --git a/docs/ru/getting_started/tutorial.md b/docs/ru/getting_started/tutorial.md
index 4f23dbe756d..4a31f4b23a2 100644
--- a/docs/ru/getting_started/tutorial.md
+++ b/docs/ru/getting_started/tutorial.md
@@ -1,18 +1,19 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---

-# ClickHouse Tutorial {#clickhouse-tutorial}
+# Учебник по ClickHouse {#clickhouse-tutorial}

-## What to Expect from This Tutorial? {#what-to-expect-from-this-tutorial}
+## Чего ожидать от этого учебника? {#what-to-expect-from-this-tutorial}

-By going through this tutorial, you’ll learn how to set up a simple ClickHouse cluster. It’ll be small, but fault-tolerant and scalable. Then we will use one of the example datasets to fill it with data and execute some demo queries.
+Пройдя этот учебник, вы узнаете, как настроить простой кластер ClickHouse. Он будет небольшим, но отказоустойчивым и масштабируемым. Затем мы используем один из примеров наборов данных, чтобы заполнить кластер данными и выполнить несколько демонстрационных запросов.

-## Single Node Setup {#single-node-setup}
+## Настройка одного узла {#single-node-setup}

-To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](index.md#install-from-deb-packages) or [rpm](index.md#from-rpm-packages) packages, but there are [alternatives](index.md#from-docker-image) for the operating systems that do no support them.
+Чтобы избежать сложностей распределенной среды, мы начнем с развертывания ClickHouse на одном сервере или виртуальной машине. 
ClickHouse обычно устанавливается из [deb](install.md#install-from-deb-packages)- или [rpm](install.md#from-rpm-packages)-пакетов, но есть и [альтернативы](install.md#from-docker-image) для операционных систем, которые их не поддерживают.

-For example, you have chosen `deb` packages and executed:
+Например, вы выбрали `deb`-пакеты и выполнили:

``` bash
sudo apt-get install dirmngr
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
```

-What do we have in the packages that got installed:
+Что входит в установленные пакеты:

-- `clickhouse-client` package contains [clickhouse-client](../interfaces/cli.md) application, interactive ClickHouse console client.
-- `clickhouse-common` package contains a ClickHouse executable file.
-- `clickhouse-server` package contains configuration files to run ClickHouse as a server.
+- пакет `clickhouse-client` содержит приложение [clickhouse-client](../interfaces/cli.md), интерактивный консольный клиент ClickHouse.
+- пакет `clickhouse-common` содержит исполняемый файл ClickHouse.
+- пакет `clickhouse-server` содержит файлы конфигурации для запуска ClickHouse в качестве сервера.

-Server config files are located in `/etc/clickhouse-server/`. Before going further, please notice the `<path>` element in `config.xml`. Path determines the location for data storage, so it should be located on volume with large disk capacity; the default value is `/var/lib/clickhouse/`. If you want to adjust the configuration, it’s not handy to directly edit `config.xml` file, considering it might get rewritten on future package updates. The recommended way to override the config elements is to create [files in config.d directory](../operations/configuration_files.md) which serve as “patches” to config.xml.
+Файлы конфигурации сервера находятся в `/etc/clickhouse-server/`. Прежде чем идти дальше, обратите внимание на элемент `<path>` в `config.xml`. Он определяет место хранения данных, поэтому путь должен располагаться на томе с большой ёмкостью диска; значение по умолчанию: `/var/lib/clickhouse/`. Менять конфигурацию прямым редактированием `config.xml` неудобно, поскольку при будущих обновлениях пакета файл может быть перезаписан. Рекомендуемый способ переопределения элементов конфигурации: создать [файлы в каталоге config.d](../operations/configuration_files.md), которые служат «патчами» к config.xml.

-As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won’t be automatically restarted after updates, either. The way you start the server depends on your init system, usually, it is:
+Как вы могли заметить, `clickhouse-server` не запускается автоматически после установки пакета и не перезапускается автоматически после обновлений. Способ запуска сервера зависит от вашей init-системы; как правило, это:

``` bash
sudo service clickhouse-server start
```

-or
+или

``` bash
sudo /etc/init.d/clickhouse-server start
```

-The default location for server logs is `/var/log/clickhouse-server/`. The server is ready to handle client connections once it logs the `Ready for connections` message.
+По умолчанию журналы сервера пишутся в `/var/log/clickhouse-server/`. Сервер готов обрабатывать клиентские подключения, как только запишет в журнал сообщение `Ready for connections`. 
-Once the `clickhouse-server` is up and running, we can use `clickhouse-client` to connect to the server and run some test queries like `SELECT "Hello, world!";`.
+Когда `clickhouse-server` запущен и работает, можно использовать `clickhouse-client`, чтобы подключиться к серверу и выполнить тестовые запросы вроде `SELECT "Hello, world!";`.
-Quick tips for clickhouse-client
-Interactive mode:
+Быстрые советы по clickhouse-client
+Интерактивный режим:

``` bash
clickhouse-client
clickhouse-client --host=... --port=... --user=... --password=...
```

-Enable multiline queries:
+Включение многострочных запросов:

``` bash
clickhouse-client -m
clickhouse-client --multiline
```

-Run queries in batch-mode:
+Запуск запросов в пакетном режиме:

``` bash
clickhouse-client --query='SELECT 1'
@@ -73,7 +74,7 @@ echo 'SELECT 1' | clickhouse-client
clickhouse-client <<< 'SELECT 1'
```

-Insert data from a file in specified format:
+Вставка данных из файла в заданном формате:

``` bash
clickhouse-client --query='INSERT INTO table VALUES' < data.txt
@@ -82,39 +83,39 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
-## Import Sample Dataset {#import-sample-dataset}
+## Импорт тестового набора данных {#import-sample-dataset}

-Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example_datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
+Теперь пришло время заполнить наш сервер ClickHouse тестовыми данными. В этом учебнике мы используем анонимизированные данные Яндекс.Метрики, первого сервиса, который использовал ClickHouse в продакшене ещё до того, как тот стал open-source (подробнее об этом в [разделе истории](../introduction/history.md)). Есть [несколько способов импортировать набор данных Яндекс.Метрики](example_datasets/metrica.md); в учебнике мы воспользуемся самым реалистичным из них.

-### Download and Extract Table Data {#download-and-extract-table-data}
+### Загрузка и извлечение данных таблиц {#download-and-extract-table-data}

``` bash
curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
```

-The extracted files are about 10GB in size.
+Извлеченные файлы имеют размер около 10 ГБ.

-### Create Tables {#create-tables}
+### Создание таблиц {#create-tables}

-As in most databases management systems, ClickHouse logically groups tables into “databases”. There’s a `default` database, but we’ll create a new one named `tutorial`:
+Как и большинство СУБД, ClickHouse логически группирует таблицы в «базы данных». Существует база данных `default`, но мы создадим новую с именем `tutorial`:

``` bash
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
```

-Syntax for creating tables is way more complicated compared to databases (see [reference](../query_language/create.md). In general `CREATE TABLE` statement has to specify three key things:
+Синтаксис создания таблиц заметно сложнее, чем создания баз данных (см. [справку](../sql_reference/statements/create.md)). В общем случае запрос `CREATE TABLE` должен указывать три ключевые вещи:

-1. Name of table to create.
-2. Table schema, i.e. list of columns and their [data types](../data_types/index.md).
-3. [Table engine](../operations/table_engines/index.md) and it’s settings, which determines all the details on how queries to this table will be physically executed.
+1. Имя создаваемой таблицы.
+2. Схему таблицы, то есть список столбцов и их [типов данных](../sql_reference/data_types/index.md).
+3. [Движок таблицы](../engines/table_engines/index.md) и его настройки, которые определяют все детали того, как запросы к этой таблице будут физически выполняться.

-Yandex.Metrica is a web analytics service, and sample dataset doesn’t cover its full functionality, so there are only two tables to create:
+Яндекс.Метрика является сервисом веб-аналитики, а тестовый набор данных не покрывает всю её функциональность, поэтому создать нужно только две таблицы:

-- `hits` is a table with each action done by all users on all websites covered by the service.
-- `visits` is a table that contains pre-built sessions instead of individual actions. 
+- `hits`: таблица с каждым действием всех пользователей на всех сайтах, охватываемых сервисом.
+- `visits`: таблица с заранее построенными сессиями вместо отдельных действий.

-Let’s see and execute the real create table queries for these tables:
+Посмотрим и выполним настоящие запросы `CREATE TABLE` для этих таблиц:

``` sql
CREATE TABLE tutorial.hits_v1
@@ -457,22 +458,22 @@ SAMPLE BY intHash32(UserID)
SETTINGS index_granularity = 8192
```

-You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want.
+Вы можете выполнить эти запросы в интерактивном режиме `clickhouse-client` (просто запустите его в терминале, не указывая запрос заранее) или, при желании, попробовать [альтернативные интерфейсы](../interfaces/index.md).

-As we can see, `hits_v1` uses the [basic MergeTree engine](../operations/table_engines/mergetree.md), while the `visits_v1` uses the [Collapsing](../operations/table_engines/collapsingmergetree.md) variant.
+Как видно, `hits_v1` использует [базовый движок MergeTree](../engines/table_engines/mergetree_family/mergetree.md), в то время как `visits_v1` использует вариант [CollapsingMergeTree](../engines/table_engines/mergetree_family/collapsingmergetree.md).

-### Import Data {#import-data}
+### Импорт данных {#import-data}

-Data import to ClickHouse is done via [INSERT INTO](../query_language/insert_into.md) query like in many other SQL databases. However, data is usually provided in one of the [supported serialization formats](../interfaces/formats.md) instead of `VALUES` clause (which is also supported).
+Импорт данных в ClickHouse выполняется запросом [INSERT INTO](../sql_reference/statements/insert_into.md), как и во многих других SQL-базах данных. Однако данные обычно передаются в одном из [поддерживаемых форматов сериализации](../interfaces/formats.md) вместо секции `VALUES` (которая тоже поддерживается).

-The files we downloaded earlier are in tab-separated format, so here’s how to import them via console client:
+Загруженные ранее файлы имеют формат с разделением табуляцией; вот как импортировать их через консольный клиент:

``` bash
clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv
clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
```

-ClickHouse has a lot of [settings to tune](../operations/settings/index.md) and one way to specify them in console client is via arguments, as we can see with `--max_insert_block_size`. The easiest way to figure out what settings are available, what do they mean and what the defaults are is to query the `system.settings` table:
+У ClickHouse много [настроек для тюнинга](../operations/settings/index.md), и один из способов указать их в консольном клиенте состоит в передаче аргументов, как мы видим на примере `--max_insert_block_size`. Самый простой способ выяснить, какие настройки доступны, что они означают и каковы их значения по умолчанию, состоит в запросе к таблице `system.settings`:

``` sql
SELECT name, value, changed, description
@@ -483,23 +484,23 @@ FORMAT TSV

max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." 
```

-Optionally you can [OPTIMIZE](../query_language/misc/#misc_operations-optimize) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later:
+По желанию после импорта можно выполнить [OPTIMIZE](../sql_reference/statements/misc.md#misc_operations-optimize) для таблиц. Таблицы с движком из семейства MergeTree всегда выполняют слияние кусков данных в фоновом режиме для оптимизации хранения (или, по крайней мере, проверяют, имеет ли это смысл). Эти запросы заставляют движок таблицы выполнить оптимизацию хранилища прямо сейчас, а не спустя некоторое время:

``` bash
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL"
```

-These queries start an I/O and CPU intensive operation, so if the table consistently receives new data, it’s better to leave it alone and let merges run in the background.
+Эти запросы запускают операцию, интенсивно использующую ввод-вывод и процессор, поэтому, если таблица постоянно получает новые данные, лучше не трогать её и позволить слияниям выполняться в фоновом режиме.

-Now we can check if the table import was successful:
+Теперь можно проверить, успешно ли прошёл импорт таблиц:

``` bash
clickhouse-client --query "SELECT COUNT(*) FROM tutorial.hits_v1"
clickhouse-client --query "SELECT COUNT(*) FROM tutorial.visits_v1"
```

-## Example Queries {#example-queries}
+## Примеры запросов {#example-queries}

``` sql
SELECT
@@ -521,18 +522,18 @@ FROM tutorial.visits_v1
WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
```

-## Cluster Deployment {#cluster-deployment}
+## Развертывание кластера {#cluster-deployment}

-ClickHouse cluster is a homogenous cluster. Steps to set up:
+Кластер ClickHouse является однородным. Шаги настройки:

-1. Install ClickHouse server on all machines of the cluster
-2. Set up cluster configs in configuration files
-3. Create local tables on each instance
-4. Create a [Distributed table](../operations/table_engines/distributed.md)
+1. Установите сервер ClickHouse на всех машинах кластера
+2. Пропишите конфигурацию кластера в конфигурационных файлах
+3. Создайте локальные таблицы на каждом экземпляре
+4. Создайте [Distributed-таблицу](../engines/table_engines/special/distributed.md)

-[Distributed table](../operations/table_engines/distributed.md) is actually a kind of “view” to local tables of ClickHouse cluster. SELECT query from a distributed table executes using resources of all cluster’s shards. You may specify configs for multiple clusters and create multiple distributed tables providing views to different clusters.
+[Distributed-таблица](../engines/table_engines/special/distributed.md) фактически является своего рода «представлением» над локальными таблицами кластера ClickHouse. Запрос SELECT из распределенной таблицы выполняется с использованием ресурсов всех шардов кластера. Можно описать конфигурации нескольких кластеров и создать несколько распределенных таблиц, дающих представления над разными кластерами. 
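Забегая вперёд (таблицы `tutorial.hits_all` и `tutorial.hits_local` создаются ниже), разницу между запросом к распределенной и к локальной таблице можно показать условным наброском:

``` sql
-- Запрос к Distributed-таблице опрашивает hits_local на каждом шарде
-- кластера и объединяет результаты:
SELECT count() FROM tutorial.hits_all;

-- Тот же запрос к локальной таблице вернёт данные только одного сервера:
SELECT count() FROM tutorial.hits_local;
```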
-Example config for a cluster with three shards, one replica each:
+Пример конфигурации для кластера с тремя шардами, по одной реплике в каждом:

``` xml
@@ -559,37 +560,37 @@ Example config for a cluster with three shards, one replica each:
```

-For further demonstration, let’s create a new local table with the same `CREATE TABLE` query that we used for `hits_v1`, but different table name:
+Для дальнейшей демонстрации создадим новую локальную таблицу тем же запросом `CREATE TABLE`, что и для `hits_v1`, но с другим именем таблицы:

``` sql
CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
```

-Creating a distributed table providing a view into local tables of the cluster:
+Создадим распределенную таблицу, дающую представление над локальными таблицами кластера:

``` sql
CREATE TABLE tutorial.hits_all AS tutorial.hits_local
ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
```

-A common practice is to create similar Distributed tables on all machines of the cluster. It allows running distributed queries on any machine of the cluster. Also there’s an alternative option to create temporary distributed table for a given SELECT query using [remote](../query_language/table_functions/remote.md) table function.
+Распространенной практикой является создание одинаковых Distributed-таблиц на всех машинах кластера: это позволяет выполнять распределенные запросы с любой машины кластера. Кроме того, есть альтернативный вариант: создать временную распределенную таблицу для конкретного запроса SELECT с помощью табличной функции [remote](../sql_reference/table_functions/remote.md).

-Let’s run [INSERT SELECT](../query_language/insert_into.md) into the Distributed table to spread the table to multiple servers.
+Выполним [INSERT SELECT](../sql_reference/statements/insert_into.md) в распределенную таблицу, чтобы распределить данные по нескольким серверам.

``` sql
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
```

-!!! warning "Notice"
-    This approach is not suitable for the sharding of large tables. There’s a separate tool [clickhouse-copier](../operations/utils/clickhouse-copier.md) that can re-shard arbitrary large tables.
+!!! warning "Внимание"
+    Такой подход не годится для шардирования больших таблиц. Есть отдельный инструмент [clickhouse-copier](../operations/utilities/clickhouse-copier.md), который может перешардировать произвольно большие таблицы.

-As you could expect, computationally heavy queries run N times faster if they utilize 3 servers instead of one.
+Как и следовало ожидать, вычислительно тяжелые запросы выполняются в N раз быстрее, если они используют 3 сервера вместо одного.

-In this case, we have used a cluster with 3 shards, and each contains a single replica.
+В этом случае мы использовали кластер с 3 шардами, каждый из которых содержит одну реплику.

-To provide resilience in a production environment, we recommend that each shard should contain 2-3 replicas spread between multiple availability zones or datacenters (or at least racks). Note that ClickHouse supports an unlimited number of replicas.
+Для обеспечения отказоустойчивости в производственной среде рекомендуется, чтобы каждый шард содержал 2-3 реплики, распределенные между несколькими зонами доступности или центрами обработки данных (или хотя бы стойками). Обратите внимание, что ClickHouse поддерживает неограниченное количество реплик. 
-Example config for a cluster of one shard containing three replicas:
+Пример конфигурации для кластера из одного шарда с тремя репликами:

``` xml
@@ -613,12 +614,12 @@ Example config for a cluster of one shard containing three replicas:
```

-To enable native replication [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs restore procedure after failure automatically. It’s recommended to deploy the ZooKeeper cluster on separate servers (where no other processes including ClickHouse are running).
+Для встроенной репликации требуется [ZooKeeper](http://zookeeper.apache.org/). ClickHouse заботится о согласованности данных на всех репликах и автоматически запускает процедуру восстановления после сбоя. Рекомендуется разворачивать кластер ZooKeeper на отдельных серверах (на которых не запущены другие процессы, включая ClickHouse).

-!!! note "Note"
-    ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application.
+!!! note "Примечание"
+    ZooKeeper не является строгим требованием: в некоторых простых случаях вы можете дублировать данные, записывая их во все реплики из кода вашего приложения. Такой подход **не** рекомендуется: в этом случае ClickHouse не сможет гарантировать согласованность данных на всех репликах, и это становится ответственностью вашего приложения.

-ZooKeeper locations are specified in the configuration file:
+Адреса ZooKeeper указываются в конфигурационном файле:

``` xml
@@ -637,7 +638,7 @@ ZooKeeper locations are specified in the configuration file:
```

-Also, we need to set macros for identifying each shard and replica which are used on table creation:
+Кроме того, нужно задать макросы для идентификации каждого шарда и реплики; они используются при создании таблицы:

``` xml
@@ -646,7 +647,7 @@ Also, we need to set macros for identifying each shard and replica which are use
```

-If there are no replicas at the moment on replicated table creation, a new first replica is instantiated. If there are already live replicas, the new replica clones data from existing ones. You have an option to create all replicated tables first, and then insert data to it. Another option is to create some replicas and add the others after or during data insertion.
+Если на момент создания реплицированной таблицы других реплик ещё нет, то создаётся новая первая реплика. Если живые реплики уже существуют, новая реплика клонирует данные из существующих. Вы можете сначала создать все реплицированные таблицы, а затем вставить в них данные. Другой вариант: создать часть реплик и добавить остальные после или во время вставки данных.

``` sql
CREATE TABLE tutorial.hits_replica (...)
ENGINE = ReplcatedMergeTree(
...

-Here we use [ReplicatedMergeTree](../operations/table_engines/replication.md) table engine. In parameters we specify ZooKeeper path containing shard and replica identifiers.
+Здесь мы используем движок таблиц [ReplicatedMergeTree](../engines/table_engines/mergetree_family/replication.md). В параметрах мы указываем путь в ZooKeeper, содержащий идентификаторы шарда и реплики. 
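Небольшой условный набросок полного определения (имена столбцов и путь в ZooKeeper взяты для примера; в запросе выше детали опущены):

``` sql
-- Условный пример: первый параметр задаёт путь в ZooKeeper, общий для
-- всех реплик одного шарда; второй задаёт имя реплики. Макросы {shard}
-- и {replica} подставляются из секции macros конфигурации сервера.
CREATE TABLE tutorial.hits_replica_demo
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt64
)
ENGINE = ReplicatedMergeTree(
    '/clickhouse_perftest/tables/{shard}/hits_replica_demo',
    '{replica}'
)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);
```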
``` sql INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; ``` -Replication operates in multi-master mode. Data can be loaded into any replica, and the system then syncs it with other instances automatically. Replication is asynchronous so at a given moment, not all replicas may contain recently inserted data. At least one replica should be up to allow data ingestion. Others will sync up data and repair consistency once they will become active again. Note that this approach allows for the low possibility of a loss of recently inserted data. +Репликация работает в режиме мульти-мастер. Данные могут быть загружены в любую реплику, а затем система автоматически синхронизирует их с другими экземплярами. Репликация является асинхронной, поэтому в данный момент не все реплики могут содержать недавно вставленные данные. По крайней мере, одна реплика должна быть готова, чтобы обеспечить прием данных. Другие будут синхронизировать данные и восстанавливать согласованность, как только они снова станут активными. Обратите внимание, что этот подход допускает низкую вероятность потери недавно вставленных данных. -[Original article](https://clickhouse.tech/docs/en/getting_started/tutorial/) +[Оригинальная статья](https://clickhouse.tech/docs/en/getting_started/tutorial/) diff --git a/docs/ru/guides/apply_catboost_model.md b/docs/ru/guides/apply_catboost_model.md index f6c9799a171..2f94753b7f2 100644 --- a/docs/ru/guides/apply_catboost_model.md +++ b/docs/ru/guides/apply_catboost_model.md @@ -178,7 +178,7 @@ LIMIT 10 ``` !!! note "Примечание" - Функция [modelEvaluate](../query_language/functions/other_functions.md#function-modelevaluate) возвращает кортежи (tuple) с исходными прогнозами по классам для моделей с несколькими классами. + Функция [modelEvaluate](../sql_reference/functions/other_functions.md#function-modelevaluate) возвращает кортежи (tuple) с исходными прогнозами по классам для моделей с несколькими классами. Спрогнозируйте вероятность: @@ -201,7 +201,7 @@ LIMIT 10 ``` !!! note "Примечание" - Подробнее про функцию [exp()](../query_language/functions/math_functions.md). + Подробнее про функцию [exp()](../sql_reference/functions/math_functions.md). Посчитайте логистическую функцию потерь (LogLoss) на всей выборке: @@ -227,4 +227,4 @@ FROM ``` !!! note "Примечание" - Подробнее про функции [avg()](../query_language/agg_functions/reference.md#agg_function-avg), [log()](../query_language/functions/math_functions.md). + Подробнее про функции [avg()](../sql_reference/aggregate_functions/reference.md#agg_function-avg), [log()](../sql_reference/functions/math_functions.md). diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 749e93c34ad..b76e96cc1dc 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -88,7 +88,7 @@ clickhouse-client --param_parName="[1, 2]" -q "SELECT * FROM table WHERE a = {p ``` - `name` — идентификатор подстановки. В консольном клиенте его следует использовать как часть имени параметра `--param_ = value`. -- `data type` — [тип данных](../data_types/index.md) значения. Например, структура данных `(integer, ('string', integer))` может иметь тип данных `Tuple(UInt8, Tuple(String, UInt8))` ([целочисленный](../data_types/int_uint.md) тип может быть и другим). +- `data type` — [тип данных](../sql_reference/data_types/index.md) значения. 
Например, структура данных `(integer, ('string', integer))` может иметь тип данных `Tuple(UInt8, Tuple(String, UInt8))` ([целочисленный](../sql_reference/data_types/int_uint.md) тип может быть и другим). #### Пример {#primer} diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 8d5a38a3a63..27cab90bdd4 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -99,9 +99,9 @@ world Массивы форматируются в виде списка значений через запятую в квадратных скобках. Элементы массива - числа форматируются как обычно, а даты, даты-с-временем и строки - в одинарных кавычках с такими же правилами экранирования, как указано выше. -[NULL](../query_language/syntax.md) форматируется как `\N`. +[NULL](../sql_reference/syntax.md) форматируется как `\N`. -Каждый элемент структуры типа [Nested](../data_types/nested_data_structures/nested.md) представляется как отдельный массив. +Каждый элемент структуры типа [Nested](../sql_reference/data_types/nested_data_structures/nested.md) представляется как отдельный массив. Например: @@ -302,7 +302,7 @@ SearchPhrase=дизайн штор count()=1064 SearchPhrase=баку count()=1000 ``` -[NULL](../query_language/syntax.md) форматируется как `\N`. +[NULL](../sql_reference/syntax.md) форматируется как `\N`. ``` sql SELECT * FROM t_null FORMAT TSKV @@ -432,7 +432,7 @@ JSON совместим с JavaScript. Для этого, дополнитель Этот формат подходит только для вывода результата выполнения запроса, но не для парсинга (приёма данных для вставки в таблицу). -ClickHouse поддерживает [NULL](../query_language/syntax.md), который при выводе JSON будет отображен как `null`. +ClickHouse поддерживает [NULL](../sql_reference/syntax.md), который при выводе JSON будет отображен как `null`. Смотрите также формат [JSONEachRow](#jsoneachrow) . @@ -507,7 +507,7 @@ ClickHouse игнорирует пробелы между элементами **Обработка пропущенных значений** -ClickHouse заменяет опущенные значения значениями по умолчанию для соответствующих [data types](../data_types/index.md). +ClickHouse заменяет опущенные значения значениями по умолчанию для соответствующих [data types](../sql_reference/data_types/index.md). Если указано `DEFAULT expr`, то ClickHouse использует различные правила подстановки в зависимости от настройки [input\_format\_defaults\_for\_omitted\_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields). @@ -552,7 +552,7 @@ CREATE TABLE IF NOT EXISTS example_table ### Использование вложенных структур {#jsoneachrow-nested} -Если у вас есть таблица со столбцами типа [Nested](../data_types/nested_data_structures/nested.md), то в неё можно вставить данные из JSON-документа с такой же структурой. Функциональность включается настройкой [input\_format\_import\_nested\_json](../operations/settings/settings.md#settings-input_format_import_nested_json). +Если у вас есть таблица со столбцами типа [Nested](../sql_reference/data_types/nested_data_structures/nested.md), то в неё можно вставить данные из JSON-документа с такой же структурой. Функциональность включается настройкой [input\_format\_import\_nested\_json](../operations/settings/settings.md#settings-input_format_import_nested_json). Например, рассмотрим следующую таблицу: @@ -626,7 +626,7 @@ SELECT * FROM json_each_row_nested Рисуется полная сетка таблицы и, таким образом, каждая строчка занимает две строки в терминале. Каждый блок результата выводится в виде отдельной таблицы. 
Это нужно, чтобы можно было выводить блоки без буферизации результата (буферизация потребовалась бы, чтобы заранее вычислить видимую ширину всех значений.) -[NULL](../query_language/syntax.md) выводится как `ᴺᵁᴸᴸ`. +[NULL](../sql_reference/syntax.md) выводится как `ᴺᵁᴸᴸ`. ``` sql SELECT * FROM t_null @@ -728,7 +728,7 @@ FixedString представлены просто как последовате Array представлены как длина в формате varint (unsigned [LEB128](https://en.wikipedia.org/wiki/LEB128)), а затем элементы массива, подряд. -Для поддержки [NULL](../query_language/syntax.md#null-literal) перед каждым значением типа [Nullable](../data_types/nullable.md) следует байт содержащий 1 или 0. Если байт 1, то значение равно NULL, и этот байт интерпретируется как отдельное значение (т.е. после него следует значение следующего поля). Если байт 0, то после байта следует значение поля (не равно NULL). +Для поддержки [NULL](../sql_reference/syntax.md#null-literal) перед каждым значением типа [Nullable](../sql_reference/data_types/nullable.md) следует байт содержащий 1 или 0. Если байт 1, то значение равно NULL, и этот байт интерпретируется как отдельное значение (т.е. после него следует значение следующего поля). Если байт 0, то после байта следует значение поля (не равно NULL). ## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes} @@ -740,7 +740,7 @@ Array представлены как длина в формате varint (unsig ## Values {#data-format-values} -Выводит каждую строку в скобках. Строки разделены запятыми. После последней строки запятой нет. Значения внутри скобок также разделены запятыми. Числа выводятся в десятичном виде без кавычек. Массивы выводятся в квадратных скобках. Строки, даты, даты-с-временем выводятся в кавычках. Правила экранирования и особенности парсинга аналогичны формату [TabSeparated](#tabseparated). При форматировании, лишние пробелы не ставятся, а при парсинге - допустимы и пропускаются (за исключением пробелов внутри значений типа массив, которые недопустимы). [NULL](../query_language/syntax.md) представляется как `NULL`. +Выводит каждую строку в скобках. Строки разделены запятыми. После последней строки запятой нет. Значения внутри скобок также разделены запятыми. Числа выводятся в десятичном виде без кавычек. Массивы выводятся в квадратных скобках. Строки, даты, даты-с-временем выводятся в кавычках. Правила экранирования и особенности парсинга аналогичны формату [TabSeparated](#tabseparated). При форматировании, лишние пробелы не ставятся, а при парсинге - допустимы и пропускаются (за исключением пробелов внутри значений типа массив, которые недопустимы). [NULL](../sql_reference/syntax.md) представляется как `NULL`. Минимальный набор символов, которых вам необходимо экранировать при передаче в Values формате: одинарная кавычка и обратный слеш. @@ -750,7 +750,7 @@ Array представлены как длина в формате varint (unsig Выводит каждое значение на отдельной строке, с указанием имени столбца. Формат удобно использовать для вывода одной-нескольких строк, если каждая строка состоит из большого количества столбцов. -[NULL](../query_language/syntax.md) выводится как `ᴺᵁᴸᴸ`. +[NULL](../sql_reference/syntax.md) выводится как `ᴺᵁᴸᴸ`. Пример: @@ -928,7 +928,7 @@ message MessageType { ``` ClickHouse попытается найти столбец с именем `x.y.z` (или `x_y_z`, или `X.y_Z` и т.п.). -Вложенные сообщения удобно использовать в качестве соответствия для [вложенной структуры данных](../data_types/nested_data_structures/nested.md). 
+Вложенные сообщения удобно использовать в качестве соответствия для [вложенной структуры данных](../sql_reference/data_types/nested_data_structures/nested.md). Значения по умолчанию, определённые в схеме `proto2`, например, @@ -940,7 +940,7 @@ message MessageType { } ``` -не применяются; вместо них используются определенные в таблице [значения по умолчанию](../query_language/create.md#create-default-values). +не применяются; вместо них используются определенные в таблице [значения по умолчанию](../sql_reference/statements/create.md#create-default-values). ClickHouse пишет и читает сообщения `Protocol Buffers` в формате `length-delimited`. Это означает, что перед каждым сообщением пишется его длина в формате [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints). См. также [как читать и записывать сообщения Protocol Buffers в формате length-delimited в различных языках программирования](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages). @@ -951,25 +951,25 @@ ClickHouse пишет и читает сообщения `Protocol Buffers` в ### Соответствие типов данных {#sootvetstvie-tipov-dannykh} -Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../data_types/index.md) ClickHouse для запросов `INSERT` и `SELECT`. +Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../sql_reference/data_types/index.md) ClickHouse для запросов `INSERT` и `SELECT`. | Тип данных Parquet (`INSERT`) | Тип данных ClickHouse | Тип данных Parquet (`SELECT`) | |-------------------------------|---------------------------------------------|-------------------------------| -| `UINT8`, `BOOL` | [UInt8](../data_types/int_uint.md) | `UINT8` | -| `INT8` | [Int8](../data_types/int_uint.md) | `INT8` | -| `UINT16` | [UInt16](../data_types/int_uint.md) | `UINT16` | -| `INT16` | [Int16](../data_types/int_uint.md) | `INT16` | -| `UINT32` | [UInt32](../data_types/int_uint.md) | `UINT32` | -| `INT32` | [Int32](../data_types/int_uint.md) | `INT32` | -| `UINT64` | [UInt64](../data_types/int_uint.md) | `UINT64` | -| `INT64` | [Int64](../data_types/int_uint.md) | `INT64` | -| `FLOAT`, `HALF_FLOAT` | [Float32](../data_types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](../data_types/float.md) | `DOUBLE` | -| `DATE32` | [Date](../data_types/date.md) | `UINT16` | -| `DATE64`, `TIMESTAMP` | [DateTime](../data_types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../data_types/string.md) | `STRING` | -| — | [FixedString](../data_types/fixedstring.md) | `STRING` | -| `DECIMAL` | [Decimal](../data_types/decimal.md) | `DECIMAL` | +| `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) | `UINT8` | +| `INT8` | [Int8](../sql_reference/data_types/int_uint.md) | `INT8` | +| `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) | `UINT16` | +| `INT16` | [Int16](../sql_reference/data_types/int_uint.md) | `INT16` | +| `UINT32` | [UInt32](../sql_reference/data_types/int_uint.md) | `UINT32` | +| `INT32` | [Int32](../sql_reference/data_types/int_uint.md) | `INT32` | +| `UINT64` | [UInt64](../sql_reference/data_types/int_uint.md) | `UINT64` | +| `INT64` | [Int64](../sql_reference/data_types/int_uint.md) | `INT64` | +| `FLOAT`, `HALF_FLOAT` | [Float32](../sql_reference/data_types/float.md) | `FLOAT` | +| `DOUBLE` | [Float64](../sql_reference/data_types/float.md) | `DOUBLE` | +| `DATE32` | [Date](../sql_reference/data_types/date.md) | `UINT16` | +| `DATE64`, `TIMESTAMP` | [DateTime](../sql_reference/data_types/datetime.md) | 
`UINT32` | +| `STRING`, `BINARY` | [String](../sql_reference/data_types/string.md) | `STRING` | +| — | [FixedString](../sql_reference/data_types/fixedstring.md) | `STRING` | +| `DECIMAL` | [Decimal](../sql_reference/data_types/decimal.md) | `DECIMAL` | ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`. @@ -991,7 +991,7 @@ $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Pa $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../operations/table_engines/hdfs.md). +Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../engines/table_engines/integrations/hdfs.md). ## ORC {#data-format-orc} @@ -999,24 +999,24 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ ### Соответствие типов данных {#sootvetstvie-tipov-dannykh-1} -Таблица показывает поддержанные типы данных и их соответствие [типам данных](../data_types/index.md) ClickHouse для запросов `INSERT`. +Таблица показывает поддержанные типы данных и их соответствие [типам данных](../sql_reference/data_types/index.md) ClickHouse для запросов `INSERT`. | Тип данных ORC (`INSERT`) | Тип данных ClickHouse | |---------------------------|---------------------------------------| -| `UINT8`, `BOOL` | [UInt8](../data_types/int_uint.md) | -| `INT8` | [Int8](../data_types/int_uint.md) | -| `UINT16` | [UInt16](../data_types/int_uint.md) | -| `INT16` | [Int16](../data_types/int_uint.md) | -| `UINT32` | [UInt32](../data_types/int_uint.md) | -| `INT32` | [Int32](../data_types/int_uint.md) | -| `UINT64` | [UInt64](../data_types/int_uint.md) | -| `INT64` | [Int64](../data_types/int_uint.md) | -| `FLOAT`, `HALF_FLOAT` | [Float32](../data_types/float.md) | -| `DOUBLE` | [Float64](../data_types/float.md) | -| `DATE32` | [Date](../data_types/date.md) | -| `DATE64`, `TIMESTAMP` | [DateTime](../data_types/datetime.md) | -| `STRING`, `BINARY` | [String](../data_types/string.md) | -| `DECIMAL` | [Decimal](../data_types/decimal.md) | +| `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) | +| `INT8` | [Int8](../sql_reference/data_types/int_uint.md) | +| `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) | +| `INT16` | [Int16](../sql_reference/data_types/int_uint.md) | +| `UINT32` | [UInt32](../sql_reference/data_types/int_uint.md) | +| `INT32` | [Int32](../sql_reference/data_types/int_uint.md) | +| `UINT64` | [UInt64](../sql_reference/data_types/int_uint.md) | +| `INT64` | [Int64](../sql_reference/data_types/int_uint.md) | +| `FLOAT`, `HALF_FLOAT` | [Float32](../sql_reference/data_types/float.md) | +| `DOUBLE` | [Float64](../sql_reference/data_types/float.md) | +| `DATE32` | [Date](../sql_reference/data_types/date.md) | +| `DATE64`, `TIMESTAMP` | [DateTime](../sql_reference/data_types/datetime.md) | +| `STRING`, `BINARY` | [String](../sql_reference/data_types/string.md) | +| `DECIMAL` | [Decimal](../sql_reference/data_types/decimal.md) | ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`. 
@@ -1032,7 +1032,7 @@ ClickHouse поддерживает настраиваемую точность
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```

-Для обмена данных с Hadoop можно использовать [движок таблиц HDFS](../operations/table_engines/hdfs.md).
+Для обмена данными с Hadoop можно использовать [движок таблиц HDFS](../engines/table_engines/integrations/hdfs.md).

## Схема формата {#formatschema}

@@ -1045,6 +1045,6 @@ $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT OR
относительно текущей директории на клиенте. Если клиент используется в [batch режиме](../interfaces/cli.md#cli_usage), то в записи схемы допускается только относительный путь, из соображений безопасности.

Если для ввода/вывода данных используется [HTTP-интерфейс](../interfaces/http.md), то файл со схемой должен располагаться на сервере в каталоге,
-указанном в параметре [format\_schema\_path](../operations/server_settings/settings.md#server_settings-format_schema_path) конфигурации сервера.
+указанном в параметре [format\_schema\_path](../operations/server_configuration_parameters/settings.md#server_configuration_parameters-format_schema_path) конфигурации сервера.

[Оригинальная статья](https://clickhouse.tech/docs/ru/interfaces/formats/)

diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md
index 9a89a25bec7..add57183824 100644
--- a/docs/ru/interfaces/http.md
+++ b/docs/ru/interfaces/http.md
@@ -3,7 +3,7 @@
HTTP интерфейс позволяет использовать ClickHouse на любой платформе, из любого языка программирования. У нас он используется для работы из Java и Perl, а также из shell-скриптов. В других отделах, HTTP интерфейс используется из Perl, Python и Go. HTTP интерфейс более ограничен по сравнению с родным интерфейсом, но является более совместимым.

По умолчанию, clickhouse-server слушает HTTP на порту 8123 (это можно изменить в конфиге).
-Если запросить GET / без параметров, то вернётся строка заданная с помощью настройки [http\_server\_default\_response](../operations/server_settings/settings.md#server_settings-http_server_default_response). Значение по умолчанию «Ok.» (с переводом строки на конце).
+Если запросить GET / без параметров, то вернётся строка, заданная с помощью настройки [http\_server\_default\_response](../operations/server_configuration_parameters/settings.md#server_configuration_parameters-http_server_default_response). Значение по умолчанию «Ok.» (с переводом строки на конце).

``` bash
$ curl 'http://localhost:8123/'

diff --git a/docs/ru/interfaces/mysql.md b/docs/ru/interfaces/mysql.md
index 146947342cd..d550b430c69 100644
--- a/docs/ru/interfaces/mysql.md
+++ b/docs/ru/interfaces/mysql.md
@@ -1,6 +1,6 @@
# MySQL-интерфейс {#mysql-interface}

-ClickHouse поддерживает взаимодействие по протоколу MySQL. Данная функция включается настройкой [mysql\_port](../operations/server_settings/settings.md#server_settings-mysql_port) в конфигурационном файле:
+ClickHouse поддерживает взаимодействие по протоколу MySQL. 
Данная функция включается настройкой [mysql\_port](../operations/server_configuration_parameters/settings.md#server_configuration_parameters-mysql_port) в конфигурационном файле:
 
 ``` xml
 <mysql_port>9004</mysql_port>
 ```
diff --git a/docs/ru/interfaces/third-party/index.md b/docs/ru/interfaces/third-party/index.md
new file mode 100644
index 00000000000..16d315d059c
--- /dev/null
+++ b/docs/ru/interfaces/third-party/index.md
@@ -0,0 +1,5 @@
+---
+toc_folder_title: Third-Party
+toc_priority: 24
+---
+
diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md
index b835dc949bf..c05ff4d062f 100644
--- a/docs/ru/interfaces/third-party/integrations.md
+++ b/docs/ru/interfaces/third-party/integrations.md
@@ -35,7 +35,7 @@
     - [graphouse](https://github.com/yandex/graphouse)
     - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
     - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
-    - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - оптимизирует партиции таблиц [\*GraphiteMergeTree](../../operations/table_engines/graphitemergetree.md#graphitemergetree) согласно правилам в [конфигурации rollup](../../operations/table_engines/graphitemergetree.md#rollup-configuration)
+    - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - оптимизирует партиции таблиц [\*GraphiteMergeTree](../../engines/table_engines/mergetree_family/graphitemergetree.md#graphitemergetree) согласно правилам в [конфигурации rollup](../../engines/table_engines/mergetree_family/graphitemergetree.md#rollup-configuration)
 - [Grafana](https://grafana.com/)
     - [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana)
 - [Prometheus](https://prometheus.io/)
@@ -72,7 +72,7 @@
     - [RClickhouse](https://github.com/IMSMWU/RClickhouse) (использует [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp))
 - Java
     - [Hadoop](http://hadoop.apache.org)
-        - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (использует [JDBC](../../query_language/table_functions/jdbc.md))
+        - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (использует [JDBC](../../sql_reference/table_functions/jdbc.md))
 - Scala
     - [Akka](https://akka.io)
         - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
diff --git a/docs/ru/introduction/adopters.md b/docs/ru/introduction/adopters.md
index ef841b2fa05..1b7d56b19d1 100644
--- a/docs/ru/introduction/adopters.md
+++ b/docs/ru/introduction/adopters.md
@@ -1,79 +1,80 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---
 
-# ClickHouse Adopters {#clickhouse-adopters}
+# Компании, использующие ClickHouse {#clickhouse-adopters}
 
-!!! warning "Disclaimer"
-    The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
+!!! warning "Отказ от ответственности"
+    Следующий список компаний, использующих ClickHouse, и их истории успеха собраны из открытых источников, поэтому они могут отличаться от текущей реальности. Мы будем признательны, если вы поделитесь историей внедрения ClickHouse в вашей компании и [добавите её в список](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), но, пожалуйста, убедитесь, что при этом у вас не возникнет проблем с NDA. Также полезны обновления со ссылками на публикации других компаний.
 
-| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference |
-|---------|----------|---------|--------------|----------------------------|-----------|
-| [2gis](https://2gis.ru) | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
-| [Aloha Browser](https://alohabrowser.com/) | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) |
-| [Amadeus](https://amadeus.com/) | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
-| [Appsflyer](https://www.appsflyer.com) | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
-| [ArenaData](https://arenadata.tech/) | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
-| [Badoo](https://badoo.com) | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
-| [Benocs](https://www.benocs.com/) | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
-| [Bloomberg](https://www.bloomberg.com/) | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
-| [Bloxy](https://bloxy.info) | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
-| `Dataliance/UltraPower` | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
-| [CARTO](https://carto.com/) | Business Intelligence | Geo analytics | — | — | [Geospatial processing with Clickhouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
-| [CERN](http://public.web.cern.ch/public/) | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
-| [Cisco](http://cisco.com/) | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
-| [Citadel Securities](https://www.citadelsecurities.com/) | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
-| 
[Citymobil](https://city-mobil.ru) | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | -| [ContentSquare](https://contentsquare.com) | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | -| [Cloudflare](https://cloudflare.com) | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | -| [Corunet](https://coru.net/) | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | -| [CraiditX 氪信](https://creditx.com) | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | -| [Criteo/Storetail](https://www.criteo.com/) | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | -| [Deutsche Bank](https://db.com) | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | -| [Diva-e](https://www.diva-e.com) | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | -| [Exness](https://www.exness.com) | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | -| [Geniee](https://geniee.co.jp) | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | -| [HUYA](https://www.huya.com/) | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| [Idealista](https://www.idealista.com) | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | -| [Infovista](https://www.infovista.com/) | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | -| [InnoGames](https://www.innogames.com) | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | -| [Integros](https://integros.com) | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| [Kodiak Data](https://www.kodiakdata.com/) | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | -| [Kontur](https://kontur.ru) | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| [LifeStreet](https://lifestreet.com/) | Ad 
network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | -| [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Cloud services | Main product | — | — | [Running ClickHouse Instance, in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| [MessageBird](https://www.messagebird.com) | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | -| [MGID](https://www.mgid.com/) | Ad network | Web-analytics | — | — | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) | -| [OneAPM](https://www.oneapm.com/) | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | -| [Pragma Innovation](http://www.pragma-innovation.fr/) | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | -| [QINGCLOUD](https://www.qingcloud.com/) | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | -| [Qrator](https://qrator.net) | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| [Beijing PERCENT Information Technology Co., Ltd.](https://www.percent.cn/) | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | -| [Rambler](https://rambler.ru) | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | -| [Tencent](https://www.tencent.com) | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | -| [Traffic Stars](https://trafficstars.com/) | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | -| [S7 Airlines](https://www.s7.ru) | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| [SEMrush](https://www.semrush.com/) | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | -| [scireum GmbH](https://www.scireum.de/) | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | -| [Sentry](https://sentry.io/) | Software developer | Backend for product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | -| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr) | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | -| [seo.do](https://seo.do/) | Analytics | Main product | — | — | [Slides in English, November 
2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
-| [Sina](http://english.sina.com/index.html) | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
-| [SMI2](https://smi2.ru/) | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
-| [Splunk](https://www.splunk.com/) | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
-| [Spotify](https://www.spotify.com) | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
-| [Tencent](https://www.tencent.com) | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
-| [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
-| [VKontakte](https://vk.com) | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
-| [Wisebits](https://wisebits.com/) | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
-| [Xiaoxin Tech.](https://www.xiaoheiban.cn/) | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
-| [Ximalaya](https://www.ximalaya.com/) | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
-| [Yandex Cloud](https://cloud.yandex.ru/services/managed-clickhouse) | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
-| [Yandex DataLens](https://cloud.yandex.ru/services/datalens) | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
-| [Yandex Market](https://market.yandex.ru/) | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
-| [Yandex Metrica](https://metrica.yandex.com) | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
-| [ЦВТ](https://htc-cs.ru/) | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
-| [МКБ](https://mkb.ru/) | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
-| [金数据](https://jinshuju.net) | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
+| Компания | Отрасль | Сценарий использования | Размер кластера | Размер (не)сжатых данных\* | Ссылка |
+|----------|---------|------------------------|-----------------|----------------------------|--------|
+| [2ГИС](https://2gis.ru) | Карты | Мониторинг | — | — | [Доклад на русском языке, июль 2019](https://youtu.be/58sPkXfq6nw) |
+| [Aloha Browser](https://alohabrowser.com/) | Мобильное приложение | Серверная часть браузера | — | — | [Слайды на русском языке, май 2019](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) |
+| [Amadeus](https://amadeus.com/) | Путешествия | Аналитика | — | — | [Пресс-релиз, апрель 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
+| [Appsflyer](https://www.appsflyer.com) | Мобильная аналитика | Основной продукт | — | — | [Доклад на русском языке, июль 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
+| [ArenaData](https://arenadata.tech/) | Платформа данных | Основной продукт | — | — | [Слайды на русском языке, декабрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
+| [Badoo](https://badoo.com) | Знакомства | Временные ряды | — | — | [Слайды на русском языке, декабрь 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
+| [Benocs](https://www.benocs.com/) | Сетевая телеметрия и аналитика | Основной продукт | — | — | [Слайды на английском языке, октябрь 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
+| [Bloomberg](https://www.bloomberg.com/) | Финансы, СМИ | Мониторинг | 102 сервера | — | [Слайды, май 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
+| [Bloxy](https://bloxy.info) | Блокчейн | Аналитика | — | — | [Слайды на русском языке, август 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
+| `Dataliance/UltraPower` | Телекоммуникации | Аналитика | — | — | [Слайды на китайском языке, январь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
+| [CARTO](https://carto.com/) | Бизнес-аналитика | Геоаналитика | — | — | [Геопространственная обработка с помощью ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
+| [CERN](http://public.web.cern.ch/public/) | Исследования | Эксперимент | — | — | [Пресс-релиз, апрель 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
+| [Cisco](http://cisco.com/) | Сети | Анализ трафика | — | — | [Лайтнинг-доклад, октябрь 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
+| [Citadel Securities](https://www.citadelsecurities.com/) | Финансы | — | — | — | [Вклад в код, март 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
+| [Ситимобил](https://city-mobil.ru) | Такси | Аналитика | — | — | [Запись в блоге на русском языке, март 2020](https://habr.com/en/company/citymobil/blog/490660/) |
+| [ContentSquare](https://contentsquare.com) | Веб-аналитика | Основной продукт | — | — | [Запись в блоге на французском языке, ноябрь 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
+| [Cloudflare](https://cloudflare.com) | CDN | Анализ трафика | 36 серверов | — | [Запись в блоге, май 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Запись в блоге, март 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
+| [Corunet](https://coru.net/) | Аналитика | Основной продукт | — | — | [Слайды на английском языке, апрель 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
+| [CraiditX 氪信](https://creditx.com) | ИИ в финансах | Анализ | — | — | [Слайды на английском языке, ноябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
+| [Criteo/Storetail](https://www.criteo.com/) | Розничная торговля | Основной продукт | — | — | [Слайды на английском языке, октябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
+| [Deutsche Bank](https://db.com) | Финансы | BI-аналитика | — | — | [Слайды на английском языке, октябрь 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
+| [Diva-e](https://www.diva-e.com) | Цифровой консалтинг | Основной продукт | — | — | [Слайды на английском языке, сентябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
+| [Exness](https://www.exness.com) | Трейдинг | Метрики, логирование | — | — | [Доклад на русском языке, май 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
+| [Geniee](https://geniee.co.jp) | Рекламная сеть | Основной продукт | — | — | [Запись в блоге на японском языке, июль 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
+| [HUYA](https://www.huya.com/) | Потоковое видео | Аналитика | — | — | [Слайды на китайском языке, октябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
+| [Idealista](https://www.idealista.com) | Недвижимость | Аналитика | — | — | [Запись в блоге на английском языке, апрель 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
+| [Infovista](https://www.infovista.com/) | Сети | Аналитика | — | — | [Слайды на английском языке, октябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
+| [InnoGames](https://www.innogames.com) | Игры | Метрики, логирование | — | — | [Слайды на русском языке, сентябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
+| [Integros](https://integros.com) | Платформа для видеосервисов | Аналитика | — | — | [Слайды на русском языке, май 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
+| [Kodiak Data](https://www.kodiakdata.com/) | Облака | Основной продукт | — | — | [Слайды на английском языке, апрель 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
+| [Контур](https://kontur.ru) | Разработка программного обеспечения | Метрики | — | — | [Доклад на русском языке, ноябрь 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
+| [LifeStreet](https://lifestreet.com/) | Рекламная сеть | Основной продукт | 75 серверов (3 реплики) | 5,27 ПиБ | [Запись в блоге на русском языке, февраль 2017](https://habr.com/en/post/322620/) |
+| [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Облачные сервисы | Основной продукт | — | — | [Запуск экземпляра ClickHouse, на русском языке](https://mcs.mail.ru/help/db-create/clickhouse#) |
+| [MessageBird](https://www.messagebird.com) | Телекоммуникации | Статистика | — | — | [Слайды на английском языке, ноябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
+| [MGID](https://www.mgid.com/) | Рекламная сеть | Веб-аналитика | — | — | [Наш опыт внедрения аналитической СУБД ClickHouse, на русском языке](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
+| [OneAPM](https://www.oneapm.com/) | Мониторинг и анализ данных | Основной продукт | — | — | [Слайды на китайском языке, октябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
+| [Pragma Innovation](http://www.pragma-innovation.fr/) | Телеметрия и анализ больших данных | Основной продукт | — | — | [Слайды на английском языке, октябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
+| [QINGCLOUD](https://www.qingcloud.com/) | Облачные сервисы | Основной продукт | — | — | [Слайды на китайском языке, октябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
+| [Qrator](https://qrator.net) | Защита от DDoS-атак | Основной продукт | — | — | [Запись в блоге, март 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
+| [Beijing PERCENT Information Technology Co., Ltd.](https://www.percent.cn/) | Аналитика | Основной продукт | — | — | [Слайды на китайском языке, июнь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
+| [Рамблер](https://rambler.ru) | Интернет-услуги | Аналитика | — | — | [Доклад на русском языке, апрель 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
+| [Tencent](https://www.tencent.com) | Обмен сообщениями | Логирование | — | — | [Доклад на китайском языке, ноябрь 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
+| [Traffic Stars](https://trafficstars.com/) | Рекламная сеть | — | — | — | [Слайды на русском языке, май 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
+| [S7 Airlines](https://www.s7.ru) | Авиакомпании | Метрики, логирование | — | — | [Доклад на русском языке, март 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
+| [SEMrush](https://www.semrush.com/) | Маркетинг | Основной продукт | — | — | [Слайды на русском языке, август 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
+| [scireum GmbH](https://www.scireum.de/) | Электронная коммерция | Основной продукт | — | — | [Доклад на немецком языке, февраль 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
+| [Sentry](https://sentry.io/) | Разработка программного обеспечения | Бэкенд для продукта | — | — | [Запись в блоге на английском языке, май 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
+| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr) | Государственное социальное обеспечение | Аналитика | — | — | [Слайды на английском языке, ноябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
+| [seo.do](https://seo.do/) | Аналитика | Основной продукт | — | — | [Слайды на английском языке, ноябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
+| [Sina](http://english.sina.com/index.html) | Новости | — | — | — | [Слайды на китайском языке, октябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
+| [SMI2](https://smi2.ru/) | Новости | Аналитика | — | — | [Запись в блоге на русском языке, ноябрь 2017](https://habr.com/ru/company/smi2/blog/314558/) |
+| [Splunk](https://www.splunk.com/) | Бизнес-аналитика | Основной продукт | — | — | [Слайды на английском языке, январь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
+| [Spotify](https://www.spotify.com) | Музыка | Эксперименты | — | — | [Слайды, июль 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
+| [Tencent](https://www.tencent.com) | Большие данные | Обработка данных | — | — | [Слайды на китайском языке, октябрь 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
+| [Uber](https://www.uber.com) | Такси | Логирование | — | — | [Слайды, февраль 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
+| [ВКонтакте](https://vk.com) | Социальная сеть | Статистика, логирование | — | — | [Слайды на русском языке, август 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
+| [Wisebits](https://wisebits.com/) | IT-решения | Аналитика | — | — | [Слайды на русском языке, май 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
+| [Xiaoxin Tech.](https://www.xiaoheiban.cn/) | Образование | Общее назначение | — | — | [Слайды на английском языке, ноябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
+| [Ximalaya](https://www.ximalaya.com/) | Обмен аудио | OLAP | — | — | [Слайды на английском языке, ноябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
+| [Яндекс.Облако](https://cloud.yandex.ru/services/managed-clickhouse) | Публичное облако | Основной продукт | — | — | [Доклад на русском языке, декабрь 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
+| [Яндекс DataLens](https://cloud.yandex.ru/services/datalens) | Бизнес-аналитика | Основной продукт | — | — | [Слайды на русском языке, декабрь 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
+| [Яндекс.Маркет](https://market.yandex.ru/) | Электронная коммерция | Метрики, логирование | — | — | [Доклад на русском языке, январь 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
+| [Яндекс.Метрика](https://metrica.yandex.com) | Веб-аналитика | Основной продукт | 360 серверов в одном кластере, 1862 сервера в одном отделе | 66,41 ПиБ / 5,68 ПиБ | [Слайды, февраль 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
+| [ЦВТ](https://htc-cs.ru/) | Разработка программного обеспечения | Метрики, логирование | — | — | [Запись в блоге, март 2019, на русском языке](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
+| [МКБ](https://mkb.ru/) | Банк | Мониторинг веб-систем | — | — | [Слайды на русском языке, сентябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
+| [金数据](https://jinshuju.net) | BI-аналитика | Основной продукт | — | — | [Слайды на китайском языке, октябрь 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
 
-[Original article](https://clickhouse.tech/docs/en/introduction/adopters/)
+[Оригинальная статья](https://clickhouse.tech/docs/en/introduction/adopters/)
diff --git a/docs/ru/introduction/distinctive_features.md b/docs/ru/introduction/distinctive_features.md
index 093053a3b89..0cc40e4e162 100644
--- a/docs/ru/introduction/distinctive_features.md
+++ b/docs/ru/introduction/distinctive_features.md
@@ -59,6 +59,6 @@ ClickHouse предоставляет различные способы разм
 
 Используется асинхронная multimaster репликация. После записи на любую доступную реплику, данные распространяются на все остальные реплики в фоне. Система поддерживает полную идентичность данных на разных репликах. Восстановление после большинства сбоев осуществляется автоматически, а в сложных случаях — полуавтоматически. При необходимости, можно [включить кворумную запись](../operations/settings/settings.md) данных.
 
-Подробнее смотрите раздел [Репликация данных](../operations/table_engines/replication.md).
+Подробнее смотрите раздел [Репликация данных](../engines/table_engines/mergetree_family/replication.md).
 
 [Оригинальная статья](https://clickhouse.tech/docs/ru/introduction/distinctive_features/)
diff --git a/docs/ru/introduction/index.md b/docs/ru/introduction/index.md
new file mode 100644
index 00000000000..ba80f9c2640
--- /dev/null
+++ b/docs/ru/introduction/index.md
@@ -0,0 +1,6 @@
+---
+toc_folder_title: Introduction
+toc_priority: 1
+---
+
+
diff --git a/docs/ru/operations/access_rights.md b/docs/ru/operations/access_rights.md
index d4cd7793bf1..18c2a25377a 100644
--- a/docs/ru/operations/access_rights.md
+++ b/docs/ru/operations/access_rights.md
@@ -61,7 +61,7 @@
 
 Здесь видно объявление двух пользователей - `default` и `web`. Пользователя `web` мы добавили самостоятельно.
 
-Пользователь `default` выбирается в случаях, когда имя пользователя не передаётся. Также пользователь `default` может использоваться при распределённой обработке запроса - если в конфигурации кластера для сервера не указаны `user` и `password`. (см. раздел о движке [Distributed](../operations/table_engines/distributed.md)).
+Пользователь `default` выбирается в случаях, когда имя пользователя не передаётся. Также пользователь `default` может использоваться при распределённой обработке запроса - если в конфигурации кластера для сервера не указаны `user` и `password` (см. раздел о движке [Distributed](../engines/table_engines/special/distributed.md)).
 
 Пользователь, который используется для обмена информацией между серверами, объединенными в кластер, не должен иметь существенных ограничений или квот - иначе распределённые запросы сломаются.
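+Для иллюстрации ниже приведён условный набросок (имена кластера и таблиц вымышленные): распределённая таблица на движке Distributed обращается к удалённым серверам под пользователем из конфигурации кластера, а если он не указан, под пользователем `default`:
+
+``` sql
+-- Условный пример: кластер test_cluster и таблица hits_local вымышленные.
+CREATE TABLE hits_all AS hits_local
+ENGINE = Distributed(test_cluster, default, hits_local);
+
+-- Запрос выполнится на удалённых серверах под пользователем,
+-- указанным в конфигурации кластера (или под default, если он не указан).
+SELECT count() FROM hits_all;
+```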
diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 4888f2b418e..601eaa4d6d9 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -1,6 +1,6 @@ # Резервное копирование данных {#rezervnoe-kopirovanie-dannykh} -[Репликация](table_engines/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. +[Репликация](../engines/table_engines/mergetree_family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. Для того чтобы эффективно уменьшить возможные человеческие ошибки, следует тщательно подготовить стратегию резервного копирования и восстановления данных **заранее**. @@ -15,11 +15,11 @@ ## Снимки файловой системы {#snimki-failovoi-sistemy} -Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](table_engines/distributed.md) таблиц, используемых для запросов `SELECT`. Снимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным. +Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](../engines/table_engines/special/distributed.md) таблиц, используемых для запросов `SELECT`. Снимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным. ## clickhouse-copier {#clickhouse-copier} -[clickhouse-copier](utils/clickhouse-copier.md) — это универсальный инструмент, который изначально был создан для перешардирования таблиц с петабайтами данных. 
Его также можно использовать для резервного копирования и восстановления, поскольку он надёжно копирует данные между таблицами и кластерами ClickHouse. +[clickhouse-copier](utilities/clickhouse-copier.md) — это универсальный инструмент, который изначально был создан для перешардирования таблиц с петабайтами данных. Его также можно использовать для резервного копирования и восстановления, поскольку он надёжно копирует данные между таблицами и кластерами ClickHouse. Для небольших объёмов данных можно применять `INSERT INTO ... SELECT ...` в удалённые таблицы. @@ -27,7 +27,7 @@ ClickHouse позволяет использовать запрос `ALTER TABLE ... FREEZE PARTITION ...` для создания локальной копии партиций таблицы. Это реализуется с помощью жестких ссылок (hardlinks) на каталог `/var/lib/clickhouse/shadow/`, поэтому такая копия обычно не занимает дополнительное место на диске для старых данных. Созданные копии файлов не обрабатываются сервером ClickHouse, поэтому вы можете просто оставить их там: у вас будет простая резервная копия, которая не требует дополнительной внешней системы, однако при аппаратных проблемах вы можете утратить и актуальные данные и сохраненную копию. По этой причине, лучше удаленно скопировать их в другое место, а затем удалить локальную копию. Распределенные файловые системы и хранилища объектов по-прежнему являются хорошими вариантами для этого, однако можно использовать и обычные присоединенные файловые серверы с достаточно большой ёмкостью (в этом случае передача будет происходить через сетевую файловую систему или, возможно, [rsync](https://en.wikipedia.org/wiki/Rsync)). -Дополнительные сведения о запросах, связанных с манипуляциями партициями, см. в разделе [ALTER](../query_language/alter.md#alter_manipulations-with-partitions). +Дополнительные сведения о запросах, связанных с манипуляциями партициями, см. в разделе [ALTER](../sql_reference/statements/alter.md#alter_manipulations-with-partitions). Для автоматизации этого подхода доступен инструмент от сторонних разработчиков: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). diff --git a/docs/ru/operations/configuration_files.md b/docs/ru/operations/configuration_files.md index 0bcae78a128..19f7ea9d5db 100644 --- a/docs/ru/operations/configuration_files.md +++ b/docs/ru/operations/configuration_files.md @@ -12,7 +12,7 @@ Если указано `remove` - удалить элемент. -Также в конфиге могут быть указаны «подстановки». Если у элемента присутствует атрибут `incl`, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию, путь к файлу с подстановками - `/etc/metrika.xml`. Он может быть изменён в конфигурации сервера в элементе [include\_from](server_settings/settings.md#server_settings-include_from). Значения подстановок указываются в элементах `/yandex/имя_подстановки` этого файла. Если подстановка, заданная в `incl` отсутствует, то в лог попадает соответствующая запись. Чтобы ClickHouse не писал в лог об отсутствии подстановки, необходимо указать атрибут `optional="true"` (например, настройка [macros](server_settings/settings.md)). +Также в конфиге могут быть указаны «подстановки». Если у элемента присутствует атрибут `incl`, то в качестве значения будет использована соответствующая подстановка из файла. По умолчанию, путь к файлу с подстановками - `/etc/metrika.xml`. Он может быть изменён в конфигурации сервера в элементе [include\_from](server_configuration_parameters/settings.md#server_configuration_parameters-include_from). 
Значения подстановок указываются в элементах `/yandex/имя_подстановки` этого файла. Если подстановка, заданная в `incl`, отсутствует, то в лог попадает соответствующая запись. Чтобы ClickHouse не писал в лог об отсутствии подстановки, необходимо указать атрибут `optional="true"` (например, настройка [macros](server_configuration_parameters/settings.md)).
 
 Подстановки могут также выполняться из ZooKeeper. Для этого укажите у элемента атрибут `from_zk = "/path/to/node"`. Значение элемента заменится на содержимое узла `/path/to/node` в ZooKeeper. В ZooKeeper-узел также можно положить целое XML-поддерево, оно будет целиком вставлено в исходный элемент.
diff --git a/docs/ru/operations/index.md b/docs/ru/operations/index.md
index 2e85f1bf816..3df5dbb1f7e 100644
--- a/docs/ru/operations/index.md
+++ b/docs/ru/operations/index.md
@@ -12,7 +12,7 @@
 -   [Конфигурационные файлы](configuration_files.md)
 -   [Квоты](quotas.md)
 -   [Системные таблицы](system_tables.md)
--   [Конфигурационные параметры сервера](server_settings/index.md)
+-   [Конфигурационные параметры сервера](server_configuration_parameters/index.md)
 -   [Тестирование серверов с помощью ClickHouse](performance_test.md)
 -   [Настройки](settings/index.md)
 -   [Утилиты](utils/index.md)
diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md
index 8681261bf6f..469d712376b 100644
--- a/docs/ru/operations/monitoring.md
+++ b/docs/ru/operations/monitoring.md
@@ -21,7 +21,7 @@ ClickHouse не отслеживает состояние аппаратных
 
 Сервер ClickHouse имеет встроенные инструменты мониторинга.
 
-Для отслеживания событий на сервере используйте логи. Подробнее смотрите в разделе конфигурационного файла [logger](server_settings/settings.md#server_settings-logger).
+Для отслеживания событий на сервере используйте логи. Подробнее смотрите в разделе конфигурационного файла [logger](server_configuration_parameters/settings.md#server_configuration_parameters-logger).
 
 ClickHouse собирает:
 
@@ -30,7 +30,7 @@ ClickHouse собирает:
 
 Метрики находятся в таблицах [system.metrics](system_tables.md#system_tables-metrics), [system.events](system_tables.md#system_tables-events) и [system.asynchronous\_metrics](system_tables.md#system_tables-asynchronous_metrics).
 
-Можно настроить экспорт метрик из ClickHouse в [Graphite](https://github.com/graphite-project). Смотрите секцию [graphite](server_settings/settings.md#server_settings-graphite) конфигурационного файла ClickHouse. Перед настройкой экспорта метрик необходимо настроить Graphite, как указано в [официальном руководстве](https://graphite.readthedocs.io/en/latest/install.html).
+Можно настроить экспорт метрик из ClickHouse в [Graphite](https://github.com/graphite-project). Смотрите секцию [graphite](server_configuration_parameters/settings.md#server_configuration_parameters-graphite) конфигурационного файла ClickHouse. Перед настройкой экспорта метрик необходимо настроить Graphite, как указано в [официальном руководстве](https://graphite.readthedocs.io/en/latest/install.html).
 
 Также можно отслеживать доступность сервера через HTTP API. Отправьте `HTTP GET` к ресурсу `/ping`. Если сервер доступен, он отвечает `200 OK`.
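+Например, текущие значения метрик можно посмотреть таким запросом (набросок; фильтр по имени метрики условный):
+
+``` sql
+-- Текущие значения метрик сервера из системной таблицы system.metrics.
+SELECT metric, value
+FROM system.metrics
+WHERE metric LIKE '%Connection%'
+LIMIT 10;
+```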
diff --git a/docs/ru/operations/optimizing_performance/index.md b/docs/ru/operations/optimizing_performance/index.md
new file mode 100644
index 00000000000..4f4cbb66d24
--- /dev/null
+++ b/docs/ru/operations/optimizing_performance/index.md
@@ -0,0 +1,5 @@
+---
+toc_folder_title: Optimizing Performance
+toc_priority: 52
+---
+
diff --git a/docs/ru/operations/optimizing_performance/sampling_query_profiler.md b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md
new file mode 100644
index 00000000000..d2cc9738749
--- /dev/null
+++ b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md
@@ -0,0 +1,62 @@
+---
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
+---
+
+# Семплирующий профилировщик запросов {#sampling-query-profiler}
+
+В ClickHouse есть семплирующий профилировщик, который позволяет анализировать выполнение запросов. С его помощью можно найти участки исходного кода, которые чаще всего выполняются во время запроса. Профилировщик учитывает как процессорное время, так и астрономическое время (wall-clock), включая время простоя.
+
+Чтобы использовать профилировщик:
+
+- Настройте секцию [trace\_log](../server_configuration_parameters/settings.md#server_configuration_parameters-trace_log) в конфигурации сервера.
+
+    Эта секция настраивает системную таблицу [trace\_log](../system_tables.md#system_tables-trace_log), в которой сохраняются результаты работы профилировщика. Она настроена по умолчанию. Помните, что данные в этой таблице действительны только для работающего сервера: после перезапуска сервера ClickHouse не очищает таблицу, и сохранённые адреса виртуальной памяти могут стать недействительными.
+
+- Настройте параметры [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) или [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns). Обе настройки можно использовать одновременно.
+
+    Эти параметры задают таймеры профилировщика. Поскольку это настройки сеанса, можно получить разную частоту семплирования для всего сервера, отдельных пользователей или профилей пользователей, для интерактивного сеанса и для каждого отдельного запроса.
+
+Частота семплирования по умолчанию составляет одну выборку в секунду, включены оба таймера (процессорного и реального времени). Такой частоты достаточно, чтобы собрать полезную информацию о кластере ClickHouse; при этом профилировщик не влияет на производительность сервера ClickHouse. Если нужно профилировать каждый отдельный запрос, попробуйте более высокую частоту семплирования.
+
+Чтобы проанализировать системную таблицу `trace_log`:
+
+- Установите пакет `clickhouse-common-static-dbg`. См. [Установка из пакетов DEB](../../getting_started/install.md#install-from-deb-packages).
+
+- Включите функции интроспекции с помощью настройки [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions).
+
+    По соображениям безопасности функции интроспекции по умолчанию отключены.
+
+- Используйте [функции интроспекции](../../sql_reference/functions/introspection.md) `addressToLine`, `addressToSymbol` и `demangle`, чтобы получить имена функций и их позиции в коде ClickHouse. Чтобы получить профиль для какого-либо запроса, агрегируйте данные из таблицы `trace_log`. Агрегировать можно по отдельным функциям или по целым трассировкам стека.
+
+Если нужно визуализировать информацию из `trace_log`, попробуйте [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) и [speedscope](https://github.com/laplab/clickhouse-speedscope).
+
+## Пример {#example}
+
+В этом примере мы:
+
+- Отфильтруем данные `trace_log` по идентификатору запроса и текущей дате.
+
+- Агрегируем данные по трассировке стека.
+
+- С помощью функций интроспекции получим отчёт, содержащий:
+
+    - имена символов и соответствующие им функции исходного кода;
+    - расположение этих функций в исходном коде.
+
+``` sql
+SELECT
+    count(),
+    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n    ', addressToLine(x)), trace), '\n') AS sym
+FROM system.trace_log
+WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
+GROUP BY trace
+ORDER BY count() DESC
+LIMIT 10
+```
+
+``` text
+{% include "operations/performance/sampling_query_profiler_example_result.txt" %}
+```
diff --git a/docs/ru/operations/performance/sampling_query_profiler.md b/docs/ru/operations/performance/sampling_query_profiler.md
deleted file mode 100644
index 25368fcd883..00000000000
--- a/docs/ru/operations/performance/sampling_query_profiler.md
+++ /dev/null
@@ -1,61 +0,0 @@
----
-en_copy: true
----
-
-# Sampling Query Profiler {#sampling-query-profiler}
-
-ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time.
-
-To use profiler:
-
-- Setup the [trace\_log](../server_settings/settings.md#server_settings-trace_log) section of the server configuration.
-
-    This section configures the [trace\_log](../system_tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn’t clean up the table and all the stored virtual memory address may become invalid.
-
-- Setup the [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
-
-    These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query.
-
-The default sampling frequency is one sample per second and both CPU and real timers are enabled. This frequency allows collecting enough information about ClickHouse cluster. At the same time, working with this frequency, profiler doesn’t affect ClickHouse server’s performance. If you need to profile each individual query try to use higher sampling frequency.
-
-To analyze the `trace_log` system table:
-
-- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting_started/install.md#install-from-deb-packages).
-
-- Allow introspection functions by the [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions) setting.
-
-    For security reasons, introspection functions are disabled by default.
-
-- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../query_language/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.
-
-If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).
-
-## Example {#example}
-
-In this example we:
-
-- Filtering `trace_log` data by a query identifier and the current date.
-
-- Aggregating by stack trace.
-
-- Using introspection functions, we will get a report of:
-
-    - Names of symbols and corresponding source code functions.
-    - Source code locations of these functions.
-
-
-
-``` sql
-SELECT
-    count(),
-    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n    ', addressToLine(x)), trace), '\n') AS sym
-FROM system.trace_log
-WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
-GROUP BY trace
-ORDER BY count() DESC
-LIMIT 10
-```
-
-``` text
-{% include "operations/performance/sampling_query_profiler_example_result.txt" %}
-```
diff --git a/docs/ru/operations/performance/sampling_query_profiler_example_result.txt b/docs/ru/operations/performance/sampling_query_profiler_example_result.txt
index a5f6d71ca95..56c2fdf9c65 100644
--- a/docs/ru/operations/performance/sampling_query_profiler_example_result.txt
+++ b/docs/ru/operations/performance/sampling_query_profiler_example_result.txt
@@ -1,7 +1,3 @@
----
-en_copy: true
----
-
 Row 1:
 ──────
 count(): 6344
diff --git a/docs/ru/operations/performance_test.md b/docs/ru/operations/performance_test.md
index ae4c5752703..391bcddd412 100644
--- a/docs/ru/operations/performance_test.md
+++ b/docs/ru/operations/performance_test.md
@@ -1,18 +1,19 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
 ---
 
-# How To Test Your Hardware With ClickHouse {#how-to-test-your-hardware-with-clickhouse}
+# Как протестировать оборудование с помощью ClickHouse {#how-to-test-your-hardware-with-clickhouse}
 
-With this instruction you can run basic ClickHouse performance test on any server without installation of ClickHouse packages.
+С помощью этой инструкции вы можете запустить базовый тест производительности ClickHouse на любом сервере без установки пакетов ClickHouse.
 
-1. Go to “commits” page: https://github.com/ClickHouse/ClickHouse/commits/master
+1. Перейдите на страницу «commits»: https://github.com/ClickHouse/ClickHouse/commits/master
 
-2. Click on the first green check mark or red cross with green “ClickHouse Build Check” and click on the “Details” link near “ClickHouse Build Check”.
+2. Нажмите на первую зелёную галочку или красный крестик с зелёной пометкой «ClickHouse Build Check» и перейдите по ссылке «Details» рядом с «ClickHouse Build Check».
 
-3. Copy the link to “clickhouse” binary for amd64 or aarch64.
+3. Скопируйте ссылку на двоичный файл «clickhouse» для amd64 или aarch64.
 
-4. ssh to the server and download it with wget:
+4. Подключитесь к серверу по ssh и скачайте файл с помощью wget:
@@ -23,7 +24,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
       # Then do:
       chmod a+x clickhouse
 
-1. Download configs:
+1. Скачайте конфигурационные файлы:
 
@@ -33,7 +34,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
       wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
       wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml
 
-1. Download benchmark files:
+1. Скачайте файлы бенчмарка:
 
      chmod a+x benchmark-new.sh
      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
 
-1. Download test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction (“hits” table containing 100 million rows).
+1. Загрузите тестовые данные согласно инструкции [Яндекс.Метрика](../getting_started/example_datasets/metrica.md) (таблица «hits», содержащая 100 миллионов строк).
 
     tar xvf hits_100m_obfuscated_v1.tar.xz -C .
    mv hits_100m_obfuscated_v1/* .
 
-1. Run the server:
+1. Запустите сервер:
 
       ./clickhouse server
 
-1. Check the data: ssh to the server in another terminal
+1. Проверьте данные: подключитесь к серверу по ssh в другом терминале
 
      ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
      100000000
 
-1. Edit the benchmark-new.sh, change “clickhouse-client” to “./clickhouse client” and add “–max\_memory\_usage 100000000000” parameter.
+1. Отредактируйте benchmark-new.sh: замените «clickhouse-client» на «./clickhouse client» и добавьте параметр «--max\_memory\_usage 100000000000».
 
      mcedit benchmark-new.sh
 
-1. Run the benchmark:
+1. Запустите бенчмарк:
 
     ./benchmark-new.sh hits_100m_obfuscated
 
-1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
+1. Отправьте результаты и информацию о конфигурации вашего оборудования на clickhouse-feedback@yandex-team.com
 
-All the results are published here: https://clickhouse.tech/benchmark\_hardware.html
+Все результаты опубликованы здесь: https://clickhouse.tech/benchmark\_hardware.html
diff --git a/docs/ru/operations/requirements.md b/docs/ru/operations/requirements.md
index 9fafe59343f..e6bc2f15e43 100644
--- a/docs/ru/operations/requirements.md
+++ b/docs/ru/operations/requirements.md
@@ -17,9 +17,9 @@ ClickHouse реализует параллельную обработку дан
 -   Сложности запросов.
 -   Объёма данных, обрабатываемых в запросах.
 
-Для расчета объёма RAM необходимо оценить размер промежуточных данных для операций [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) а также других операций, которыми вы пользуетесь.
+Для расчета объёма RAM необходимо оценить размер промежуточных данных для операций [GROUP BY](../sql_reference/statements/select.md#select-group-by-clause), [DISTINCT](../sql_reference/statements/select.md#select-distinct), [JOIN](../sql_reference/statements/select.md#select-join), а также других операций, которыми вы пользуетесь.
 
-ClickHouse может использовать внешнюю память для промежуточных данных. Подробнее смотрите в разделе [GROUP BY во внешней памяти](../query_language/select.md#select-group-by-in-external-memory).
+ClickHouse может использовать внешнюю память для промежуточных данных.
Подробнее смотрите в разделе [GROUP BY во внешней памяти](../sql_reference/statements/select.md#select-group-by-in-external-memory). ## Файл подкачки {#fail-podkachki} diff --git a/docs/ru/operations/server_settings/index.md b/docs/ru/operations/server_configuration_parameters/index.md similarity index 93% rename from docs/ru/operations/server_settings/index.md rename to docs/ru/operations/server_configuration_parameters/index.md index dae21c2b475..91deb2973a7 100644 --- a/docs/ru/operations/server_settings/index.md +++ b/docs/ru/operations/server_configuration_parameters/index.md @@ -8,4 +8,4 @@ Перед изучением настроек ознакомьтесь с разделом [Конфигурационные файлы](../configuration_files.md#configuration_files), обратите внимание на использование подстановок (атрибуты `incl` и `optional`). -[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/server_settings/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/server_configuration_parameters/) diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_configuration_parameters/settings.md similarity index 84% rename from docs/ru/operations/server_settings/settings.md rename to docs/ru/operations/server_configuration_parameters/settings.md index a062f13c400..16f00a82016 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_configuration_parameters/settings.md @@ -58,7 +58,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat База данных по умолчанию. -Перечень баз данных можно получить запросом [SHOW DATABASES](../../query_language/show.md#show-databases). +Перечень баз данных можно получить запросом [SHOW DATABASES](../../operations/server_configuration_parameters/settings.md#show-databases). **Пример** @@ -87,7 +87,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat - Указывается абсолютным или относительно конфигурационного файла сервера. - Может содержать wildcard-ы \* и ?. -Смотрите также «[Внешние словари](../../query_language/dicts/external_dicts.md)». +Смотрите также «[Внешние словари](../../operations/server_configuration_parameters/settings.md)». **Пример** @@ -111,7 +111,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat true ``` -## format\_schema\_path {#server_settings-format_schema_path} +## format\_schema\_path {#server_configuration_parameters-format_schema_path} Путь к каталогу со схемами для входных данных. Например со схемами для формата [CapnProto](../../interfaces/formats.md#capnproto). @@ -122,7 +122,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat format_schemas/ ``` -## graphite {#server_settings-graphite} +## graphite {#server_configuration_parameters-graphite} Отправка данных в [Graphite](https://github.com/graphite-project). @@ -133,10 +133,10 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat - interval – Период отправки в секундах. - timeout – Таймаут отправки данных в секундах. - root\_path – Префикс для ключей. -- metrics – Отправка данных из таблицы [system.metrics](../system_tables.md#system_tables-metrics). -- events – Отправка дельты данных, накопленной за промежуток времени из таблицы [system.events](../system_tables.md#system_tables-events). -- events\_cumulative – Отправка суммарных данных из таблицы [system.events](../system_tables.md#system_tables-events). -- asynchronous\_metrics – Отправка данных из таблицы [system.asynchronous\_metrics](../system_tables.md#system_tables-asynchronous_metrics). 
+- metrics – Отправка данных из таблицы [system.metrics](../../operations/server_configuration_parameters/settings.md#system_tables-metrics). +- events – Отправка дельты данных, накопленной за промежуток времени из таблицы [system.events](../../operations/server_configuration_parameters/settings.md#system_tables-events). +- events\_cumulative – Отправка суммарных данных из таблицы [system.events](../../operations/server_configuration_parameters/settings.md#system_tables-events). +- asynchronous\_metrics – Отправка данных из таблицы [system.asynchronous\_metrics](../../operations/server_configuration_parameters/settings.md#system_tables-asynchronous_metrics). Можно определить несколько секций ``, например, для передачи различных данных с различной частотой. @@ -156,11 +156,11 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## graphite\_rollup {#server_settings-graphite-rollup} +## graphite\_rollup {#server_configuration_parameters-graphite-rollup} Настройка прореживания данных для Graphite. -Подробнее читайте в разделе [GraphiteMergeTree](../table_engines/graphitemergetree.md). +Подробнее читайте в разделе [GraphiteMergeTree](../../operations/server_configuration_parameters/settings.md). **Пример** @@ -188,7 +188,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat Порт для обращений к серверу по протоколу HTTP(s). -Если указан `https_port`, то требуется конфигурирование [openSSL](#server_settings-openssl). +Если указан `https_port`, то требуется конфигурирование [openSSL](#server_configuration_parameters-openssl). Если указан `http_port`, то настройка openSSL игнорируется, даже если она задана. @@ -198,7 +198,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat 0000 ``` -## http\_server\_default\_response {#server_settings-http_server_default_response} +## http\_server\_default\_response {#server_configuration_parameters-http_server_default_response} Страница, показываемая по умолчанию, при обращении к HTTP(s) серверу ClickHouse. Значение по умолчанию «Ok.» (с переводом строки на конце). @@ -213,7 +213,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## include\_from {#server_settings-include_from} +## include\_from {#server_configuration_parameters-include_from} Путь к файлу с подстановками. @@ -251,7 +251,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ## interserver\_http\_credentials {#server-settings-interserver-http-credentials} -Имя пользователя и пароль, использующиеся для аутентификации при [репликации](../table_engines/replication.md) движками Replicated\*. Это имя пользователя и пароль используются только для взаимодействия между репликами кластера и никак не связаны с аутентификацией клиентов ClickHouse. Сервер проверяет совпадение имени и пароля для соединяющихся с ним реплик, а также использует это же имя и пароль для соединения с другими репликами. Соответственно, эти имя и пароль должны быть прописаны одинаковыми для всех реплик кластера. +Имя пользователя и пароль, использующиеся для аутентификации при [репликации](../../operations/server_configuration_parameters/settings.md) движками Replicated\*. Это имя пользователя и пароль используются только для взаимодействия между репликами кластера и никак не связаны с аутентификацией клиентов ClickHouse. Сервер проверяет совпадение имени и пароля для соединяющихся с ним реплик, а также использует это же имя и пароль для соединения с другими репликами. 
Соответственно, эти имя и пароль должны быть прописаны одинаковыми для всех реплик кластера. По умолчанию аутентификация не используется. Раздел содержит следующие параметры: @@ -278,7 +278,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat 3 ``` -## listen\_host {#server_settings-listen_host} +## listen\_host {#server_configuration_parameters-listen_host} Ограничение по хостам, с которых может прийти запрос. Если необходимо, чтобы сервер отвечал всем, то надо указать `::`. @@ -289,7 +289,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat 127.0.0.1 ``` -## logger {#server_settings-logger} +## logger {#server_configuration_parameters-logger} Настройки логирования. @@ -342,7 +342,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat Можно не указывать, если реплицируемых таблицы не используются. -Подробнее смотрите в разделе «[Создание реплицируемых таблиц](../../operations/table_engines/replication.md)». +Подробнее смотрите в разделе «[Создание реплицируемых таблиц](../../operations/server_configuration_parameters/settings.md)». **Пример** @@ -352,7 +352,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ## mark\_cache\_size {#server-mark-cache-size} -Приблизительный размер (в байтах) кэша засечек, используемых движками таблиц семейства [MergeTree](../table_engines/mergetree.md). +Приблизительный размер (в байтах) кэша засечек, используемых движками таблиц семейства [MergeTree](../../operations/server_configuration_parameters/settings.md). Кэш общий для сервера, память выделяется по мере необходимости. @@ -400,7 +400,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat Ограничение на удаление таблиц. -Если размер таблицы семейства [MergeTree](../table_engines/mergetree.md) превышает `max_table_size_to_drop` (в байтах), то ее нельзя удалить запросом DROP. +Если размер таблицы семейства [MergeTree](../../operations/server_configuration_parameters/settings.md) превышает `max_table_size_to_drop` (в байтах), то ее нельзя удалить запросом DROP. Если таблицу все же необходимо удалить, не перезапуская при этом сервер ClickHouse, то необходимо создать файл `/flags/force_drop_table` и выполнить запрос DROP. @@ -414,9 +414,9 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat 0 ``` -## merge\_tree {#server_settings-merge_tree} +## merge\_tree {#server_configuration_parameters-merge_tree} -Тонкая настройка таблиц семейства [MergeTree](../table_engines/mergetree.md). +Тонкая настройка таблиц семейства [MergeTree](../../operations/server_configuration_parameters/settings.md). Подробнее смотрите в заголовочном файле MergeTreeSettings.h. @@ -428,7 +428,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## openSSL {#server_settings-openssl} +## openSSL {#server_configuration_parameters-openssl} Настройки клиента/сервера SSL. @@ -487,17 +487,17 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## part\_log {#server_settings-part-log} +## part\_log {#server_configuration_parameters-part-log} -Логирование событий, связанных с данными типа [MergeTree](../table_engines/mergetree.md). Например, события добавления или мержа данных. Лог можно использовать для симуляции алгоритмов слияния, чтобы сравнивать их характеристики. Также, можно визуализировать процесс слияния. +Логирование событий, связанных с данными типа [MergeTree](../../operations/server_configuration_parameters/settings.md). Например, события добавления или мержа данных. 
Лог можно использовать для симуляции алгоритмов слияния, чтобы сравнивать их характеристики. Также, можно визуализировать процесс слияния. -Запросы логируются не в отдельный файл, а в таблицу [system.part\_log](../system_tables.md#system_tables-part-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). +Запросы логируются не в отдельный файл, а в таблицу [system.part\_log](../../operations/server_configuration_parameters/settings.md#system_tables-part-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). При настройке логирования используются следующие параметры: - `database` — имя базы данных; - `table` — имя таблицы; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/table_engines/custom_partitioning_key.md); +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server_configuration_parameters/settings.md); - `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. **Пример** @@ -511,7 +511,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## path {#server_settings-path} +## path {#server_configuration_parameters-path} Путь к каталогу с данными. @@ -524,17 +524,17 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat /var/lib/clickhouse/ ``` -## query\_log {#server_settings-query-log} +## query\_log {#server_configuration_parameters-query-log} Настройка логирования запросов, принятых с настройкой [log\_queries=1](../settings/settings.md). -Запросы логируются не в отдельный файл, а в системную таблицу [system.query\_log](../system_tables.md#system_tables-query-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). +Запросы логируются не в отдельный файл, а в системную таблицу [system.query\_log](../../operations/server_configuration_parameters/settings.md#system_tables-query-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). При настройке логирования используются следующие параметры: - `database` — имя базы данных; - `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — [произвольный ключ партиционирования](../../operations/table_engines/custom_partitioning_key.md) для таблицы с логами; +- `partition_by` — [произвольный ключ партиционирования](../../operations/server_configuration_parameters/settings.md) для таблицы с логами; - `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -550,17 +550,17 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## query\_thread\_log {#server_settings-query-thread-log} +## query\_thread\_log {#server_configuration_parameters-query-thread-log} Настройка логирования потоков выполнения запросов, принятых с настройкой [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads). -Запросы логируются не в отдельный файл, а в системную таблицу [system.query\_thread\_log](../system_tables.md#system_tables-query-thread-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). +Запросы логируются не в отдельный файл, а в системную таблицу [system.query\_thread\_log](../../operations/server_configuration_parameters/settings.md#system_tables-query-thread-log). 
Вы можете изменить название этой таблицы в параметре `table` (см. ниже). При настройке логирования используются следующие параметры: - `database` — имя базы данных; - `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — [произвольный ключ партиционирования](../../operations/table_engines/custom_partitioning_key.md) для таблицы с логами; +- `partition_by` — [произвольный ключ партиционирования](../../operations/server_configuration_parameters/settings.md) для таблицы с логами; - `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -576,15 +576,15 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## trace\_log {#server_settings-trace_log} +## trace\_log {#server_configuration_parameters-trace_log} -Settings for the [trace\_log](../system_tables.md#system_tables-trace_log) system table operation. +Settings for the [trace\_log](../../operations/server_configuration_parameters/settings.md#system_tables-trace_log) system table operation. Parameters: - `database` — Database for storing a table. - `table` — Table name. -- `partition_by` — [Custom partitioning key](../../operations/table_engines/custom_partitioning_key.md) for a system table. +- `partition_by` — [Custom partitioning key](../../operations/server_configuration_parameters/settings.md) for a system table. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. The default server configuration file `config.xml` contains the following settings section: @@ -600,7 +600,7 @@ The default server configuration file `config.xml` contains the following settin ## remote\_servers {#server-settings-remote-servers} -Конфигурация кластеров, которые использует движок таблиц [Distributed](../../operations/table_engines/distributed.md) и табличная функция `cluster`. +Конфигурация кластеров, которые использует движок таблиц [Distributed](../../operations/server_configuration_parameters/settings.md) и табличная функция `cluster`. **Пример** @@ -614,7 +614,7 @@ The default server configuration file `config.xml` contains the following settin - [skip\_unavailable\_shards](../settings/settings.md#settings-skip_unavailable_shards) -## timezone {#server_settings-timezone} +## timezone {#server_configuration_parameters-timezone} Временная зона сервера. @@ -628,7 +628,7 @@ The default server configuration file `config.xml` contains the following settin Europe/Moscow ``` -## tcp\_port {#server_settings-tcp_port} +## tcp\_port {#server_configuration_parameters-tcp_port} Порт для взаимодействия с клиентами по протоколу TCP. @@ -638,9 +638,9 @@ The default server configuration file `config.xml` contains the following settin 9000 ``` -## tcp\_port\_secure {#server_settings-tcp_port-secure} +## tcp\_port\_secure {#server_configuration_parameters-tcp_port-secure} -TCP порт для защищённого обмена данными с клиентами. Используйте с настройкой [OpenSSL](#server_settings-openssl). +TCP порт для защищённого обмена данными с клиентами. Используйте с настройкой [OpenSSL](#server_configuration_parameters-openssl). 
**Возможные значения** @@ -652,7 +652,7 @@ TCP порт для защищённого обмена данными с кли 9440 ``` -## mysql\_port {#server_settings-mysql_port} +## mysql\_port {#server_configuration_parameters-mysql_port} Порт для взаимодействия с клиентами по протоколу MySQL. @@ -677,7 +677,7 @@ TCP порт для защищённого обмена данными с кли ## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size} -Размер кеша (в байтах) для несжатых данных, используемых движками таблиц семейства [MergeTree](../table_engines/mergetree.md). +Размер кеша (в байтах) для несжатых данных, используемых движками таблиц семейства [MergeTree](../../operations/server_configuration_parameters/settings.md). Кеш единый для сервера. Память выделяется по требованию. Кеш используется в том случае, если включена опция [use\_uncompressed\_cache](../settings/settings.md). @@ -689,9 +689,9 @@ TCP порт для защищённого обмена данными с кли 8589934592 ``` -## user\_files\_path {#server_settings-user_files_path} +## user\_files\_path {#server_configuration_parameters-user_files_path} -Каталог с пользовательскими файлами. Используется в табличной функции [file()](../../query_language/table_functions/file.md). +Каталог с пользовательскими файлами. Используется в табличной функции [file()](../../operations/server_configuration_parameters/settings.md). **Пример** @@ -763,7 +763,7 @@ ClickHouse использует ZooKeeper для хранения метадан **Смотрите также** -- [Репликация](../../operations/table_engines/replication.md) +- [Репликация](../../operations/server_configuration_parameters/settings.md) - [ZooKeeper Programmer’s Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) ## use\_minimalistic\_part\_header\_in\_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper} @@ -772,20 +772,20 @@ ClickHouse использует ZooKeeper для хранения метадан Параметр применяется только к семейству таблиц `MergeTree`. Его можно установить: -- Глобально в разделе [merge\_tree](#server_settings-merge_tree) файла `config.xml`. +- Глобально в разделе [merge\_tree](#server_configuration_parameters-merge_tree) файла `config.xml`. ClickHouse использует этот параметр для всех таблиц на сервере. Вы можете изменить настройку в любое время. Существующие таблицы изменяют свое поведение при изменении параметра. - Для каждой отдельной таблицы. - При создании таблицы укажите соответствующую [настройку движка](../table_engines/mergetree.md#table_engine-mergetree-creating-a-table). Поведение существующей таблицы с установленным параметром не изменяется даже при изменении глобального параметра. + При создании таблицы укажите соответствующую [настройку движка](../../operations/server_configuration_parameters/settings.md#table_engine-mergetree-creating-a-table). Поведение существующей таблицы с установленным параметром не изменяется даже при изменении глобального параметра. **Возможные значения** - 0 — функциональность выключена. - 1 — функциональность включена. -Если `use_minimalistic_part_header_in_zookeeper = 1`, то [реплицированные](../table_engines/replication.md) таблицы хранят заголовки кусков данных в компактном виде, используя только одну `znode`. Если таблица содержит много столбцов, этот метод хранения значительно уменьшает объём данных, хранящихся в Zookeeper. +Если `use_minimalistic_part_header_in_zookeeper = 1`, то [реплицированные](../../operations/server_configuration_parameters/settings.md) таблицы хранят заголовки кусков данных в компактном виде, используя только одну `znode`. 
Если таблица содержит много столбцов, этот метод хранения значительно уменьшает объём данных, хранящихся в Zookeeper. !!! attention "Внимание" После того как вы установили `use_minimalistic_part_header_in_zookeeper = 1`, невозможно откатить ClickHouse до версии, которая не поддерживает этот параметр. Будьте осторожны при обновлении ClickHouse на серверах в кластере. Не обновляйте все серверы сразу. Безопаснее проверять новые версии ClickHouse в тестовой среде или только на некоторых серверах кластера. @@ -808,4 +808,4 @@ ClickHouse использует ZooKeeper для хранения метадан **Значение по умолчанию**: 15. -[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/server_settings/settings/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/server_configuration_parameters/settings/) diff --git a/docs/ru/operations/settings/query_complexity.md b/docs/ru/operations/settings/query_complexity.md index 94791d79420..5ad28eed0a8 100644 --- a/docs/ru/operations/settings/query_complexity.md +++ b/docs/ru/operations/settings/query_complexity.md @@ -76,11 +76,11 @@ ## max\_bytes\_before\_external\_group\_by {#settings-max_bytes_before_external_group_by} -Включает или отключает выполнение секций `GROUP BY` во внешней памяти. Смотрите [GROUP BY во внешней памяти](../../query_language/select.md#select-group-by-in-external-memory). +Включает или отключает выполнение секций `GROUP BY` во внешней памяти. Смотрите [GROUP BY во внешней памяти](../../sql_reference/statements/select.md#select-group-by-in-external-memory). Возможные значения: -- Максимальный объём RAM (в байтах), который может использовать отдельная операция [GROUP BY](../../query_language/select.md#select-group-by-clause). +- Максимальный объём RAM (в байтах), который может использовать отдельная операция [GROUP BY](../../sql_reference/statements/select.md#select-group-by-clause). - 0 — `GROUP BY` во внешней памяти отключен. Значение по умолчанию — 0. @@ -228,7 +228,7 @@ FORMAT Null; Ограничивает количество строк в хэш-таблице, используемой при соединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../query_language/select.md#select-join) и к движку таблиц [Join](../table_engines/join.md). +Параметр применяется к операциям [SELECT… JOIN](../../sql_reference/statements/select.md#select-join) и к движку таблиц [Join](../../engines/table_engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. @@ -245,7 +245,7 @@ FORMAT Null; Ограничивает размер (в байтах) хэш-таблицы, используемой при объединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../query_language/select.md#select-join) и к движку таблиц [Join](../table_engines/join.md). +Параметр применяется к операциям [SELECT… JOIN](../../sql_reference/statements/select.md#select-join) и к движку таблиц [Join](../../engines/table_engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. 
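+
+Примерный набросок того, как эти ограничения можно задать на уровне запроса (имена таблиц `t1` и `t2` здесь условные, для иллюстрации):
+
+``` sql
+SET max_bytes_in_join = 100000000;  -- ограничить хэш-таблицу соединения примерно 100 МБ
+SET join_overflow_mode = 'break';   -- при превышении вернуть частичный результат, а не исключение
+
+SELECT t1.id, t2.value
+FROM t1
+INNER JOIN t2 ON t1.id = t2.id;
+```
+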
@@ -274,8 +274,8 @@ FORMAT Null;

**Смотрите также**

-- [Секция JOIN](../../query_language/select.md#select-join)
-- [Движок таблиц Join](../table_engines/join.md)
+- [Секция JOIN](../../sql_reference/statements/select.md#select-join)
+- [Движок таблиц Join](../../engines/table_engines/special/join.md)

## max\_partitions\_per\_insert\_block {#max-partitions-per-insert-block}

diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 42b504086d0..94169b212a1 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -2,7 +2,7 @@

## distributed\_product\_mode {#distributed-product-mode}

-Изменяет поведение [распределенных подзапросов](../../query_language/select.md).
+Изменяет поведение [распределенных подзапросов](../../sql_reference/statements/select.md).

ClickHouse применяет настройку в тех случаях, когда запрос содержит произведение распределённых таблиц, т.е. когда запрос к распределенной таблице содержит не-GLOBAL подзапрос к также распределенной таблице.

@@ -11,7 +11,7 @@ ClickHouse применяет настройку в тех случаях, ко
- Только подзапросы для IN, JOIN.
- Только если в секции FROM используется распределённая таблица, содержащая более одного шарда.
- Если подзапрос касается распределенной таблицы, содержащей более одного шарда.
-- Не используется в случае табличной функции [remote](../../query_language/table_functions/remote.md).
+- Не используется в случае табличной функции [remote](../../sql_reference/table_functions/remote.md).

Возможные значения:

@@ -46,7 +46,7 @@ ClickHouse применяет настройку в тех случаях, ко

## fallback\_to\_stale\_replicas\_for\_distributed\_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}

-Форсирует запрос в устаревшую реплику в случае, если актуальные данные недоступны. См. [Репликация](../../operations/table_engines/replication.md).
+Форсирует запрос в устаревшую реплику в случае, если актуальные данные недоступны. См. [Репликация](../../engines/table_engines/mergetree_family/replication.md).

Из устаревших реплик таблицы ClickHouse выбирает наиболее актуальную.

@@ -60,7 +60,7 @@ ClickHouse применяет настройку в тех случаях, ко

Работает с таблицами семейства MergeTree.

-При `force_index_by_date=1` ClickHouse проверяет, есть ли в запросе условие на ключ даты, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Например, условие `Date != '2000-01-01'` подходит даже в том случае, когда соответствует всем данным в таблице (т.е. для выполнения запроса требуется full scan). Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе [MergeTree](../table_engines/mergetree.md).
+При `force_index_by_date=1` ClickHouse проверяет, есть ли в запросе условие на ключ даты, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Например, условие `Date != '2000-01-01'` подходит даже в том случае, когда соответствует всем данным в таблице (т.е. для выполнения запроса требуется full scan). Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md). 
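+
+Минимальный набросок поведения настройки (таблица `hits` и её ключ даты `EventDate` здесь условные):
+
+``` sql
+SET force_index_by_date = 1;
+
+-- Выполнится: есть условие на ключ даты, пригодное для отсечения диапазонов.
+SELECT count() FROM hits WHERE EventDate = today();
+
+-- Кинет исключение: в запросе нет условия на ключ даты.
+SELECT count() FROM hits;
+```
+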
## force\_primary\_key {#settings-force-primary-key} @@ -68,7 +68,7 @@ ClickHouse применяет настройку в тех случаях, ко Работает с таблицами семейства MergeTree. -При `force_primary_key=1` ClickHouse проверяет, есть ли в запросе условие на первичный ключ, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе [MergeTree](../table_engines/mergetree.md). +При `force_primary_key=1` ClickHouse проверяет, есть ли в запросе условие на первичный ключ, которое может использоваться для отсечения диапазонов данных. Если подходящего условия нет - кидается исключение. При этом не проверяется, действительно ли условие уменьшает объём данных для чтения. Подробнее про диапазоны данных в таблицах MergeTree читайте в разделе [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md). ## format\_schema {#format-schema} @@ -129,7 +129,7 @@ ClickHouse применяет настройку в тех случаях, ко ## max\_http\_get\_redirects {#setting-max_http_get_redirects} -Ограничивает максимальное количество переходов по редиректам в таблицах с движком [URL](../table_engines/url.md) при выполнении HTTP запросов методом GET. Настройка применяется для обоих типов таблиц: созданных запросом [CREATE TABLE](../../query_language/create/#create-table-query) и с помощью табличной функции [url](../../query_language/table_functions/url.md). +Ограничивает максимальное количество переходов по редиректам в таблицах с движком [URL](../../engines/table_engines/special/url.md) при выполнении HTTP запросов методом GET. Настройка применяется для обоих типов таблиц: созданных запросом [CREATE TABLE](../../sql_reference/create/#create-table-query) и с помощью табличной функции [url](../../sql_reference/table_functions/url.md). Возможные значения: @@ -165,7 +165,7 @@ ClickHouse применяет настройку в тех случаях, ко ## input\_format\_values\_interpret\_expressions {#settings-input_format_values_interpret_expressions} -Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../query_language/syntax.md). +Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql_reference/syntax.md). Возможные значения: @@ -181,7 +181,7 @@ ClickHouse применяет настройку в тех случаях, ко Пример использования: -Вставим значение типа [DateTime](../../data_types/datetime.md) при разных значения настройки. +Вставим значение типа [DateTime](../../sql_reference/data_types/datetime.md) при разных значения настройки. ``` sql SET input_format_values_interpret_expressions = 0; @@ -298,7 +298,7 @@ Ok. Выбор парсера для текстового представления дат и времени при обработке входного формата. -Настройка не применяется к [функциям для работы с датой и временем](../../query_language/functions/date_time_functions.md). +Настройка не применяется к [функциям для работы с датой и временем](../../sql_reference/functions/date_time_functions.md). Возможные значения: @@ -314,12 +314,12 @@ Ok. См. 
также: -- [Тип данных DateTime.](../../data_types/datetime.md) -- [Функции для работы с датой и временем.](../../query_language/functions/date_time_functions.md) +- [Тип данных DateTime.](../../sql_reference/data_types/datetime.md) +- [Функции для работы с датой и временем.](../../sql_reference/functions/date_time_functions.md) ## join\_default\_strictness {#settings-join_default_strictness} -Устанавливает строгость по умолчанию для [JOIN](../../query_language/select.md#select-join). +Устанавливает строгость по умолчанию для [JOIN](../../sql_reference/statements/select.md#select-join). Возможные значения @@ -334,7 +334,7 @@ Ok. Изменяет поведение операций, выполняемых со строгостью `ANY`. !!! warning "Внимание" - Настройка применяется только для операций `JOIN`, выполняемых над таблицами с движком [Join](../table_engines/join.md). + Настройка применяется только для операций `JOIN`, выполняемых над таблицами с движком [Join](../../engines/table_engines/special/join.md). Возможные значения: @@ -345,18 +345,18 @@ Ok. См. также: -- [Секция JOIN](../../query_language/select.md#select-join) -- [Движок таблиц Join](../table_engines/join.md) +- [Секция JOIN](../../sql_reference/statements/select.md#select-join) +- [Движок таблиц Join](../../engines/table_engines/special/join.md) - [join\_default\_strictness](#settings-join_default_strictness) ## join\_use\_nulls {#join_use_nulls} -Устанавливает тип поведения [JOIN](../../query_language/select.md). При объединении таблиц могут появиться пустые ячейки. ClickHouse заполняет их по-разному в зависимости от настроек. +Устанавливает тип поведения [JOIN](../../sql_reference/statements/select.md). При объединении таблиц могут появиться пустые ячейки. ClickHouse заполняет их по-разному в зависимости от настроек. Возможные значения - 0 — пустые ячейки заполняются значением по умолчанию соответствующего типа поля. -- 1 — `JOIN` ведёт себя как в стандартном SQL. Тип соответствующего поля преобразуется в [Nullable](../../data_types/nullable.md#data_type-nullable), а пустые ячейки заполняются значениями [NULL](../../query_language/syntax.md). +- 1 — `JOIN` ведёт себя как в стандартном SQL. Тип соответствующего поля преобразуется в [Nullable](../../sql_reference/data_types/nullable.md#data_type-nullable), а пустые ячейки заполняются значениями [NULL](../../sql_reference/syntax.md). Значение по умолчанию: 0. @@ -376,7 +376,7 @@ Ok. ## merge\_tree\_uniform\_read\_distribution {#setting-merge-tree-uniform-read-distribution} -При чтении из таблиц [MergeTree](../table_engines/mergetree.md) ClickHouse использует несколько потоков. Этот параметр включает/выключает равномерное распределение заданий по рабочим потокам. Алгоритм равномерного распределения стремится сделать время выполнения всех потоков примерно равным для одного запроса `SELECT`. +При чтении из таблиц [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) ClickHouse использует несколько потоков. Этот параметр включает/выключает равномерное распределение заданий по рабочим потокам. Алгоритм равномерного распределения стремится сделать время выполнения всех потоков примерно равным для одного запроса `SELECT`. Возможные значения: @@ -387,7 +387,7 @@ Ok. ## merge\_tree\_min\_rows\_for\_concurrent\_read {#setting-merge-tree-min-rows-for-concurrent-read} -Если количество строк, считываемых из файла таблицы [MergeTree](../table_engines/mergetree.md) превышает `merge_tree_min_rows_for_concurrent_read`, то ClickHouse пытается выполнить одновременное чтение из этого файла в несколько потоков. 
+Если количество строк, считываемых из файла таблицы [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) превышает `merge_tree_min_rows_for_concurrent_read`, то ClickHouse пытается выполнить одновременное чтение из этого файла в несколько потоков. Возможные значения: @@ -397,7 +397,7 @@ Ok. ## merge\_tree\_min\_bytes\_for\_concurrent\_read {#setting-merge-tree-min-bytes-for-concurrent-read} -Если число байтов, которое должно быть прочитано из одного файла таблицы с движком [MergeTree](../table_engines/mergetree.md), превышает значение `merge_tree_min_bytes_for_concurrent_read`, то ClickHouse выполняет одновременное чтение в несколько потоков из этого файла. +Если число байтов, которое должно быть прочитано из одного файла таблицы с движком [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md), превышает значение `merge_tree_min_bytes_for_concurrent_read`, то ClickHouse выполняет одновременное чтение в несколько потоков из этого файла. Возможное значение: @@ -439,7 +439,7 @@ Ok. Если требуется прочитать более, чем `merge_tree_max_rows_to_use_cache` строк в одном запросе, ClickHouse не используют кэш несжатых блоков. -Кэш несжатых блоков хранит данные, извлечённые при выполнении запросов. ClickHouse использует этот кэш для ускорения ответов на повторяющиеся небольшие запросы. Настройка защищает кэш от замусоривания запросами, для выполнения которых необходимо извлечь большое количество данных. Настройка сервера [uncompressed\_cache\_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) определяет размер кэша несжатых блоков. +Кэш несжатых блоков хранит данные, извлечённые при выполнении запросов. ClickHouse использует этот кэш для ускорения ответов на повторяющиеся небольшие запросы. Настройка защищает кэш от замусоривания запросами, для выполнения которых необходимо извлечь большое количество данных. Настройка сервера [uncompressed\_cache\_size](../server_configuration_parameters/settings.md#server-settings-uncompressed_cache_size) определяет размер кэша несжатых блоков. Возможные значения: @@ -451,7 +451,7 @@ Ok. Если требуется прочитать более, чем `merge_tree_max_bytes_to_use_cache` байтов в одном запросе, ClickHouse не используют кэш несжатых блоков. -Кэш несжатых блоков хранит данные, извлечённые при выполнении запросов. ClickHouse использует кэш для ускорения ответов на повторяющиеся небольшие запросы. Настройка защищает кэш от переполнения. Настройка сервера [uncompressed\_cache\_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) определяет размер кэша несжатых блоков. +Кэш несжатых блоков хранит данные, извлечённые при выполнении запросов. ClickHouse использует кэш для ускорения ответов на повторяющиеся небольшие запросы. Настройка защищает кэш от переполнения. Настройка сервера [uncompressed\_cache\_size](../server_configuration_parameters/settings.md#server-settings-uncompressed_cache_size) определяет размер кэша несжатых блоков. Возможное значение: @@ -476,7 +476,7 @@ ClickHouse использует этот параметр при чтении д Установка логирования запроса. -Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query\_log](../../operations/server_settings/settings.md#server_settings-query-log). +Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query\_log](../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-query-log). 
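+
+Проверить, что запросы действительно попадают в лог, можно примерно так (набросок; предполагается, что логирование в таблицу `system.query_log` включено на сервере):
+
+``` sql
+SET log_queries = 1;
+SELECT 1;
+SYSTEM FLUSH LOGS; -- принудительно сбросить буферы логов в таблицу
+SELECT query FROM system.query_log ORDER BY event_time DESC LIMIT 5;
+```
+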
Пример: @@ -488,7 +488,7 @@ log_queries=1 Установка логирования информации о потоках выполнения запроса. -Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query\_thread\_log](../server_settings/settings.md#server_settings-query-thread-log). +Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query\_thread\_log](../server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log). Пример: @@ -510,7 +510,7 @@ log_query_threads=1 ## max\_replica\_delay\_for\_distributed\_queries {#settings-max_replica_delay_for_distributed_queries} -Отключает отстающие реплики при распределенных запросах. См. [Репликация](../../operations/table_engines/replication.md). +Отключает отстающие реплики при распределенных запросах. См. [Репликация](../../engines/table_engines/mergetree_family/replication.md). Устанавливает время в секундах. Если отставание реплики больше установленного значения, то реплика не используется. @@ -555,7 +555,7 @@ log_query_threads=1 ## min\_compress\_block\_size {#min-compress-block-size} -Для таблиц типа [MergeTree](../table_engines/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min\_compress\_block\_size. По умолчанию - 65 536. +Для таблиц типа [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md). В целях уменьшения задержек при обработке запросов, блок сжимается при записи следующей засечки, если его размер не меньше min\_compress\_block\_size. По умолчанию - 65 536. Реальный размер блока, если несжатых данных меньше max\_compress\_block\_size, будет не меньше этого значения и не меньше объёма данных на одну засечку. @@ -634,7 +634,7 @@ log_query_threads=1 Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 0 (выключено). -Использование кэша несжатых блоков (только для таблиц семейства MergeTree) может существенно сократить задержку и увеличить пропускную способность при работе с большим количеством коротких запросов. Включите эту настройку для пользователей, от которых идут частые короткие запросы. Также обратите внимание на конфигурационный параметр [uncompressed\_cache\_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) (настраивается только в конфигурационном файле) – размер кэша разжатых блоков. По умолчанию - 8 GiB. Кэш разжатых блоков заполняется по мере надобности, а наиболее невостребованные данные автоматически удаляются. +Использование кэша несжатых блоков (только для таблиц семейства MergeTree) может существенно сократить задержку и увеличить пропускную способность при работе с большим количеством коротких запросов. Включите эту настройку для пользователей, от которых идут частые короткие запросы. Также обратите внимание на конфигурационный параметр [uncompressed\_cache\_size](../server_configuration_parameters/settings.md#server-settings-uncompressed_cache_size) (настраивается только в конфигурационном файле) – размер кэша разжатых блоков. По умолчанию - 8 GiB. Кэш разжатых блоков заполняется по мере надобности, а наиболее невостребованные данные автоматически удаляются. Для запросов, читающих хоть немного приличный объём данных (миллион строк и больше), кэш разжатых блоков автоматически выключается, чтобы оставить место для действительно мелких запросов. 
Поэтому, можно держать настройку `use_uncompressed_cache` всегда выставленной в 1.

@@ -850,7 +850,7 @@ ClickHouse генерирует исключение

Значение по умолчанию: 1.

-По умолчанию блоки, вставляемые в реплицируемые таблицы оператором `INSERT`, дедуплицируются (см. [Репликация данных](../table_engines/replication.md)).
+По умолчанию блоки, вставляемые в реплицируемые таблицы оператором `INSERT`, дедуплицируются (см. [Репликация данных](../../engines/table_engines/mergetree_family/replication.md)).

## deduplicate\_blocks\_in\_dependent\_materialized\_views {#settings-deduplicate-blocks-in-dependent-materialized-views}

@@ -869,15 +869,15 @@ ClickHouse генерирует исключение

## count\_distinct\_implementation {#settings-count_distinct_implementation}

-Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT …)](../../query_language/agg_functions/reference.md#agg_function-count).
+Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT …)](../../sql_reference/aggregate_functions/reference.md#agg_function-count).

Возможные значения:

-- [uniq](../../query_language/agg_functions/reference.md#agg_function-uniq)
-- [uniqCombined](../../query_language/agg_functions/reference.md#agg_function-uniqcombined)
-- [uniqCombined64](../../query_language/agg_functions/reference.md#agg_function-uniqcombined64)
-- [uniqHLL12](../../query_language/agg_functions/reference.md#agg_function-uniqhll12)
-- [uniqExact](../../query_language/agg_functions/reference.md#agg_function-uniqexact)
+- [uniq](../../sql_reference/aggregate_functions/reference.md#agg_function-uniq)
+- [uniqCombined](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqcombined)
+- [uniqCombined64](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqcombined64)
+- [uniqHLL12](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqhll12)
+- [uniqExact](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqexact)

Значение по умолчанию: `uniqExact`.

@@ -957,7 +957,7 @@ ClickHouse генерирует исключение

## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}

-Включает или отключает генерирование исключения в в случаях, когда запрос [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) не выполняет мёрж.
+Включает или отключает генерирование исключения в случаях, когда запрос [OPTIMIZE](../../sql_reference/statements/misc.md#misc_operations-optimize) не выполняет мёрж.

По умолчанию, `OPTIMIZE` завершается успешно и в тех случаях, когда он ничего не сделал. Настройка позволяет отделить подобные случаи и включает генерирование исключения с поясняющим сообщением.

Возможные значения:

@@ -970,7 +970,7 @@ ClickHouse генерирует исключение

## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms}

-Основной интервал отправки данных движком таблиц [Distributed](../table_engines/distributed.md). Фактический интервал растёт экспоненциально при возникновении ошибок.
+Основной интервал отправки данных движком таблиц [Distributed](../../engines/table_engines/special/distributed.md). Фактический интервал растёт экспоненциально при возникновении ошибок.

Возможные значения:

@@ -980,7 +980,7 @@ ClickHouse генерирует исключение

-Максимальный интервал отправки данных движком таблиц [Distributed](../table_engines/distributed.md). 
Ограничивает экпоненциальный рост интервала, установленого настройкой [distributed\_directory\_monitor\_sleep\_time\_ms](#distributed_directory_monitor_sleep_time_ms).
+Максимальный интервал отправки данных движком таблиц [Distributed](../../engines/table_engines/special/distributed.md). Ограничивает экспоненциальный рост интервала, установленного настройкой [distributed\_directory\_monitor\_sleep\_time\_ms](#distributed_directory_monitor_sleep_time_ms).

Возможные значения:

@@ -992,7 +992,7 @@ ClickHouse генерирует исключение

Включает/выключает пакетную отправку вставленных данных.

-Если пакетная отправка включена, то движок таблиц [Distributed](../table_engines/distributed.md) вместо того, чтобы отправлять каждый файл со вставленными данными по отдельности, старается отправить их все за одну операцию. Пакетная отправка улучшает производительность кластера за счет более оптимального использования ресурсов сервера и сети.
+Если пакетная отправка включена, то движок таблиц [Distributed](../../engines/table_engines/special/distributed.md) вместо того, чтобы отправлять каждый файл со вставленными данными по отдельности, старается отправить их все за одну операцию. Пакетная отправка улучшает производительность кластера за счет более оптимального использования ресурсов сервера и сети.

Возможные значения:

@@ -1018,7 +1018,7 @@ ClickHouse генерирует исключение

## query\_profiler\_real\_time\_period\_ns {#query_profiler_real_time_period_ns}

-Sets the period for a real clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). Real clock timer counts wall-clock time.
+Sets the period for a real clock timer of the [query profiler](../../operations/optimizing_performance/sampling_query_profiler.md). Real clock timer counts wall-clock time.

Possible values:

- Positive integer number, in nanoseconds.

  Recommended values:

      - 10000000 (100 times a second) nanoseconds and less for single queries.
      - 1000000000 (once a second) for cluster-wide profiling.

- 0 for turning off the timer.

-Type: [UInt64](../../data_types/int_uint.md).
+Type: [UInt64](../../sql_reference/data_types/int_uint.md).

Default value: 1000000000 nanoseconds (once a second).

See also:

@@ -1041,7 +1041,7 @@ See also:

## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns}

-Sets the period for a CPU clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). This timer counts only CPU time.
+Sets the period for a CPU clock timer of the [query profiler](../../operations/optimizing_performance/sampling_query_profiler.md). This timer counts only CPU time.

Possible values:

- Positive integer number, in nanoseconds.

  Recommended values:

      - 10000000 (100 times a second) nanoseconds and less for single queries.
      - 1000000000 (once a second) for cluster-wide profiling.

- 0 for turning off the timer.

-Type: [UInt64](../../data_types/int_uint.md).
+Type: [UInt64](../../sql_reference/data_types/int_uint.md).

Default value: 1000000000 nanoseconds.

See also:

@@ -1064,7 +1064,7 @@ See also:

## allow\_introspection\_functions {#settings-allow_introspection_functions}

-Enables of disables [introspections functions](../../query_language/functions/introspection.md) for query profiling.
+Enables or disables [introspection functions](../../sql_reference/functions/introspection.md) for query profiling.

Possible values:

- 1 — Introspection functions enabled.
- 0 — Introspection functions disabled.

Default value: 0. 
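+
+A minimal sketch of trying these functions once the setting is enabled (assuming `system.trace_log` is populated and the server binary has symbols):
+
+``` sql
+SET allow_introspection_functions = 1;
+
+SELECT demangle(addressToSymbol(trace[1])) AS top_frame
+FROM system.trace_log
+LIMIT 5;
+```
+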
**See Also**

-- [Sampling Query Profiler](../performance/sampling_query_profiler.md)
+- [Sampling Query Profiler](../optimizing_performance/sampling_query_profiler.md)
- System table [trace\_log](../../operations/system_tables.md#system_tables-trace_log)

[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/)
diff --git a/docs/ru/operations/settings/settings_users.md b/docs/ru/operations/settings/settings_users.md
index b010358e547..1719f21a031 100644
--- a/docs/ru/operations/settings/settings_users.md
+++ b/docs/ru/operations/settings/settings_users.md
@@ -139,6 +139,6 @@
```

-Элемент `filter` содержать любое выражение, возвращающее значение типа [UInt8](../../data_types/int_uint.md). Обычно он содержит сравнения и логические операторы. Строки `database_name.table1`, для которых фильтр возвращает 0 не выдаются пользователю. Фильтрация несовместима с операциями `PREWHERE` и отключает оптимизацию `WHERE→PREWHERE`.
+Элемент `filter` может содержать любое выражение, возвращающее значение типа [UInt8](../../sql_reference/data_types/int_uint.md). Обычно он содержит сравнения и логические операторы. Строки `database_name.table1`, для которых фильтр возвращает 0 не выдаются пользователю. Фильтрация несовместима с операциями `PREWHERE` и отключает оптимизацию `WHERE→PREWHERE`.

[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings_users/)
diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md
index fac1e63264b..dfc15e6281a 100644
--- a/docs/ru/operations/system_tables.md
+++ b/docs/ru/operations/system_tables.md
@@ -12,8 +12,8 @@

Столбцы:

-- `metric` ([String](../data_types/string.md)) — название метрики.
-- `value` ([Float64](../data_types/float.md)) — значение метрики.
+- `metric` ([String](../sql_reference/data_types/string.md)) — название метрики.
+- `value` ([Float64](../sql_reference/data_types/float.md)) — значение метрики.

**Пример**

@@ -63,7 +63,7 @@ user String — имя пользователя, которого использ

Содержит информацию о столбцах всех таблиц.

-С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../query_language/misc.md#misc-describe-table), но для многих таблиц сразу.
+С помощью этой таблицы можно получить информацию аналогично запросу [DESCRIBE TABLE](../sql_reference/statements/misc.md#misc-describe-table), но для многих таблиц сразу.

Таблица `system.columns` содержит столбцы (тип столбца указан в скобках):

@@ -131,41 +131,41 @@ SELECT * FROM system.contributors WHERE name='Olga Khvostikova'

## system.detached\_parts {#system_tables-detached_parts}

-Содержит информацию об отсоединённых кусках таблиц семейства [MergeTree](table_engines/mergetree.md). Столбец `reason` содержит причину, по которой кусок был отсоединён. Для кусов, отсоединённых пользователем, `reason` содержит пустую строку.
-Такие куски могут быть присоединены с помощью [ALTER TABLE ATTACH PARTITION\|PART](../query_language/query_language/alter/#alter_attach-partition). Остальные столбцы описаны в [system.parts](#system_tables-parts).
-Если имя куска некорректно, значения некоторых столбцов могут быть `NULL`. Такие куски могут быть удалены с помощью [ALTER TABLE DROP DETACHED PART](../query_language/query_language/alter/#alter_drop-detached).
+Содержит информацию об отсоединённых кусках таблиц семейства [MergeTree](../engines/table_engines/mergetree_family/mergetree.md). Столбец `reason` содержит причину, по которой кусок был отсоединён. Для кусков, отсоединённых пользователем, `reason` содержит пустую строку. 
+Такие куски могут быть присоединены с помощью [ALTER TABLE ATTACH PARTITION\|PART](../sql_reference/alter/#alter_attach-partition). Остальные столбцы описаны в [system.parts](#system_tables-parts). +Если имя куска некорректно, значения некоторых столбцов могут быть `NULL`. Такие куски могут быть удалены с помощью [ALTER TABLE DROP DETACHED PART](../sql_reference/alter/#alter_drop-detached). ## system.dictionaries {#system_tables-dictionaries} -Содержит информацию о [внешних словарях](../query_language/dicts/external_dicts.md). +Содержит информацию о [внешних словарях](../sql_reference/dictionaries/external_dictionaries/external_dicts.md). Столбцы: -- `database` ([String](../data_types/string.md)) — Имя базы данных, в которой находится словарь, созданный с помощью DDL-запроса. Пустая строка для других словарей. -- `name` ([String](../data_types/string.md)) — [Имя словаря](../query_language/dicts/external_dicts_dict.md). -- `status` ([Enum8](../data_types/enum.md)) — Статус словаря. Возможные значения: +- `database` ([String](../sql_reference/data_types/string.md)) — Имя базы данных, в которой находится словарь, созданный с помощью DDL-запроса. Пустая строка для других словарей. +- `name` ([String](../sql_reference/data_types/string.md)) — [Имя словаря](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md). +- `status` ([Enum8](../sql_reference/data_types/enum.md)) — Статус словаря. Возможные значения: - `NOT_LOADED` — Словарь не загружен, потому что не использовался. - `LOADED` — Словарь загружен успешно. - `FAILED` — Словарь не загружен в результате ошибки. - `LOADING` — Словарь в процессе загрузки. - - `LOADED_AND_RELOADING` — Словарь загружен успешно, сейчас перезагружается (частые причины: запрос [SYSTEM RELOAD DICTIONARY](../query_language/system.md#query_language-system-reload-dictionary), таймаут, изменение настроек словаря). + - `LOADED_AND_RELOADING` — Словарь загружен успешно, сейчас перезагружается (частые причины: запрос [SYSTEM RELOAD DICTIONARY](../sql_reference/statements/system.md#query_language-system-reload-dictionary), таймаут, изменение настроек словаря). - `FAILED_AND_RELOADING` — Словарь не загружен в результате ошибки, сейчас перезагружается. -- `origin` ([String](../data_types/string.md)) — Путь к конфигурационному файлу, описывающему словарь. -- `type` ([String](../data_types/string.md)) — Тип размещения словаря. [Хранение словарей в памяти](../query_language/dicts/external_dicts_dict_layout.md). -- `key` — [Тип ключа](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-key): Числовой ключ ([UInt64](../data_types/int_uint.md#uint-ranges)) или Составной ключ ([String](../data_types/string.md)) — строка вида "(тип 1, тип 2, ..., тип n)". -- `attribute.names` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Массив [имен атрибутов](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. -- `attribute.types` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Соответствующий массив [типов атрибутов](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. -- `bytes_allocated` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Объем оперативной памяти, используемый словарем. -- `query_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Количество запросов с момента загрузки словаря или с момента последней успешной перезагрузки. 
-- `hit_rate` ([Float64](../data_types/float.md)) — Для cache-словарей — процент закэшированных значений. -- `element_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Количество элементов, хранящихся в словаре. -- `load_factor` ([Float64](../data_types/float.md)) — Процент заполнения словаря (для хэшированного словаря — процент заполнения хэш-таблицы). -- `source` ([String](../data_types/string.md)) — Текст, описывающий [источник данных](../query_language/dicts/external_dicts_dict_sources.md) для словаря. -- `lifetime_min` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Минимальное [время обновления](../query_language/dicts/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. -- `lifetime_max` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Максимальное [время обновления](../query_language/dicts/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. -- `loading_start_time` ([DateTime](../data_types/datetime.md)) — Время начала загрузки словаря. -- `loading_duration` ([Float32](../data_types/float.md)) — Время, затраченное на загрузку словаря. -- `last_exception` ([String](../data_types/string.md)) — Текст ошибки, возникающей при создании или перезагрузке словаря, если словарь не удалось создать. +- `origin` ([String](../sql_reference/data_types/string.md)) — Путь к конфигурационному файлу, описывающему словарь. +- `type` ([String](../sql_reference/data_types/string.md)) — Тип размещения словаря. [Хранение словарей в памяти](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md). +- `key` — [Тип ключа](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-key): Числовой ключ ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) или Составной ключ ([String](../sql_reference/data_types/string.md)) — строка вида "(тип 1, тип 2, ..., тип n)". +- `attribute.names` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Массив [имен атрибутов](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `attribute.types` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Соответствующий массив [типов атрибутов](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `bytes_allocated` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Объем оперативной памяти, используемый словарем. +- `query_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Количество запросов с момента загрузки словаря или с момента последней успешной перезагрузки. +- `hit_rate` ([Float64](../sql_reference/data_types/float.md)) — Для cache-словарей — процент закэшированных значений. +- `element_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Количество элементов, хранящихся в словаре. +- `load_factor` ([Float64](../sql_reference/data_types/float.md)) — Процент заполнения словаря (для хэшированного словаря — процент заполнения хэш-таблицы). 
+- `source` ([String](../sql_reference/data_types/string.md)) — Текст, описывающий [источник данных](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md) для словаря.
+- `lifetime_min` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Минимальное [время обновления](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого ClickHouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах.
+- `lifetime_max` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Максимальное [время обновления](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого ClickHouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах.
+- `loading_start_time` ([DateTime](../sql_reference/data_types/datetime.md)) — Время начала загрузки словаря.
+- `loading_duration` ([Float32](../sql_reference/data_types/float.md)) — Время, затраченное на загрузку словаря.
+- `last_exception` ([String](../sql_reference/data_types/string.md)) — Текст ошибки, возникающей при создании или перезагрузке словаря, если словарь не удалось создать.
 
 **Пример**
 
@@ -202,9 +202,9 @@ SELECT * FROM system.dictionaries
 
 Столбцы:
 
-- `event` ([String](../data_types/string.md)) — имя события.
-- `value` ([UInt64](../data_types/int_uint.md)) — количество произошедших событий.
-- `description` ([String](../data_types/string.md)) — описание события.
+- `event` ([String](../sql_reference/data_types/string.md)) — имя события.
+- `value` ([UInt64](../sql_reference/data_types/int_uint.md)) — количество произошедших событий.
+- `description` ([String](../sql_reference/data_types/string.md)) — описание события.
 
 **Пример**
 
@@ -240,7 +240,7 @@ SELECT * FROM system.events LIMIT 5
 
 ## system.graphite\_retentions {#system-graphite-retentions}
 
-Содержит информацию о том, какие параметры [graphite\_rollup](server_settings/settings.md#server_settings-graphite_rollup) используются в таблицах с движками [\*GraphiteMergeTree](table_engines/graphitemergetree.md).
+Содержит информацию о том, какие параметры [graphite\_rollup](server_configuration_parameters/settings.md#server_configuration_parameters-graphite_rollup) используются в таблицах с движками [\*GraphiteMergeTree](../engines/table_engines/mergetree_family/graphitemergetree.md).
 
 Столбцы:
 
@@ -280,9 +280,9 @@ SELECT * FROM system.events LIMIT 5
 
 Столбцы:
 
-- `metric` ([String](../data_types/string.md)) — название метрики.
-- `value` ([Int64](../data_types/int_uint.md)) — значение метрики.
-- `description` ([String](../data_types/string.md)) — описание метрики.
+- `metric` ([String](../sql_reference/data_types/string.md)) — название метрики.
+- `value` ([Int64](../sql_reference/data_types/int_uint.md)) — значение метрики.
+- `description` ([String](../sql_reference/data_types/string.md)) — описание метрики.
 
 Список поддерживаемых метрик смотрите в файле [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
 
@@ -389,13 +389,13 @@ CurrentMetric_ReplicatedChecks: 0
 
 ## system.parts {#system_tables-parts}
 
-Содержит информацию о кусках данных таблиц семейства [MergeTree](table_engines/mergetree.md).
+Содержит информацию о кусках данных таблиц семейства [MergeTree](../engines/table_engines/mergetree_family/mergetree.md).
Каждая строка описывает один кусок данных.
 
 Столбцы:
 
-- `partition` (`String`) – Имя партиции. Что такое партиция можно узнать из описания запроса [ALTER](../query_language/alter.md#query_language_queries_alter).
+- `partition` (`String`) – Имя партиции. Что такое партиция, можно узнать из описания запроса [ALTER](../sql_reference/statements/alter.md#query_language_queries_alter).
 
     Форматы:
 
@@ -446,7 +446,7 @@
 
 - `primary_key_bytes_in_memory_allocated` (`UInt64`) – объём памяти (в байтах), выделенный для размещения первичных ключей.
 
-- `is_frozen` (`UInt8`) – Признак, показывающий существование бэкапа партиции. 1, бэкап есть. 0, бэкапа нет. Смотрите раздел [FREEZE PARTITION](../query_language/alter.md#alter_freeze-partition).
+- `is_frozen` (`UInt8`) – Признак, показывающий существование бэкапа партиции. 1, бэкап есть. 0, бэкапа нет. Смотрите раздел [FREEZE PARTITION](../sql_reference/statements/alter.md#alter_freeze-partition).
 
 - `database` (`String`) – имя базы данных.
 
@@ -458,11 +458,11 @@
 
 - `disk` (`String`) – имя диска, на котором находится кусок данных.
 
-- `hash_of_all_files` (`String`) – значение [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) для сжатых файлов.
+- `hash_of_all_files` (`String`) – значение [sipHash128](../sql_reference/functions/hash_functions.md#hash_functions-siphash128) для сжатых файлов.
 
-- `hash_of_uncompressed_files` (`String`) – значение [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) несжатых файлов (файлы с засечками, первичным ключом и пр.)
+- `hash_of_uncompressed_files` (`String`) – значение [sipHash128](../sql_reference/functions/hash_functions.md#hash_functions-siphash128) несжатых файлов (файлы с засечками, первичным ключом и пр.).
 
-- `uncompressed_hash_of_compressed_files` (`String`) – значение [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) данных в сжатых файлах как если бы они были разжатыми.
+- `uncompressed_hash_of_compressed_files` (`String`) – значение [sipHash128](../sql_reference/functions/hash_functions.md#hash_functions-siphash128) данных в сжатых файлах, как если бы они были разжатыми.
 
 - `bytes` (`UInt64`) – алиас для `bytes_on_disk`.
 
@@ -470,9 +470,9 @@
 
 ## system.part\_log {#system_tables-part-log}
 
-Системная таблица `system.part_log` создается только в том случае, если задана серверная настройка [part\_log](server_settings/settings.md#server_settings-part-log).
+Системная таблица `system.part_log` создается только в том случае, если задана серверная настройка [part\_log](server_configuration_parameters/settings.md#server_configuration_parameters-part-log).
 
-Содержит информацию о всех событиях, произошедших с [кусками данных](table_engines/custom_partitioning_key.md) таблиц семейства [MergeTree](table_engines/mergetree.md) (например, события добавления, удаления или слияния данных).
+Содержит информацию обо всех событиях, произошедших с [кусками данных](../engines/table_engines/mergetree_family/custom_partitioning_key.md) таблиц семейства [MergeTree](../engines/table_engines/mergetree_family/mergetree.md) (например, события добавления, удаления или слияния данных).
 
 Столбцы:
 
@@ -480,7 +480,7 @@
 
     - `NEW_PART` — вставка нового куска.
     - `MERGE_PARTS` — слияние кусков.
     - `DOWNLOAD_PART` — загрузка с реплики.
-    - `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../query_language/alter.md#alter_detach-partition).
+    - `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../sql_reference/statements/alter.md#alter_detach-partition).
     - `MUTATE_PART` — изменение куска.
     - `MOVE_PART` — перемещение куска между дисками.
 - `event_date` (Date) — дата события.
@@ -524,7 +524,7 @@
 
 !!! note "Внимание"
     Таблица не содержит входных данных для запросов `INSERT`.
 
-ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_log](server_settings/settings.md#server_settings-query-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы.
+ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы.
 
 Чтобы включить логирование, задайте значение параметра [log\_queries](settings/settings.md#settings-log-queries) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md).
 
@@ -594,14 +594,14 @@
 
 2. Если во время обработки запроса произошла ошибка, создаются два события с типами 1 и 4.
 3. Если ошибка произошла до запуска запроса, создается одно событие с типом 3.
 
-По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query\_log](server_settings/settings.md#server_settings-query-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`.
+По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буфера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`.
 
 Если таблицу удалить вручную, она пересоздастся автоматически «на лету». При этом все логи на момент удаления таблицы будут удалены.
 
 !!! note "Примечание"
     Срок хранения логов не ограничен. Логи не удаляются из таблицы автоматически. Вам необходимо самостоятельно организовать удаление устаревших логов.
 
-Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_log](server_settings/settings.md#server_settings-query-log) (параметр `partition_by`).
+Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) (параметр `partition_by`).
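+
+As a quick illustration of the workflow described above — a sketch that assumes `log_queries` may be changed in the current session and uses only the statements mentioned in this section:
+
+``` sql
+SET log_queries = 1;   -- enable query logging for the current session
+SELECT 1;              -- any query; it will be logged
+SYSTEM FLUSH LOGS;     -- force the in-memory buffer into the log tables
+```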
 
 ## system.query\_log {#system_tables-query_log}
 
 Contains information about execution of queries. For each query, you can see the processing start time, the duration of processing, error messages and other information.
 
 !!! note "Note"
     The table doesn’t contain input data for `INSERT` queries.
 
-ClickHouse creates this table only if the [query\_log](server_settings/settings.md#server_settings-query-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in.
+ClickHouse creates this table only if the [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in.
 
 To enable query logging, set the [log\_queries](settings/settings.md#settings-log-queries) parameter to 1. For details, see the [Settings](settings/settings.md) section.
 
@@ -680,19 +680,19 @@ Each query creates one or two rows in the `query_log` table, depending on the st
 
 2. If an error occurred during query processing, two events with types 1 and 4 are created.
 3. If an error occurred before launching the query, a single event with type 3 is created.
 
-By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_log](server_settings/settings.md#server_settings-query-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.
+By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.
 
 When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted.
 
 !!! note "Note"
     The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself.
 
-You can specify an arbitrary partitioning key for the `system.query_log` table in the [query\_log](server_settings/settings.md#server_settings-query-log) server setting (see the `partition_by` parameter).
+You can specify an arbitrary partitioning key for the `system.query_log` table in the [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `partition_by` parameter).
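+
+For example, a sketch of listing the slowest recent queries through this table — the `type`, `query_duration_ms` and `query` column names are assumptions based on the standard `system.query_log` layout, since the full column list is given elsewhere:
+
+``` sql
+SELECT query_duration_ms, query
+FROM system.query_log
+WHERE type = 'QueryFinish'          -- completed queries only
+ORDER BY query_duration_ms DESC
+LIMIT 3
+```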
 
 ## system.query\_thread\_log {#system\_tables-query-thread-log}
 
 Содержит информацию о каждом потоке выполняемых запросов.
 
-ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_thread\_log](server_settings/settings.md#server_settings-query-thread-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы.
+ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_thread\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы.
 
 Чтобы включить логирование, задайте значение параметра [log\_query\_threads](settings/settings.md#settings-log-query-threads) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md).
 
@@ -743,43 +743,43 @@
 
 - `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events).
 - `ProfileEvents.Values` (Array(UInt64)) — метрики для данного потока, перечисленные в столбце `ProfileEvents.Names`.
 
-По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query\_thread\_log](server_settings/settings.md#server_settings-query-thread-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`.
+По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query\_thread\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буфера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`.
 
 Если таблицу удалить вручную, она пересоздастся автоматически «на лету». При этом все логи на момент удаления таблицы будут удалены.
 
 !!! note "Примечание"
     Срок хранения логов не ограничен. Логи не удаляются из таблицы автоматически. Вам необходимо самостоятельно организовать удаление устаревших логов.
 
-Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_thread\_log](server_settings/settings.md#server_settings-query-thread-log) (параметр `partition_by`).
+Можно указать произвольный ключ партиционирования для таблицы `system.query_thread_log` в конфигурации [query\_thread\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log) (параметр `partition_by`).
 
 ## system.trace\_log {#system_tables-trace_log}
 
 Contains stack traces collected by the sampling query profiler.
 
-ClickHouse creates this table when the [trace\_log](server_settings/settings.md#server_settings-trace_log) server configuration section is set. Also the [query\_profiler\_real\_time\_period\_ns](settings/settings.md#query_profiler_real_time_period_ns) and [query\_profiler\_cpu\_time\_period\_ns](settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
+ClickHouse creates this table when the [trace\_log](server_configuration_parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query\_profiler\_real\_time\_period\_ns](settings/settings.md#query_profiler_real_time_period_ns) and [query\_profiler\_cpu\_time\_period\_ns](settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
 
 To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
 
 Columns:
 
-- `event_date`([Date](../data_types/date.md)) — Date of sampling moment.
+- `event_date`([Date](../sql_reference/data_types/date.md)) — Date of sampling moment.
 
-- `event_time`([DateTime](../data_types/datetime.md)) — Timestamp of sampling moment.
+- `event_time`([DateTime](../sql_reference/data_types/datetime.md)) — Timestamp of sampling moment.
 
-- `revision`([UInt32](../data_types/int_uint.md)) — ClickHouse server build revision.
+- `revision`([UInt32](../sql_reference/data_types/int_uint.md)) — ClickHouse server build revision.
 
     When connecting to the server by `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server.
-- `timer_type`([Enum8](../data_types/enum.md)) — Timer type: +- `timer_type`([Enum8](../sql_reference/data_types/enum.md)) — Timer type: - `Real` represents wall-clock time. - `CPU` represents CPU time. -- `thread_number`([UInt32](../data_types/int_uint.md)) — Thread identifier. +- `thread_number`([UInt32](../sql_reference/data_types/int_uint.md)) — Thread identifier. -- `query_id`([String](../data_types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) system table. +- `query_id`([String](../sql_reference/data_types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) system table. -- `trace`([Array(UInt64)](../data_types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. +- `trace`([Array(UInt64)](../sql_reference/data_types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. **Example** @@ -927,13 +927,13 @@ WHERE Столбцы: -- `name` ([String](../data_types/string.md)) — имя настройки. -- `value` ([String](../data_types/string.md)) — значение настройки. -- `changed` ([UInt8](../data_types/int_uint.md#uint-ranges)) — показывает, изменена ли настройка по отношению к значению по умолчанию. -- `description` ([String](../data_types/string.md)) — краткое описание настройки. -- `min` ([Nullable](../data_types/nullable.md)([String](../data_types/string.md))) — минимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../query_language/syntax.md#null-literal). -- `max` ([Nullable](../data_types/nullable.md)([String](../data_types/string.md))) — максимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../query_language/syntax.md#null-literal). -- `readonly` ([UInt8](../data_types/int_uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку: +- `name` ([String](../sql_reference/data_types/string.md)) — имя настройки. +- `value` ([String](../sql_reference/data_types/string.md)) — значение настройки. +- `changed` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — показывает, изменена ли настройка по отношению к значению по умолчанию. +- `description` ([String](../sql_reference/data_types/string.md)) — краткое описание настройки. +- `min` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — минимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../sql_reference/syntax.md#null-literal). +- `max` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — максимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../sql_reference/syntax.md#null-literal). +- `readonly` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку: - `0` — Текущий пользователь может изменять настройку. - `1` — Текущий пользователь не может изменять настройку. 
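+
+A minimal sketch of combining these columns to audit non-default settings and their constraints (only the columns described above are used):
+
+``` sql
+SELECT name, value, min, max, readonly
+FROM system.settings
+WHERE changed               -- only settings changed from their defaults
+LIMIT 5
+```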
@@ -1013,9 +1013,9 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') **Смотрите также** -- [Секции движка](table_engines/mergetree/#mergetree-query-clauses) семейства MergeTree -- [Настройки](table_engines/kafka.md#table_engine-kafka-creating-a-table) Kafka -- [Настройки](table_engines/join/#join-limitations-and-settings) Join +- [Секции движка](../engines/table_engines/mergetree_family/mergetree.md#mergetree-query-clauses) семейства MergeTree +- [Настройки](../engines/table_engines/integrations/kafka.md#table_engine-kafka-creating-a-table) Kafka +- [Настройки](../engines/table_engines/special/join.md#join-limitations-and-settings) Join ## system.tables {#system-tables} @@ -1031,7 +1031,7 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') - `metadata_path` (String) — путь к табличным метаданным в файловой системе. - `metadata_modification_time` (DateTime) — время последней модификации табличных метаданных. - `dependencies_database` (Array(String)) — зависимости базы данных. -- `dependencies_table` (Array(String)) — табличные зависимости (таблицы [MaterializedView](table_engines/materializedview.md), созданные на базе текущей таблицы). +- `dependencies_table` (Array(String)) — табличные зависимости (таблицы [MaterializedView](../engines/table_engines/special/materializedview.md), созданные на базе текущей таблицы). - `create_table_query` (String) — запрос, которым создавалась таблица. - `engine_full` (String) — параметры табличного движка. - `partition_key` (String) — ключ партиционирования таблицы. @@ -1114,7 +1114,7 @@ path: /clickhouse/tables/01-08/visits/replicas ## system.mutations {#system_tables-mutations} -Таблица содержит информацию о ходе выполнения [мутаций](../query_language/alter.md#alter-mutations) MergeTree-таблиц. Каждой команде мутации соответствует одна строка. В таблице есть следующие столбцы: +Таблица содержит информацию о ходе выполнения [мутаций](../sql_reference/statements/alter.md#alter-mutations) MergeTree-таблиц. Каждой команде мутации соответствует одна строка. В таблице есть следующие столбцы: **database**, **table** - имя БД и таблицы, к которой была применена мутация. @@ -1140,28 +1140,28 @@ path: /clickhouse/tables/01-08/visits/replicas ## system.disks {#system_tables-disks} -Cодержит информацию о дисках, заданных в [конфигурации сервера](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure). +Cодержит информацию о дисках, заданных в [конфигурации сервера](../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). Столбцы: -- `name` ([String](../data_types/string.md)) — имя диска в конфигурации сервера. -- `path` ([String](../data_types/string.md)) — путь к точке монтирования в файловой системе. -- `free_space` ([UInt64](../data_types/int_uint.md)) — свободное место на диске в байтах. -- `total_space` ([UInt64](../data_types/int_uint.md)) — объём диска в байтах. -- `keep_free_space` ([UInt64](../data_types/int_uint.md)) — место, которое должно остаться свободным на диске в байтах. Задаётся значением параметра `keep_free_space_bytes` конфигурации дисков. +- `name` ([String](../sql_reference/data_types/string.md)) — имя диска в конфигурации сервера. +- `path` ([String](../sql_reference/data_types/string.md)) — путь к точке монтирования в файловой системе. +- `free_space` ([UInt64](../sql_reference/data_types/int_uint.md)) — свободное место на диске в байтах. 
+- `total_space` ([UInt64](../sql_reference/data_types/int_uint.md)) — объём диска в байтах.
+- `keep_free_space` ([UInt64](../sql_reference/data_types/int_uint.md)) — место, которое должно остаться свободным на диске в байтах. Задаётся значением параметра `keep_free_space_bytes` конфигурации дисков.
 
 ## system.storage\_policies {#system_tables-storage_policies}
 
-Содержит информацию о политиках хранения и томах, заданных в [конфигурации сервера](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
+Содержит информацию о политиках хранения и томах, заданных в [конфигурации сервера](../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
 
 Столбцы:
 
-- `policy_name` ([String](../data_types/string.md)) — имя политики хранения.
-- `volume_name` ([String](../data_types/string.md)) — имя тома, который содержится в политике хранения.
-- `volume_priority` ([UInt64](../data_types/int_uint.md)) — порядковый номер тома согласно конфигурации.
-- `disks` ([Array(String)](../data_types/array.md)) — имена дисков, содержащихся в политике хранения.
-- `max_data_part_size` ([UInt64](../data_types/int_uint.md)) — максимальный размер куска данных, который может храниться на дисках тома (0 — без ограничений).
-- `move_factor` ([Float64](../data_types/float.md))\` — доля свободного места, при превышении которой данные начинают перемещаться на следующий том.
+- `policy_name` ([String](../sql_reference/data_types/string.md)) — имя политики хранения.
+- `volume_name` ([String](../sql_reference/data_types/string.md)) — имя тома, который содержится в политике хранения.
+- `volume_priority` ([UInt64](../sql_reference/data_types/int_uint.md)) — порядковый номер тома согласно конфигурации.
+- `disks` ([Array(String)](../sql_reference/data_types/array.md)) — имена дисков, содержащихся в политике хранения.
+- `max_data_part_size` ([UInt64](../sql_reference/data_types/int_uint.md)) — максимальный размер куска данных, который может храниться на дисках тома (0 — без ограничений).
+- `move_factor` ([Float64](../sql_reference/data_types/float.md)) — доля свободного места, при превышении которой данные начинают перемещаться на следующий том.
 
 Если политика хранения содержит несколько томов, то каждому тому соответствует отдельная запись в таблице.
 
diff --git a/docs/ru/operations/table_engines/generate.md b/docs/ru/operations/table_engines/generate.md
deleted file mode 100644
index 051369d2e1c..00000000000
--- a/docs/ru/operations/table_engines/generate.md
+++ /dev/null
@@ -1,58 +0,0 @@
----
-en_copy: true
----
-
-# GenerateRandom {#table_engines-generate}
-
-The GenerateRandom table engine produces random data for given table schema.
-
-Usage examples:
-
-- Use in test to populate reproducible large table.
-- Generate random input for fuzzing tests.
-
-## Usage in ClickHouse Server {#usage-in-clickhouse-server}
-
-``` sql
-ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length)
-```
-
-The `max_array_length` and `max_string_length` parameters specify maximum length of all
-array columns and strings correspondingly in generated data.
-
-Generate table engine supports only `SELECT` queries.
-
-It supports all [DataTypes](../../data_types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
-
-**Example:**
-
-**1.** Set up the `generate_engine_table` table:
-
-``` sql
-CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3)
-```
-
-**2.** Query the data:
-
-``` sql
-SELECT * FROM generate_engine_table LIMIT 3
-```
-
-``` text
-┌─name─┬──────value─┐
-│ c4xJ │ 1412771199 │
-│ r    │ 1791099446 │
-│ 7#$  │  124312908 │
-└──────┴────────────┘
-```
-
-## Details of Implementation {#details-of-implementation}
-
-- Not supported:
-    - `ALTER`
-    - `SELECT ... SAMPLE`
-    - `INSERT`
-    - Indices
-    - Replication
-
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/generate/)
diff --git a/docs/ru/operations/table_engines/materializedview.md b/docs/ru/operations/table_engines/materializedview.md
deleted file mode 100644
index 6bcc528782a..00000000000
--- a/docs/ru/operations/table_engines/materializedview.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# MaterializedView {#materializedview}
-
-Используется для реализации материализованных представлений (подробнее см. запрос [CREATE TABLE](../../query_language/create.md)). Для хранения данных, использует другой движок, который был указан при создании представления. При чтении из таблицы, просто использует этот движок.
-
-[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/materializedview/)
diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md
index 4c9fbf49bd6..7c4be02456c 100644
--- a/docs/ru/operations/troubleshooting.md
+++ b/docs/ru/operations/troubleshooting.md
@@ -98,7 +98,7 @@ $ sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-se
 
 - Параметры endpoint.
 
-    Проверьте настройки [listen_host](server_settings/settings.md#server_settings-listen_host) и [tcp_port](server_settings/settings.md#server_settings-tcp_port).
+    Проверьте настройки [listen_host](server_configuration_parameters/settings.md#server_configuration_parameters-listen_host) и [tcp_port](server_configuration_parameters/settings.md#server_configuration_parameters-tcp_port).
 
     По умолчанию, сервер ClickHouse принимает только локальные подключения.
 
diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md
new file mode 100644
index 00000000000..5467a58676e
--- /dev/null
+++ b/docs/ru/operations/utilities/clickhouse-benchmark.md
@@ -0,0 +1,154 @@
+---
+en_copy: true
+---
+
+# clickhouse-benchmark {#clickhouse-benchmark}
+
+Connects to a ClickHouse server and repeatedly sends specified queries.
+
+Syntax:
+
+``` bash
+$ echo "single query" | clickhouse-benchmark [keys]
+```
+
+or
+
+``` bash
+$ clickhouse-benchmark [keys] <<< "single query"
+```
+
+If you want to send a set of queries, create a text file and place each query on an individual line in this file. For example:
+
+``` sql
+SELECT * FROM system.numbers LIMIT 10000000
+SELECT 1
+```
+
+Then pass this file to the standard input of `clickhouse-benchmark`:
+
+``` bash
+clickhouse-benchmark [keys] < queries_file
+```
+
+## Keys {#clickhouse-benchmark-keys}
+
+- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
+- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1.
+- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
+- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys.
+- `-i N`, `--iterations=N` — Total number of queries. Default value: 0.
+- `-r`, `--randomize` — Random order of query execution if there is more than one input query.
+- `-s`, `--secure` — Using a TLS connection.
+- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled).
+- `--confidence=N` — Level of confidence for the T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions differ with the selected level of confidence.
+- `--cumulative` — Printing cumulative data instead of data per interval.
+- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`.
+- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON file.
+- `--user=USERNAME` — ClickHouse user name. Default value: `default`.
+- `--password=PSWD` — ClickHouse user password. Default value: empty string.
+- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-benchmark` outputs stack traces of exceptions.
+- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns an answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`.
+- `--help` — Shows the help message.
+
+If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--<session setting name>= SETTING_VALUE`. For example, `--max_memory_usage=1048576`.
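+
+For instance, a run that applies one such setting might look like this sketch (the query and the memory limit are arbitrary):
+
+``` bash
+$ echo "SELECT * FROM system.numbers LIMIT 1000000" | clickhouse-benchmark -i 5 --max_memory_usage=1048576
+```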
+
+## Output {#clickhouse-benchmark-output}
+
+By default, `clickhouse-benchmark` reports for each `--delay` interval.
+
+Example of the report:
+
+``` text
+Queries executed: 10.
+
+localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, result RPS: 67721584.984, result MiB/s: 516.675.
+
+0.000% 0.145 sec.
+10.000% 0.146 sec.
+20.000% 0.146 sec.
+30.000% 0.146 sec.
+40.000% 0.147 sec.
+50.000% 0.148 sec.
+60.000% 0.148 sec.
+70.000% 0.148 sec.
+80.000% 0.149 sec.
+90.000% 0.150 sec.
+95.000% 0.150 sec.
+99.000% 0.150 sec.
+99.900% 0.150 sec.
+99.990% 0.150 sec.
+```
+
+In the report you can find:
+
+- Number of queries in the `Queries executed:` field.
+
+- Status string containing (in order):
+
+    - Endpoint of ClickHouse server.
+    - Number of processed queries.
+    - QPS: How many queries the server performed per second during a period specified in the `--delay` argument.
+    - RPS: How many rows the server read per second during a period specified in the `--delay` argument.
+    - MiB/s: How many mebibytes the server read per second during a period specified in the `--delay` argument.
+    - result RPS: How many rows placed by the server to the result of a query per second during a period specified in the `--delay` argument.
+    - result MiB/s: How many mebibytes placed by the server to the result of a query per second during a period specified in the `--delay` argument.
+
+- Percentiles of queries execution time.
+
+## Comparison mode {#clickhouse-benchmark-comparison-mode}
+
+`clickhouse-benchmark` can compare performances for two running ClickHouse servers.
+
+To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys are matched together by position in the arguments list: the first `--host` is matched with the first `--port`, and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query is addressed to a randomly selected server. The results are shown for each server separately.
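+
+An invocation in this mode might look like the following sketch (`server1.example.com` and `server2.example.com` are placeholder hosts):
+
+``` bash
+$ clickhouse-benchmark -h server1.example.com -p 9000 -h server2.example.com -p 9000 -i 10 <<< "SELECT count() FROM system.numbers LIMIT 1000000"
+```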
+
+## Example {#clickhouse-benchmark-example}
+
+``` bash
+$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10
+```
+
+``` text
+Loaded 1 queries.
+
+Queries executed: 6.
+
+localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459.
+
+0.000% 0.159 sec.
+10.000% 0.159 sec.
+20.000% 0.159 sec.
+30.000% 0.160 sec.
+40.000% 0.160 sec.
+50.000% 0.162 sec.
+60.000% 0.164 sec.
+70.000% 0.165 sec.
+80.000% 0.166 sec.
+90.000% 0.166 sec.
+95.000% 0.167 sec.
+99.000% 0.167 sec.
+99.900% 0.167 sec.
+99.990% 0.167 sec.
+
+
+
+Queries executed: 10.
+
+localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986.
+
+0.000% 0.159 sec.
+10.000% 0.159 sec.
+20.000% 0.160 sec.
+30.000% 0.163 sec.
+40.000% 0.164 sec.
+50.000% 0.165 sec.
+60.000% 0.166 sec.
+70.000% 0.166 sec.
+80.000% 0.167 sec.
+90.000% 0.167 sec.
+95.000% 0.170 sec.
+99.000% 0.172 sec.
+99.900% 0.172 sec.
+99.990% 0.172 sec.
+```
diff --git a/docs/ru/operations/utils/clickhouse-copier.md b/docs/ru/operations/utilities/clickhouse-copier.md
similarity index 100%
rename from docs/ru/operations/utils/clickhouse-copier.md
rename to docs/ru/operations/utilities/clickhouse-copier.md
diff --git a/docs/ru/operations/utils/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md
similarity index 97%
rename from docs/ru/operations/utils/clickhouse-local.md
rename to docs/ru/operations/utilities/clickhouse-local.md
index bcc34deea4e..7dfa9587686 100644
--- a/docs/ru/operations/utils/clickhouse-local.md
+++ b/docs/ru/operations/utilities/clickhouse-local.md
@@ -1,6 +1,6 @@
 # clickhouse-local {#clickhouse-local}
 
-Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../../query_language/index.md) ClickHouse.
+Принимает на вход данные, которые можно представить в табличном виде, и выполняет над ними операции, заданные на [языке запросов](../../sql_reference/index.md) ClickHouse.
 
 `clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает ClickHouse, при этом для выполнения операций не требуется запущенный сервер.
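+
+A minimal sketch of such an invocation (the sample data is arbitrary; `-S`, `-if` and `-q` are the structure, input-format and query keys of `clickhouse-local`, and `table` is its default table name):
+
+``` bash
+$ echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT a + b FROM table"
+```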
diff --git a/docs/ru/operations/utils/index.md b/docs/ru/operations/utilities/index.md similarity index 100% rename from docs/ru/operations/utils/index.md rename to docs/ru/operations/utilities/index.md diff --git a/docs/ru/operations/utils/clickhouse-benchmark.md b/docs/ru/operations/utils/clickhouse-benchmark.md deleted file mode 100644 index 1d8ac3dec46..00000000000 --- a/docs/ru/operations/utils/clickhouse-benchmark.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -en_copy: true ---- - -# clickhouse-benchmark {#clickhouse-benchmark} - -Connects to a ClickHouse server and repeatedly sends specified queries. - -Syntax: - -``` bash -$ echo "single query" | clickhouse-benchmark [keys] -``` - -or - -``` bash -$ clickhouse-benchmark [keys] <<< "single query" -``` - -If you want to send a set of queries, create a text file and place each query on the individual string in this file. For example: - -``` sql -SELECT * FROM system.numbers LIMIT 10000000 -SELECT 1 -``` - -Then pass this file to a standard input of `clickhouse-benchmark`. - -``` bash -clickhouse-benchmark [keys] < queries_file -``` - -## Keys {#clickhouse-benchmark-keys} - -- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. -- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1. -- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. -- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. -- `-i N`, `--iterations=N` — Total number of queries. Default value: 0. -- `-r`, `--randomize` — Random order of queries execution if there is more then one input query. -- `-s`, `--secure` — Using TLS connection. -- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). -- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) test to determine whether the two distributions aren’t different with the selected level of confidence. -- `--cumulative` — Printing cumulative data instead of data per interval. -- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. -- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. -- `--user=USERNAME` — ClickHouse user name. Default value: `default`. -- `--password=PSWD` — ClickHouse user password. Default value: empty string. -- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. -- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. -- `--help` — Shows the help message. - -If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--= SETTING_VALUE`. 
For example, `--max_memory_usage=1048576`. - -## Output {#clickhouse-benchmark-output} - -By default, `clickhouse-benchmark` reports for each `--delay` interval. - -Example of the report: - -``` text -Queries executed: 10. - -localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, result RPS: 67721584.984, result MiB/s: 516.675. - -0.000% 0.145 sec. -10.000% 0.146 sec. -20.000% 0.146 sec. -30.000% 0.146 sec. -40.000% 0.147 sec. -50.000% 0.148 sec. -60.000% 0.148 sec. -70.000% 0.148 sec. -80.000% 0.149 sec. -90.000% 0.150 sec. -95.000% 0.150 sec. -99.000% 0.150 sec. -99.900% 0.150 sec. -99.990% 0.150 sec. -``` - -In the report you can find: - -- Number of queries in the `Queries executed:` field. - -- Status string containing (in order): - - - Endpoint of ClickHouse server. - - Number of processed queries. - - QPS: QPS: How many queries server performed per second during a period specified in the `--delay` argument. - - RPS: How many rows server read per second during a period specified in the `--delay` argument. - - MiB/s: How many mebibytes server read per second during a period specified in the `--delay` argument. - - result RPS: How many rows placed by server to the result of a query per second during a period specified in the `--delay` argument. - - result MiB/s. How many mebibytes placed by server to the result of a query per second during a period specified in the `--delay` argument. - -- Percentiles of queries execution time. - -## Comparison mode {#clickhouse-benchmark-comparison-mode} - -`clickhouse-benchmark` can compare performances for two running ClickHouse servers. - -To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately. - -## Example {#clickhouse-benchmark-example} - -``` bash -$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 -``` - -``` text -Loaded 1 queries. - -Queries executed: 6. - -localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459. - -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.159 sec. -30.000% 0.160 sec. -40.000% 0.160 sec. -50.000% 0.162 sec. -60.000% 0.164 sec. -70.000% 0.165 sec. -80.000% 0.166 sec. -90.000% 0.166 sec. -95.000% 0.167 sec. -99.000% 0.167 sec. -99.900% 0.167 sec. -99.990% 0.167 sec. - - - -Queries executed: 10. - -localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986. - -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.160 sec. -30.000% 0.163 sec. -40.000% 0.164 sec. -50.000% 0.165 sec. -60.000% 0.166 sec. -70.000% 0.166 sec. -80.000% 0.167 sec. -90.000% 0.167 sec. -95.000% 0.170 sec. -99.000% 0.172 sec. -99.900% 0.172 sec. -99.990% 0.172 sec. 
-``` diff --git a/docs/ru/query_language/index.md b/docs/ru/query_language/index.md deleted file mode 100644 index d57d8971a7d..00000000000 --- a/docs/ru/query_language/index.md +++ /dev/null @@ -1,9 +0,0 @@ -# Справка по SQL {#spravka-po-sql} - -- [SELECT](select.md) -- [INSERT INTO](insert_into.md) -- [CREATE](create.md) -- [ALTER](alter.md#query_language_queries_alter) -- [Прочие виды запросов](misc.md) - -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/) diff --git a/docs/ru/roadmap.md b/docs/ru/roadmap.md deleted file mode 100644 index 0d17b67a3a9..00000000000 --- a/docs/ru/roadmap.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -en_copy: true ---- - -# Roadmap {#roadmap} - -## Q1 2020 {#q1-2020} - -- Role-based access control - -## Q2 2020 {#q2-2020} - -- Integration with external authentication services -- Resource pools for more precise distribution of cluster capacity between users - -{## [Original article](https://clickhouse.tech/docs/en/roadmap/) ##} diff --git a/docs/ru/query_language/agg_functions/combinators.md b/docs/ru/sql_reference/aggregate_functions/combinators.md similarity index 96% rename from docs/ru/query_language/agg_functions/combinators.md rename to docs/ru/sql_reference/aggregate_functions/combinators.md index bc0616ef5de..e4295f0d76e 100644 --- a/docs/ru/query_language/agg_functions/combinators.md +++ b/docs/ru/sql_reference/aggregate_functions/combinators.md @@ -27,9 +27,9 @@ Для работы с промежуточными состояниями предназначены: -- Движок таблиц [AggregatingMergeTree](../../operations/table_engines/aggregatingmergetree.md). -- Функция [finalizeAggregation](../functions/other_functions.md#function-finalizeaggregation). -- Функция [runningAccumulate](../functions/other_functions.md#function-runningaccumulate). +- Движок таблиц [AggregatingMergeTree](../../engines/table_engines/mergetree_family/aggregatingmergetree.md). +- Функция [finalizeAggregation](../../sql_reference/aggregate_functions/combinators.md#function-finalizeaggregation). +- Функция [runningAccumulate](../../sql_reference/aggregate_functions/combinators.md#function-runningaccumulate). - Комбинатор [-Merge](#aggregate_functions_combinators_merge). - Комбинатор [-MergeState](#aggregate_functions_combinators_mergestate). diff --git a/docs/ru/query_language/agg_functions/index.md b/docs/ru/sql_reference/aggregate_functions/index.md similarity index 100% rename from docs/ru/query_language/agg_functions/index.md rename to docs/ru/sql_reference/aggregate_functions/index.md diff --git a/docs/ru/query_language/agg_functions/parametric_functions.md b/docs/ru/sql_reference/aggregate_functions/parametric_functions.md similarity index 94% rename from docs/ru/query_language/agg_functions/parametric_functions.md rename to docs/ru/sql_reference/aggregate_functions/parametric_functions.md index 22fc7eb2934..70430d21eae 100644 --- a/docs/ru/query_language/agg_functions/parametric_functions.md +++ b/docs/ru/sql_reference/aggregate_functions/parametric_functions.md @@ -17,7 +17,7 @@ **Возвращаемые значения** -- [Массив](../../data_types/array.md) [кортежей](../../data_types/tuple.md) следующего вида: +- [Массив](../../sql_reference/data_types/array.md) [кортежей](../../sql_reference/data_types/tuple.md) следующего вида: ``` [(lower_1, upper_1, height_1), ... 
(lower_N, upper_N, height_N)] @@ -44,7 +44,7 @@ FROM ( └─────────────────────────────────────────────────────────────────────────┘ ``` -С помощью функции [bar](../functions/other_functions.md#function-bar) можно визуализировать гистограмму, например: +С помощью функции [bar](../../sql_reference/aggregate_functions/parametric_functions.md#function-bar) можно визуализировать гистограмму, например: ``` sql WITH histogram(5)(rand() % 100) AS hist @@ -86,7 +86,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) - `pattern` — строка с шаблоном. Смотрите [Синтаксис шаблонов](#sequence-function-pattern-syntax). -- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../data_types/int_uint.md). +- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../sql_reference/aggregate_functions/parametric_functions.md). - `cond1`, `cond2` — условия, описывающие цепочку событий. Тип данных — `UInt8`. Можно использовать до 32 условий. Функция учитывает только те события, которые указаны в условиях. Функция пропускает данные из последовательности, если они не описаны ни в одном из условий. @@ -173,7 +173,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...) - `pattern` — строка с шаблоном. Смотрите [Синтаксис шаблонов](#sequence-function-pattern-syntax). -- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../data_types/int_uint.md). +- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../sql_reference/aggregate_functions/parametric_functions.md). - `cond1`, `cond2` — условия, описывающие цепочку событий. Тип данных — `UInt8`. Можно использовать до 32 условий. Функция учитывает только те события, которые указаны в условиях. Функция пропускает данные из последовательности, если они не описаны ни в одном из условий. @@ -234,10 +234,10 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **Параметры** -- `window` — ширина скользящего окна по времени в секундах. [UInt](../../data_types/int_uint.md). +- `window` — ширина скользящего окна по времени в секундах. [UInt](../../sql_reference/aggregate_functions/parametric_functions.md). - `mode` - необязательный параметр. Если установлено значение `'strict'`, то функция `windowFunnel()` применяет условия только для уникальных значений. -- `timestamp` — имя столбца, содержащего временные отметки. [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md#data_type-datetime) и другие параметры с типом `Integer`. В случае хранения меток времени в столбцах с типом `UInt64`, максимально допустимое значение соответствует ограничению для типа `Int64`, т.е. равно `2^63-1`. -- `cond` — условия или данные, описывающие цепочку событий. [UInt8](../../data_types/int_uint.md). +- `timestamp` — имя столбца, содержащего временные отметки. [Date](../../sql_reference/aggregate_functions/parametric_functions.md), [DateTime](../../sql_reference/aggregate_functions/parametric_functions.md#data_type-datetime) и другие параметры с типом `Integer`. 
В случае хранения меток времени в столбцах с типом `UInt64`, максимально допустимое значение соответствует ограничению для типа `Int64`, т.е. равно `2^63-1`. +- `cond` — условия или данные, описывающие цепочку событий. [UInt8](../../sql_reference/aggregate_functions/parametric_functions.md). **Возвращаемое значение** @@ -299,7 +299,7 @@ ORDER BY level ASC Аналитическая функция, которая показывает, насколько выдерживаются те или иные условия, например, удержание динамики/уровня [посещаемости сайта](https://yandex.ru/support/partner2/statistics/metrika-visitors-statistics.html?lang=ru). -Функция принимает набор (от 1 до 32) логических условий, как в [WHERE](../select.md#select-where), и применяет их к заданному набору данных. +Функция принимает набор (от 1 до 32) логических условий, как в [WHERE](../../sql_reference/statements/select.md#select-where), и применяет их к заданному набору данных. Условия, кроме первого, применяются попарно: результат второго будет истинным, если истинно первое и второе, третьего - если истинно первое и третье и т. д. diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/sql_reference/aggregate_functions/reference.md similarity index 91% rename from docs/ru/query_language/agg_functions/reference.md rename to docs/ru/sql_reference/aggregate_functions/reference.md index 3a6c3679c54..4e82b8775db 100644 --- a/docs/ru/query_language/agg_functions/reference.md +++ b/docs/ru/sql_reference/aggregate_functions/reference.md @@ -19,9 +19,9 @@ ClickHouse поддерживает следующие виды синтакси **Возвращаемое значение** - Если функция вызывается без параметров, она вычисляет количество строк. -- Если передаётся [выражение](../syntax.md#syntax-expressions) , то функция вычисляет количество раз, когда выражение возвращает не NULL. Если выражение возвращает значение типа [Nullable](../../data_types/nullable.md), то результат `count` не становится `Nullable`. Функция возвращает 0, если выражение возвращает `NULL` для всех строк. +- Если передаётся [выражение](../syntax.md#syntax-expressions) , то функция вычисляет количество раз, когда выражение возвращает не NULL. Если выражение возвращает значение типа [Nullable](../../sql_reference/data_types/nullable.md), то результат `count` не становится `Nullable`. Функция возвращает 0, если выражение возвращает `NULL` для всех строк. -В обоих случаях тип возвращаемого значения [UInt64](../../data_types/int_uint.md). +В обоих случаях тип возвращаемого значения [UInt64](../../sql_reference/data_types/int_uint.md). **Подробности** @@ -240,7 +240,7 @@ binary decimal ## groupBitmap {#groupbitmap} -Bitmap или агрегатные вычисления для столбца с типом данных `UInt*`, возвращают кардинальность в виде значения типа UInt64, если добавить суффикс -State, то возвращают [объект bitmap](../functions/bitmap_functions.md). +Bitmap или агрегатные вычисления для столбца с типом данных `UInt*`, возвращают кардинальность в виде значения типа UInt64, если добавить суффикс -State, то возвращают [объект bitmap](../../sql_reference/aggregate_functions/reference.md). ``` sql groupBitmap(expr) @@ -376,7 +376,7 @@ skewPop(expr) **Возвращаемое значение** -Коэффициент асимметрии заданного распределения. Тип — [Float64](../../data_types/float.md) +Коэффициент асимметрии заданного распределения. Тип — [Float64](../../sql_reference/aggregate_functions/reference.md) **Пример** @@ -400,7 +400,7 @@ skewSamp(expr) **Возвращаемое значение** -Коэффициент асимметрии заданного распределения. Тип — [Float64](../../data_types/float.md). 
Если `n <= 1` (`n` — размер выборки), тогда функция возвращает `nan`. +Коэффициент асимметрии заданного распределения. Тип — [Float64](../../sql_reference/aggregate_functions/reference.md). Если `n <= 1` (`n` — размер выборки), тогда функция возвращает `nan`. **Пример** @@ -422,7 +422,7 @@ kurtPop(expr) **Возвращаемое значение** -Коэффициент эксцесса заданного распределения. Тип — [Float64](../../data_types/float.md) +Коэффициент эксцесса заданного распределения. Тип — [Float64](../../sql_reference/aggregate_functions/reference.md) **Пример** @@ -446,7 +446,7 @@ kurtSamp(expr) **Возвращаемое значение** -Коэффициент эксцесса заданного распределения. Тип — [Float64](../../data_types/float.md). Если `n <= 1` (`n` — размер выборки), тогда функция возвращает `nan`. +Коэффициент эксцесса заданного распределения. Тип — [Float64](../../sql_reference/aggregate_functions/reference.md). Если `n <= 1` (`n` — размер выборки), тогда функция возвращает `nan`. **Пример** @@ -536,8 +536,8 @@ avgWeighted(x, weight) **Параметры** -- `x` — Значения. [Целые числа](../../data_types/int_uint.md) или [числа с плавающей запятой](../../data_types/float.md). -- `weight` — Веса отдельных значений. [Целые числа](../../data_types/int_uint.md) или [числа с плавающей запятой](../../data_types/float.md). +- `x` — Значения. [Целые числа](../../sql_reference/aggregate_functions/reference.md) или [числа с плавающей запятой](../../sql_reference/aggregate_functions/reference.md). +- `weight` — Веса отдельных значений. [Целые числа](../../sql_reference/aggregate_functions/reference.md) или [числа с плавающей запятой](../../sql_reference/aggregate_functions/reference.md). Типы параметров должны совпадать. @@ -546,7 +546,7 @@ avgWeighted(x, weight) - Среднее арифметическое взвешенное. - `NaN`, если все веса равны 0. -Тип: [Float64](../../data_types/float.md) +Тип: [Float64](../../sql_reference/aggregate_functions/reference.md) **Пример** @@ -580,7 +580,7 @@ uniq(x[, ...]) **Возвращаемое значение** -- Значение с типом данных [UInt64](../../data_types/int_uint.md). +- Значение с типом данных [UInt64](../../sql_reference/aggregate_functions/reference.md). **Детали реализации** @@ -621,7 +621,7 @@ uniqCombined(HLL_precision)(x[, ...]) **Возвращаемое значение** -- Число типа [UInt64](../../data_types/int_uint.md). +- Число типа [UInt64](../../sql_reference/aggregate_functions/reference.md). **Детали реализации** @@ -669,7 +669,7 @@ uniqHLL12(x[, ...]) **Возвращаемое значение** -- Значение хэша с типом данных [UInt64](../../data_types/int_uint.md). +- Значение хэша с типом данных [UInt64](../../sql_reference/aggregate_functions/reference.md). **Детали реализации** @@ -905,7 +905,7 @@ quantile(level)(expr) **Параметры** - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../data_types/index.md#data_types) или типов [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md). +- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../sql_reference/aggregate_functions/reference.md#data_types) или типов [Date](../../sql_reference/aggregate_functions/reference.md), [DateTime](../../sql_reference/aggregate_functions/reference.md). 
**Возвращаемое значение** @@ -913,9 +913,9 @@ quantile(level)(expr) Тип: -- [Float64](../../data_types/float.md) для входных данных числового типа. -- [Date](../../data_types/date.md), если входные значения имеют тип `Date`. -- [DateTime](../../data_types/datetime.md), если входные значения имеют тип `DateTime`. +- [Float64](../../sql_reference/aggregate_functions/reference.md) для входных данных числового типа. +- [Date](../../sql_reference/aggregate_functions/reference.md), если входные значения имеют тип `Date`. +- [DateTime](../../sql_reference/aggregate_functions/reference.md), если входные значения имеют тип `DateTime`. **Пример** @@ -968,7 +968,7 @@ quantileDeterministic(level)(expr, determinator) **Параметры** - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../data_types/index.md#data_types) или типов [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md). +- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../sql_reference/aggregate_functions/reference.md#data_types) или типов [Date](../../sql_reference/aggregate_functions/reference.md), [DateTime](../../sql_reference/aggregate_functions/reference.md). - `determinator` — Число, хэш которого используется при сэмплировании в алгоритме reservoir sampling, чтобы сделать результат детерминированным. В качестве детерминатора можно использовать любое определённое положительное число, например, идентификатор пользователя или события. Если одно и то же значение детерминатора попадается в выборке слишком часто, то функция выдаёт некорректный результат. **Возвращаемое значение** @@ -977,9 +977,9 @@ quantileDeterministic(level)(expr, determinator) Тип: -- [Float64](../../data_types/float.md) для входных данных числового типа. -- [Date](../../data_types/date.md) если входные значения имеют тип `Date`. -- [DateTime](../../data_types/datetime.md) если входные значения имеют тип `DateTime`. +- [Float64](../../sql_reference/aggregate_functions/reference.md) для входных данных числового типа. +- [Date](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `Date`. +- [DateTime](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `DateTime`. **Пример** @@ -1032,7 +1032,7 @@ quantileExact(level)(expr) **Параметры** - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../data_types/index.md#data_types) или типов [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md). +- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../sql_reference/aggregate_functions/reference.md#data_types) или типов [Date](../../sql_reference/aggregate_functions/reference.md), [DateTime](../../sql_reference/aggregate_functions/reference.md). 
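Before the return-value section that follows, a small sketch contrasting the exact variant described above; `numbers(10)` is just a stand-in data source:

``` sql
-- quantileExact collects all passed values in RAM, which makes it
-- exact but more expensive than the sampling-based quantile.
SELECT quantileExact(0.5)(number) FROM numbers(10);
```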
**Возвращаемое значение** @@ -1040,9 +1040,9 @@ quantileExact(level)(expr) Тип: -- [Float64](../../data_types/float.md) для входных данных числового типа. -- [Date](../../data_types/date.md) если входные значения имеют тип `Date`. -- [DateTime](../../data_types/datetime.md) если входные значения имеют тип `DateTime`. +- [Float64](../../sql_reference/aggregate_functions/reference.md) для входных данных числового типа. +- [Date](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `Date`. +- [DateTime](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `DateTime`. **Пример** @@ -1084,7 +1084,7 @@ quantileExactWeighted(level)(expr, weight) **Параметры** - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../data_types/index.md#data_types) или типов [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md). +- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../sql_reference/aggregate_functions/reference.md#data_types) или типов [Date](../../sql_reference/aggregate_functions/reference.md), [DateTime](../../sql_reference/aggregate_functions/reference.md). - `weight` — Столбец с весам элементов последовательности. Вес — это количество повторений элемента в последовательности. **Возвращаемое значение** @@ -1093,9 +1093,9 @@ quantileExactWeighted(level)(expr, weight) Тип: -- [Float64](../../data_types/float.md) для входных данных числового типа. -- [Date](../../data_types/date.md) если входные значения имеют тип `Date`. -- [DateTime](../../data_types/datetime.md) если входные значения имеют тип `DateTime`. +- [Float64](../../sql_reference/aggregate_functions/reference.md) для входных данных числового типа. +- [Date](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `Date`. +- [DateTime](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `DateTime`. **Пример** @@ -1149,7 +1149,7 @@ quantileTiming(level)(expr) - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — [Выражение](../syntax.md#syntax-expressions) над значения столбца, которые возвращают данные типа [Float\*](../../data_types/float.md). +- `expr` — [Выражение](../syntax.md#syntax-expressions) над значения столбца, которые возвращают данные типа [Float\*](../../sql_reference/aggregate_functions/reference.md). - Если в функцию передать отрицательные значения, то её поведение не определено. - Если значение больше, чем 30 000 (например, время загрузки страницы превышает 30 секунд), то оно приравнивается к 30 000. @@ -1173,7 +1173,7 @@ quantileTiming(level)(expr) Тип: `Float32`. !!! note "Примечания" - Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../data_types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. 
Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../select.md#select-order-by). + Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../sql_reference/aggregate_functions/reference.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../sql_reference/statements/select.md#select-order-by). **Пример** @@ -1232,7 +1232,7 @@ quantileTimingWeighted(level)(expr, weight) - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — [Выражение](../syntax.md#syntax-expressions) над значения столбца, которые возвращают данные типа [Float\*](../../data_types/float.md). +- `expr` — [Выражение](../syntax.md#syntax-expressions) над значения столбца, которые возвращают данные типа [Float\*](../../sql_reference/aggregate_functions/reference.md). - Если в функцию передать отрицательные значения, то её поведение не определено. - Если значение больше, чем 30 000 (например, время загрузки страницы превышает 30 секунд), то оно приравнивается к 30 000. @@ -1258,7 +1258,7 @@ quantileTimingWeighted(level)(expr, weight) Тип: `Float32`. !!! note "Примечания" - Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../data_types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../select.md#select-order-by). + Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../sql_reference/aggregate_functions/reference.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../sql_reference/statements/select.md#select-order-by). **Пример** @@ -1315,7 +1315,7 @@ quantileTDigest(level)(expr) **Параметры** - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../data_types/index.md#data_types) или типов [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md). +- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../sql_reference/aggregate_functions/reference.md#data_types) или типов [Date](../../sql_reference/aggregate_functions/reference.md), [DateTime](../../sql_reference/aggregate_functions/reference.md). **Возвращаемое значение** @@ -1323,9 +1323,9 @@ quantileTDigest(level)(expr) Тип: -- [Float64](../../data_types/float.md) для входных данных числового типа. -- [Date](../../data_types/date.md) если входные значения имеют тип `Date`. -- [DateTime](../../data_types/datetime.md) если входные значения имеют тип `DateTime`. +- [Float64](../../sql_reference/aggregate_functions/reference.md) для входных данных числового типа. +- [Date](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `Date`. 
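As a hedged illustration of the t-digest variant whose parameters are rewritten above:

``` sql
-- quantileTDigest keeps memory consumption bounded at the cost of
-- an approximate answer, which suits large value sets.
SELECT quantileTDigest(0.75)(number) FROM numbers(1000);
```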
+- [DateTime](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `DateTime`. **Пример** @@ -1369,7 +1369,7 @@ quantileTDigestWeighted(level)(expr, weight) **Параметры** - `level` — Уровень квантили. Опционально. Константное значение с плавающей запятой от 0 до 1. Мы рекомендуем использовать значение `level` из диапазона `[0.01, 0.99]`. Значение по умолчанию: 0.5. При `level=0.5` функция вычисляет [медиану](https://ru.wikipedia.org/wiki/Медиана_(статистика)). -- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../data_types/index.md#data_types) или типов [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md). +- `expr` — Выражение над значениями столбца, которое возвращает данные [числовых типов](../../sql_reference/aggregate_functions/reference.md#data_types) или типов [Date](../../sql_reference/aggregate_functions/reference.md), [DateTime](../../sql_reference/aggregate_functions/reference.md). - `weight` — Столбец с весам элементов последовательности. Вес — это количество повторений элемента в последовательности. **Возвращаемое значение** @@ -1378,9 +1378,9 @@ quantileTDigestWeighted(level)(expr, weight) Тип: -- [Float64](../../data_types/float.md) для входных данных числового типа. -- [Date](../../data_types/date.md) если входные значения имеют тип `Date`. -- [DateTime](../../data_types/datetime.md) если входные значения имеют тип `DateTime`. +- [Float64](../../sql_reference/aggregate_functions/reference.md) для входных данных числового типа. +- [Date](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `Date`. +- [DateTime](../../sql_reference/aggregate_functions/reference.md) если входные значения имеют тип `DateTime`. **Пример** @@ -1522,7 +1522,7 @@ topKWeighted(N)(x, weight) **Аргументы** - `x` – значение. -- `weight` — вес. [UInt8](../../data_types/int_uint.md). +- `weight` — вес. [UInt8](../../sql_reference/aggregate_functions/reference.md). **Возвращаемое значение** diff --git a/docs/ru/data_types/nested_data_structures/aggregatefunction.md b/docs/ru/sql_reference/data_types/aggregatefunction.md similarity index 87% rename from docs/ru/data_types/nested_data_structures/aggregatefunction.md rename to docs/ru/sql_reference/data_types/aggregatefunction.md index 641d8aa8386..d7ee1211845 100644 --- a/docs/ru/data_types/nested_data_structures/aggregatefunction.md +++ b/docs/ru/sql_reference/data_types/aggregatefunction.md @@ -23,7 +23,7 @@ CREATE TABLE t ) ENGINE = ... ``` -[uniq](../../query_language/agg_functions/reference.md#agg_function-uniq), anyIf ([any](../../query_language/agg_functions/reference.md#agg_function-any)+[If](../../query_language/agg_functions/combinators.md#agg-functions-combinator-if)) и [quantiles](../../query_language/agg_functions/reference.md) — агрегатные функции, поддержанные в ClickHouse. +[uniq](../../sql_reference/data_types/aggregatefunction.md#agg_function-uniq), anyIf ([any](../../sql_reference/data_types/aggregatefunction.md#agg_function-any)+[If](../../sql_reference/data_types/aggregatefunction.md#agg-functions-combinator-if)) и [quantiles](../../sql_reference/data_types/aggregatefunction.md) — агрегатные функции, поддержанные в ClickHouse. 
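To make the `AggregateFunction` column described above concrete, a minimal sketch; the table and column names are made up:

``` sql
-- Partial aggregation states are produced with the -State combinator
-- and finalized later with -Merge.
CREATE TABLE visits_agg
(
    day Date,
    users AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree() ORDER BY day;

INSERT INTO visits_agg
SELECT today() AS day, uniqState(number) AS users
FROM numbers(100)
GROUP BY day;

SELECT day, uniqMerge(users) FROM visits_agg GROUP BY day;
```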
## Особенности использования {#osobennosti-ispolzovaniia} @@ -58,6 +58,6 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP ## Пример использования {#primer-ispolzovaniia} -Смотрите в описании движка [AggregatingMergeTree](../../operations/table_engines/aggregatingmergetree.md). +Смотрите в описании движка [AggregatingMergeTree](../../sql_reference/data_types/aggregatefunction.md). [Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/nested_data_structures/aggregatefunction/) diff --git a/docs/ru/data_types/array.md b/docs/ru/sql_reference/data_types/array.md similarity index 92% rename from docs/ru/data_types/array.md rename to docs/ru/sql_reference/data_types/array.md index 0fa13d54cae..acc2e4940d0 100644 --- a/docs/ru/data_types/array.md +++ b/docs/ru/sql_reference/data_types/array.md @@ -42,7 +42,7 @@ SELECT [1, 2] AS x, toTypeName(x) ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh} -При создании массива «на лету» ClickHouse автоматически определяет тип аргументов как наиболее узкий тип данных, в котором можно хранить все перечисленные аргументы. Если среди аргументов есть [NULL](../query_language/syntax.md#null-literal) или аргумент типа [Nullable](nullable.md#data_type-nullable), то тип элементов массива — [Nullable](nullable.md). +При создании массива «на лету» ClickHouse автоматически определяет тип аргументов как наиболее узкий тип данных, в котором можно хранить все перечисленные аргументы. Если среди аргументов есть [NULL](../../sql_reference/data_types/array.md#null-literal) или аргумент типа [Nullable](nullable.md#data_type-nullable), то тип элементов массива — [Nullable](nullable.md). Если ClickHouse не смог подобрать тип данных, то он сгенерирует исключение. Это произойдёт, например, при попытке создать массив одновременно со строками и числами `SELECT array(1, 'a')`. diff --git a/docs/ru/data_types/boolean.md b/docs/ru/sql_reference/data_types/boolean.md similarity index 100% rename from docs/ru/data_types/boolean.md rename to docs/ru/sql_reference/data_types/boolean.md diff --git a/docs/ru/data_types/date.md b/docs/ru/sql_reference/data_types/date.md similarity index 100% rename from docs/ru/data_types/date.md rename to docs/ru/sql_reference/data_types/date.md diff --git a/docs/ru/data_types/datetime.md b/docs/ru/sql_reference/data_types/datetime.md similarity index 86% rename from docs/ru/data_types/datetime.md rename to docs/ru/sql_reference/data_types/datetime.md index 957ffe717a3..e52ca549907 100644 --- a/docs/ru/data_types/datetime.md +++ b/docs/ru/sql_reference/data_types/datetime.md @@ -18,13 +18,13 @@ DateTime([timezone]) Список поддерживаемых временных зон можно найти в [IANA Time Zone Database](https://www.iana.org/time-zones). Пакет `tzdata`, содержащий [базу данных часовых поясов IANA](https://www.iana.org/time-zones), должен быть установлен в системе. Используйте команду `timedatectl list-timezones` для получения списка часовых поясов, известных локальной системе. -Часовой пояс для столбца типа `DateTime` можно в явном виде установить при создании таблицы. Если часовой пояс не установлен, то ClickHouse использует значение параметра [timezone](../operations/server_settings/settings.md#server_settings-timezone), установленное в конфигурации сервера или в настройках операционной системы на момент запуска сервера. +Часовой пояс для столбца типа `DateTime` можно в явном виде установить при создании таблицы. 
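A minimal sketch of pinning the column time zone at table creation, as the paragraph above describes; the table name is hypothetical:

``` sql
-- The stored value is a Unix timestamp; 'Europe/Moscow' only affects
-- how the column is parsed from and rendered as text.
CREATE TABLE dt_tz
(
    ts DateTime('Europe/Moscow')
)
ENGINE = MergeTree() ORDER BY ts;
```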
Если часовой пояс не установлен, то ClickHouse использует значение параметра [timezone](../../sql_reference/data_types/datetime.md#server_configuration_parameters-timezone), установленное в конфигурации сервера или в настройках операционной системы на момент запуска сервера. -Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../interfaces/cli.md) с параметром `--use_client_time_zone`. +Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../query_language/functions/date_time_functions.md#formatdatetime). +ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql_reference/data_types/datetime.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date\_time\_input\_format](../operations/settings/settings.md#settings-date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date\_time\_input\_format](../../operations/settings/settings.md#settings-date_time_input_format). ## Примеры {#primery} @@ -111,12 +111,12 @@ FROM dt ## See Also {#see-also} -- [Функции преобразования типов](../query_language/functions/type_conversion_functions.md) -- [Функции для работы с датой и временем](../query_language/functions/date_time_functions.md) -- [Функции для работы с массивами](../query_language/functions/array_functions.md) -- [Настройка `date_time_input_format`](../operations/settings/settings.md#settings-date_time_input_format) -- [Конфигурационный параметр сервера `timezone`](../operations/server_settings/settings.md#server_settings-timezone) -- [Операторы для работы с датой и временем](../query_language/operators.md#operators-datetime) +- [Функции преобразования типов](../../sql_reference/data_types/datetime.md) +- [Функции для работы с датой и временем](../../sql_reference/data_types/datetime.md) +- [Функции для работы с массивами](../../sql_reference/data_types/datetime.md) +- [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format) +- [Конфигурационный параметр сервера `timezone`](../../sql_reference/data_types/datetime.md#server_configuration_parameters-timezone) +- [Операторы для работы с датой и временем](../../sql_reference/data_types/datetime.md#operators-datetime) - [Тип данных `Date`](date.md) [Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/datetime/) diff --git a/docs/ru/data_types/datetime64.md b/docs/ru/sql_reference/data_types/datetime64.md similarity index 92% rename from docs/ru/data_types/datetime64.md rename to docs/ru/sql_reference/data_types/datetime64.md index 8e3277dd52f..9e126885058 100644 --- a/docs/ru/data_types/datetime64.md +++ b/docs/ru/sql_reference/data_types/datetime64.md @@ -87,11 +87,11 @@ FROM dt ## See Also {#see-also} -- [Функции преобразования 
типов](../query_language/functions/type_conversion_functions.md) -- [Функции для работы с датой и временем](../query_language/functions/date_time_functions.md) -- [Функции для работы с массивами](../query_language/functions/array_functions.md) -- [Настройка `date_time_input_format`](../operations/settings/settings.md#settings-date_time_input_format) -- [Конфигурационный параметр сервера `timezone`](../operations/server_settings/settings.md#server_settings-timezone) -- [Операторы для работы с датой и временем](../query_language/operators.md#operators-datetime) +- [Функции преобразования типов](../../sql_reference/data_types/datetime64.md) +- [Функции для работы с датой и временем](../../sql_reference/data_types/datetime64.md) +- [Функции для работы с массивами](../../sql_reference/data_types/datetime64.md) +- [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format) +- [Конфигурационный параметр сервера `timezone`](../../sql_reference/data_types/datetime64.md#server_configuration_parameters-timezone) +- [Операторы для работы с датой и временем](../../sql_reference/data_types/datetime64.md#operators-datetime) - [Тип данных `Date`](date.md) - [Тип данных `DateTime`](datetime.md) diff --git a/docs/ru/data_types/decimal.md b/docs/ru/sql_reference/data_types/decimal.md similarity index 100% rename from docs/ru/data_types/decimal.md rename to docs/ru/sql_reference/data_types/decimal.md diff --git a/docs/ru/sql_reference/data_types/domains/index.md b/docs/ru/sql_reference/data_types/domains/index.md new file mode 100644 index 00000000000..d4496cf8d5b --- /dev/null +++ b/docs/ru/sql_reference/data_types/domains/index.md @@ -0,0 +1,5 @@ +--- +toc_folder_title: Domains +toc_priority: 56 +--- + diff --git a/docs/ru/data_types/domains/ipv4.md b/docs/ru/sql_reference/data_types/domains/ipv4.md similarity index 100% rename from docs/ru/data_types/domains/ipv4.md rename to docs/ru/sql_reference/data_types/domains/ipv4.md diff --git a/docs/ru/data_types/domains/ipv6.md b/docs/ru/sql_reference/data_types/domains/ipv6.md similarity index 100% rename from docs/ru/data_types/domains/ipv6.md rename to docs/ru/sql_reference/data_types/domains/ipv6.md diff --git a/docs/ru/data_types/domains/overview.md b/docs/ru/sql_reference/data_types/domains/overview.md similarity index 100% rename from docs/ru/data_types/domains/overview.md rename to docs/ru/sql_reference/data_types/domains/overview.md diff --git a/docs/ru/data_types/enum.md b/docs/ru/sql_reference/data_types/enum.md similarity index 99% rename from docs/ru/data_types/enum.md rename to docs/ru/sql_reference/data_types/enum.md index 2ee7c77028b..58f2a4b188e 100644 --- a/docs/ru/data_types/enum.md +++ b/docs/ru/sql_reference/data_types/enum.md @@ -86,7 +86,7 @@ SELECT toTypeName(CAST('a', 'Enum(\'a\' = 1, \'b\' = 2)')) Для каждого из значений прописывается число в диапазоне `-128 .. 127` для `Enum8` или в диапазоне `-32768 .. 32767` для `Enum16`. Все строки должны быть разными, числа - тоже. Разрешена пустая строка. При указании такого типа (в определении таблицы), числа могут идти не подряд и в произвольном порядке. При этом, порядок не имеет значения. -Ни строка, ни цифровое значение в `Enum` не могут быть [NULL](../query_language/syntax.md). +Ни строка, ни цифровое значение в `Enum` не могут быть [NULL](../../sql_reference/syntax.md#null-literal). `Enum` может быть передан в тип [Nullable](nullable.md). 
Таким образом, если создать таблицу запросом diff --git a/docs/ru/data_types/fixedstring.md b/docs/ru/sql_reference/data_types/fixedstring.md similarity index 89% rename from docs/ru/data_types/fixedstring.md rename to docs/ru/sql_reference/data_types/fixedstring.md index ba91fcde9b3..7e2fdd5d525 100644 --- a/docs/ru/data_types/fixedstring.md +++ b/docs/ru/sql_reference/data_types/fixedstring.md @@ -51,6 +51,6 @@ WHERE a = 'b\0' Это поведение отличается от поведения MySQL для типа `CHAR`, где строки дополняются пробелами, а пробелы перед выводом вырезаются. -Обратите внимание, что длина значения `FixedString(N)` постоянна. Функция [length](../query_language/functions/array_functions.md#array_functions-length) возвращает `N` даже если значение `FixedString(N)` заполнено только нулевыми байтами, однако функция [empty](../query_language/functions/string_functions.md#empty) в этом же случае возвращает `1`. +Обратите внимание, что длина значения `FixedString(N)` постоянна. Функция [length](../../sql_reference/data_types/fixedstring.md#array_functions-length) возвращает `N` даже если значение `FixedString(N)` заполнено только нулевыми байтами, однако функция [empty](../../sql_reference/data_types/fixedstring.md#empty) в этом же случае возвращает `1`. [Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/fixedstring/) diff --git a/docs/ru/data_types/float.md b/docs/ru/sql_reference/data_types/float.md similarity index 97% rename from docs/ru/data_types/float.md rename to docs/ru/sql_reference/data_types/float.md index 20eab345573..91d4b655e2a 100644 --- a/docs/ru/data_types/float.md +++ b/docs/ru/sql_reference/data_types/float.md @@ -75,6 +75,6 @@ SELECT 0 / 0 └──────────────┘ ``` - Смотрите правила сортировки `NaN` в разделе [Секция ORDER BY](../query_language/select.md). + Смотрите правила сортировки `NaN` в разделе [Секция ORDER BY](../sql_reference/data_types/float.md). 
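Stepping back to the `FixedString` hunk above, a short sketch of the `length`/`empty` distinction it links to:

``` sql
-- toFixedString pads the value with null bytes up to N = 3:
-- length() still reports 3, while empty() reports 1.
SELECT length(toFixedString('', 3)), empty(toFixedString('', 3));
```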
[Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/float/) diff --git a/docs/ru/data_types/index.md b/docs/ru/sql_reference/data_types/index.md similarity index 100% rename from docs/ru/data_types/index.md rename to docs/ru/sql_reference/data_types/index.md diff --git a/docs/ru/data_types/int_uint.md b/docs/ru/sql_reference/data_types/int_uint.md similarity index 100% rename from docs/ru/data_types/int_uint.md rename to docs/ru/sql_reference/data_types/int_uint.md diff --git a/docs/ru/data_types/nested_data_structures/index.md b/docs/ru/sql_reference/data_types/nested_data_structures/index.md similarity index 100% rename from docs/ru/data_types/nested_data_structures/index.md rename to docs/ru/sql_reference/data_types/nested_data_structures/index.md diff --git a/docs/ru/data_types/nested_data_structures/nested.md b/docs/ru/sql_reference/data_types/nested_data_structures/nested.md similarity index 100% rename from docs/ru/data_types/nested_data_structures/nested.md rename to docs/ru/sql_reference/data_types/nested_data_structures/nested.md diff --git a/docs/ru/data_types/nullable.md b/docs/ru/sql_reference/data_types/nullable.md similarity index 83% rename from docs/ru/data_types/nullable.md rename to docs/ru/sql_reference/data_types/nullable.md index 1ee6777254b..dfbd0fb0512 100644 --- a/docs/ru/data_types/nullable.md +++ b/docs/ru/sql_reference/data_types/nullable.md @@ -1,6 +1,6 @@ # Nullable(TypeName) {#data_type-nullable} -Позволяет работать как со значением типа `TypeName` так и с отсутствием этого значения ([NULL](../query_language/syntax.md)) в одной и той же переменной, в том числе хранить `NULL` в таблицах вместе со значения типа `TypeName`. Например, в столбце типа `Nullable(Int8)` можно хранить значения типа `Int8`, а в тех строках, где значения нет, будет храниться `NULL`. +Позволяет работать как со значением типа `TypeName` так и с отсутствием этого значения ([NULL](../../sql_reference/data_types/nullable.md)) в одной и той же переменной, в том числе хранить `NULL` в таблицах вместе со значения типа `TypeName`. Например, в столбце типа `Nullable(Int8)` можно хранить значения типа `Int8`, а в тех строках, где значения нет, будет храниться `NULL`. В качестве `TypeName` нельзя использовать составные типы данных [Array](array.md#data_type-array) и [Tuple](tuple.md). Составные типы данных могут содержать значения типа `Nullable`, например `Array(Nullable(Int8))`. 
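A hedged sketch of the `Nullable` behavior described above; the table name is illustrative:

``` sql
-- NULLs are tracked in a separate mask stored next to the values.
CREATE TABLE nullable_demo (x Nullable(Int8)) ENGINE = TinyLog;
INSERT INTO nullable_demo VALUES (1), (NULL), (2);
SELECT x, isNull(x) FROM nullable_demo;
```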
diff --git a/docs/ru/data_types/special_data_types/expression.md b/docs/ru/sql_reference/data_types/special_data_types/expression.md similarity index 100% rename from docs/ru/data_types/special_data_types/expression.md rename to docs/ru/sql_reference/data_types/special_data_types/expression.md diff --git a/docs/ru/data_types/special_data_types/index.md b/docs/ru/sql_reference/data_types/special_data_types/index.md similarity index 100% rename from docs/ru/data_types/special_data_types/index.md rename to docs/ru/sql_reference/data_types/special_data_types/index.md diff --git a/docs/ru/data_types/special_data_types/interval.md b/docs/ru/sql_reference/data_types/special_data_types/interval.md similarity index 84% rename from docs/ru/data_types/special_data_types/interval.md rename to docs/ru/sql_reference/data_types/special_data_types/interval.md index 22912bdbca1..1721b8631ad 100644 --- a/docs/ru/data_types/special_data_types/interval.md +++ b/docs/ru/sql_reference/data_types/special_data_types/interval.md @@ -1,6 +1,6 @@ # Interval {#data-type-interval} -Семейство типов данных, представляющих интервалы дат и времени. Оператор [INTERVAL](../../query_language/operators.md#operator-interval) возвращает значения этих типов. +Семейство типов данных, представляющих интервалы дат и времени. Оператор [INTERVAL](../../../sql_reference/data_types/special_data_types/interval.md#operator-interval) возвращает значения этих типов. !!! warning "Внимание" Нельзя использовать типы данных `Interval` для хранения данных в таблице. @@ -35,7 +35,7 @@ SELECT toTypeName(INTERVAL 4 DAY) ## Использование {#data-type-interval-usage-remarks} -Значения типов `Interval` можно использовать в арифметических операциях со значениями типов [Date](../../data_types/date.md) и [DateTime](../../data_types/datetime.md). Например, можно добавить 4 дня к текущей дате: +Значения типов `Interval` можно использовать в арифметических операциях со значениями типов [Date](../../../sql_reference/data_types/special_data_types/interval.md) и [DateTime](../../../sql_reference/data_types/special_data_types/interval.md). Например, можно добавить 4 дня к текущей дате: ``` sql SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY @@ -74,5 +74,5 @@ Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argu ## Смотрите также {#smotrite-takzhe} -- Оператор[INTERVAL](../../query_language/operators.md#operator-interval) -- Функция приведения типа [toInterval](../../query_language/functions/type_conversion_functions.md#function-tointerval) +- Оператор[INTERVAL](../../../sql_reference/data_types/special_data_types/interval.md#operator-interval) +- Функция приведения типа [toInterval](../../../sql_reference/data_types/special_data_types/interval.md#function-tointerval) diff --git a/docs/ru/data_types/special_data_types/nothing.md b/docs/ru/sql_reference/data_types/special_data_types/nothing.md similarity index 63% rename from docs/ru/data_types/special_data_types/nothing.md rename to docs/ru/sql_reference/data_types/special_data_types/nothing.md index ab4b96cc519..c23de847077 100644 --- a/docs/ru/data_types/special_data_types/nothing.md +++ b/docs/ru/sql_reference/data_types/special_data_types/nothing.md @@ -1,8 +1,8 @@ # Nothing {#nothing} -Этот тип данных предназначен только для того, чтобы представлять [NULL](../../query_language/syntax.md), т.е. отсутствие значения. +Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql_reference/data_types/special_data_types/nothing.md), т.е. 
отсутствие значения. -Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../data_types/nullable.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов: +Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../../sql_reference/data_types/special_data_types/nothing.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов: ``` sql SELECT toTypeName(Array()) diff --git a/docs/ru/data_types/special_data_types/set.md b/docs/ru/sql_reference/data_types/special_data_types/set.md similarity index 100% rename from docs/ru/data_types/special_data_types/set.md rename to docs/ru/sql_reference/data_types/special_data_types/set.md diff --git a/docs/ru/data_types/string.md b/docs/ru/sql_reference/data_types/string.md similarity index 100% rename from docs/ru/data_types/string.md rename to docs/ru/sql_reference/data_types/string.md diff --git a/docs/ru/data_types/tuple.md b/docs/ru/sql_reference/data_types/tuple.md similarity index 87% rename from docs/ru/data_types/tuple.md rename to docs/ru/sql_reference/data_types/tuple.md index 17732d04953..e8f5f335278 100644 --- a/docs/ru/data_types/tuple.md +++ b/docs/ru/sql_reference/data_types/tuple.md @@ -2,7 +2,7 @@ Кортеж из элементов любого [типа](index.md#data_types). Элементы кортежа могут быть одного или разных типов. -Кортежи используются для временной группировки столбцов. Столбцы могут группироваться при использовании выражения IN в запросе, а также для указания нескольких формальных параметров лямбда-функций. Подробнее смотрите разделы [Операторы IN](../query_language/select.md), [Функции высшего порядка](../query_language/functions/higher_order_functions.md#higher_order_functions). +Кортежи используются для временной группировки столбцов. Столбцы могут группироваться при использовании выражения IN в запросе, а также для указания нескольких формальных параметров лямбда-функций. Подробнее смотрите разделы [Операторы IN](../../sql_reference/data_types/tuple.md), [Функции высшего порядка](../../sql_reference/data_types/tuple.md#higher_order_functions). Кортежи могут быть результатом запроса. В этом случае, в текстовых форматах кроме JSON, значения выводятся в круглых скобках через запятую. В форматах JSON, кортежи выводятся в виде массивов (в квадратных скобках). @@ -28,7 +28,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x) ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh} -При создании кортежа «на лету» ClickHouse автоматически определяет тип каждого аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../query_language/syntax.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md). +При создании кортежа «на лету» ClickHouse автоматически определяет тип каждого аргументов как минимальный из типов, который может сохранить значение аргумента. Если аргумент — [NULL](../../sql_reference/data_types/tuple.md#null-literal), то тип элемента кортежа — [Nullable](nullable.md). 
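A quick check of the inference rule just stated:

``` sql
-- The NULL argument makes the second element Nullable(Nothing).
SELECT tuple(1, NULL) AS x, toTypeName(x);
```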
Пример автоматического определения типа данных: diff --git a/docs/ru/data_types/uuid.md b/docs/ru/sql_reference/data_types/uuid.md similarity index 82% rename from docs/ru/data_types/uuid.md rename to docs/ru/sql_reference/data_types/uuid.md index 24f43cc5d24..d62ec22eecb 100644 --- a/docs/ru/data_types/uuid.md +++ b/docs/ru/sql_reference/data_types/uuid.md @@ -16,7 +16,7 @@ ## Как сгенерировать UUID {#kak-sgenerirovat-uuid} -Для генерации UUID-значений предназначена функция [generateUUIDv4](../query_language/functions/uuid_functions.md). +Для генерации UUID-значений предназначена функция [generateUUIDv4](../../sql_reference/data_types/uuid.md). ## Примеры использования {#primery-ispolzovaniia} @@ -65,8 +65,8 @@ SELECT * FROM t_uuid ## Ограничения {#ogranicheniia} -Тип данных UUID можно использовать только с функциями, которые поддерживаются типом данных [String](string.md) (например, [min](../query_language/agg_functions/reference.md#agg_function-min), [max](../query_language/agg_functions/reference.md#agg_function-max), и [count](../query_language/agg_functions/reference.md#agg_function-count)). +Тип данных UUID можно использовать только с функциями, которые поддерживаются типом данных [String](string.md) (например, [min](../../sql_reference/data_types/uuid.md#agg_function-min), [max](../../sql_reference/data_types/uuid.md#agg_function-max), и [count](../../sql_reference/data_types/uuid.md#agg_function-count)). -Тип данных UUID не поддерживается арифметическими операциями (например, [abs](../query_language/functions/arithmetic_functions.md#arithm_func-abs)) или агрегатными функциями, такими как [sum](../query_language/agg_functions/reference.md#agg_function-sum) и [avg](../query_language/agg_functions/reference.md#agg_function-avg). +Тип данных UUID не поддерживается арифметическими операциями (например, [abs](../../sql_reference/data_types/uuid.md#arithm_func-abs)) или агрегатными функциями, такими как [sum](../../sql_reference/data_types/uuid.md#agg_function-sum) и [avg](../../sql_reference/data_types/uuid.md#agg_function-avg). [Original article](https://clickhouse.tech/docs/en/data_types/uuid/) diff --git a/docs/ru/query_language/dicts/external_dicts.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts.md similarity index 79% rename from docs/ru/query_language/dicts/external_dicts.md rename to docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts.md index 586e443e3a9..4929e571758 100644 --- a/docs/ru/query_language/dicts/external_dicts.md +++ b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts.md @@ -5,11 +5,11 @@ ClickHouse: - Полностью или частично хранит словари в оперативной памяти. - Периодически обновляет их и динамически подгружает отсутствующие значения. -- Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../create.md#create-dictionary-query). +- Позволяет создавать внешние словари с помощью xml-файлов или [DDL-запросов](../../../sql_reference/statements/create.md#create-dictionary-query). -Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries\_config](../../operations/server_settings/settings.md). +Конфигурация внешних словарей может находится в одном или нескольких xml-файлах. Путь к конфигурации указывается в параметре [dictionaries\_config](../../../sql_reference/dictionaries/external_dictionaries/external_dicts.md). 
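Since the hunks above repeatedly point at the DDL alternative to XML dictionary configs, a hedged sketch of such a query; the dictionary name, attributes, and source are illustrative:

``` sql
-- A flat-layout dictionary fed from a local ClickHouse table.
CREATE DICTIONARY countries_dict
(
    id UInt64,
    name String DEFAULT ''
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' password '' db 'default' table 'countries'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360);
```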
-Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries\_lazy\_load](../../operations/server_settings/settings.md). +Словари могут загружаться при старте сервера или при первом использовании, в зависимости от настройки [dictionaries\_lazy\_load](../../../sql_reference/dictionaries/external_dictionaries/external_dicts.md). Конфигурационный файл словарей имеет вид: @@ -35,10 +35,10 @@ ClickHouse: В одном файле можно [сконфигурировать](external_dicts_dict.md) произвольное количество словарей. -Если вы создаёте внешние словари [DDL-запросами](../create.md#create-dictionary-query), то не задавайте конфигурацию словаря в конфигурации сервера. +Если вы создаёте внешние словари [DDL-запросами](../../../sql_reference/statements/create.md#create-dictionary-query), то не задавайте конфигурацию словаря в конфигурации сервера. !!! attention "Внимание" - Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../functions/other_functions.md)). Эта функциональность не связана с внешними словарями. + Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../../../sql_reference/dictionaries/external_dictionaries/external_dicts.md)). Эта функциональность не связана с внешними словарями. ## Смотрите также {#ext-dicts-see-also} @@ -47,6 +47,6 @@ ClickHouse: - [Обновление словарей](external_dicts_dict_lifetime.md) - [Источники внешних словарей](external_dicts_dict_sources.md) - [Ключ и поля словаря](external_dicts_dict_structure.md) -- [Функции для работы с внешними словарями](../functions/ext_dict_functions.md#ext_dict_functions) +- [Функции для работы с внешними словарями](../../../sql_reference/dictionaries/external_dictionaries/external_dicts.md#ext_dict_functions) [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/dicts/external_dicts/) diff --git a/docs/ru/query_language/dicts/external_dicts_dict.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md similarity index 91% rename from docs/ru/query_language/dicts/external_dicts_dict.md rename to docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md index dcb8c6652b0..2e3068882bf 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict.md +++ b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md @@ -24,7 +24,7 @@ XML-конфигурация словаря имеет следующую стр ``` -Соответствующий [DDL-запрос](../create.md#create-dictionary-query) имеет следующий вид: +Соответствующий [DDL-запрос](../../../sql_reference/statements/create.md#create-dictionary-query) имеет следующий вид: ``` sql CREATE DICTIONARY dict_name diff --git a/docs/ru/query_language/dicts/external_dicts_dict_hierarchical.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md similarity index 90% rename from docs/ru/query_language/dicts/external_dicts_dict_hierarchical.md rename to docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md index 335b882a97c..ef9b79c0444 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_hierarchical.md +++ b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md @@ -32,7 +32,7 @@ ClickHouse поддерживает иерархические словари с ClickHouse поддерживает свойство [hierarchical](external_dicts_dict_structure.md#hierarchical-dict-attr) для атрибутов [внешнего словаря](index.md). 
Это свойство позволяет конфигурировать словари, подобные описанному выше. -С помощью функции [dictGetHierarchy](../functions/ext_dict_functions.md#dictgethierarchy) можно получить цепочку предков элемента. +С помощью функции [dictGetHierarchy](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md#dictgethierarchy) можно получить цепочку предков элемента. Структура словаря для нашего примера может выглядеть следующим образом: diff --git a/docs/ru/query_language/dicts/external_dicts_dict_layout.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md similarity index 99% rename from docs/ru/query_language/dicts/external_dicts_dict_layout.md rename to docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md index 458593e82aa..898fe45b15a 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_layout.md +++ b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md @@ -34,7 +34,7 @@ ``` -Соответствущий [DDL-запрос](../create.md#create-dictionary-query): +Соответствущий [DDL-запрос](../../../sql_reference/statements/create.md#create-dictionary-query): ``` sql CREATE DICTIONARY (...) diff --git a/docs/ru/query_language/dicts/external_dicts_dict_lifetime.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md similarity index 100% rename from docs/ru/query_language/dicts/external_dicts_dict_lifetime.md rename to docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md diff --git a/docs/ru/query_language/dicts/external_dicts_dict_sources.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md similarity index 96% rename from docs/ru/query_language/dicts/external_dicts_dict_sources.md rename to docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md index 8b9961ee7fc..32115e703f4 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md @@ -19,7 +19,7 @@ ``` -Аналогичный [DDL-запрос](../create.md#create-dictionary-query): +Аналогичный [DDL-запрос](../../../sql_reference/statements/create.md#create-dictionary-query): ``` sql CREATE DICTIONARY dict_name (...) @@ -64,7 +64,7 @@ SOURCE(FILE(path '/opt/dictionaries/os.tsv' format 'TabSeparated')) Поля настройки: - `path` — Абсолютный путь к файлу. -- `format` — Формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../interfaces/formats.md#formats)». +- `format` — Формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../../interfaces/formats.md#formats)». ## Исполняемый файл {#dicts-external_dicts_dict_sources-executable} @@ -90,7 +90,7 @@ SOURCE(EXECUTABLE(command 'cat /opt/dictionaries/os.tsv' format 'TabSeparated')) Поля настройки: - `command` — Абсолютный путь к исполняемому файлу или имя файла (если каталог программы прописан в `PATH`). -- `format` — Формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../interfaces/formats.md#formats)». +- `format` — Формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../../interfaces/formats.md#formats)». 
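Tying the executable source above to the DDL form, a sketch reusing the same command; everything else is illustrative:

``` sql
-- The command is re-run on every dictionary (re)load and must write
-- rows to stdout in the declared format.
CREATE DICTIONARY os_dict
(
    id UInt64,
    os_name String DEFAULT ''
)
PRIMARY KEY id
SOURCE(EXECUTABLE(command 'cat /opt/dictionaries/os.tsv' format 'TabSeparated'))
LAYOUT(FLAT())
LIFETIME(300);
```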
## HTTP(s) {#dicts-external_dicts_dict_sources-http} @@ -128,12 +128,12 @@ SOURCE(HTTP( )) ``` -Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо [настроить openSSL](../../operations/server_settings/settings.md) в конфигурации сервера. +Чтобы ClickHouse смог обратиться к HTTPS-ресурсу, необходимо [настроить openSSL](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md) в конфигурации сервера. Поля настройки: - `url` — URL источника. -- `format` — Формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../interfaces/formats.md#formats)». +- `format` — Формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../../interfaces/formats.md#formats)». ## ODBC {#dicts-external_dicts_dict_sources-odbc} @@ -172,7 +172,7 @@ SOURCE(ODBC( ClickHouse получает от ODBC-драйвера информацию о квотировании и квотирует настройки в запросах к драйверу, поэтому имя таблицы нужно указывать в соответствии с регистром имени таблицы в базе данных. -Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствующим разделом [FAQ](../../faq/general.md#oracle-odbc-encodings). +Если у вас есть проблемы с кодировками при использовании Oracle, ознакомьтесь с соответствующим разделом [FAQ](../../../faq/general.md#oracle-odbc-encodings). ### Выявленная уязвимость в функционировании ODBC словарей {#vyiavlennaia-uiazvimost-v-funktsionirovanii-odbc-slovarei} @@ -509,7 +509,7 @@ SOURCE(CLICKHOUSE( Поля настройки: -- `host` — хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа [Distributed](../../operations/table_engines/distributed.md) и прописать её в дальнейших настройках. +- `host` — хост ClickHouse. Если host локальный, то запрос выполняется без сетевого взаимодействия. Чтобы повысить отказоустойчивость решения, можно создать таблицу типа [Distributed](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md) и прописать её в дальнейших настройках. - `port` — порт сервера ClickHouse. - `user` — имя пользователя ClickHouse. - `password` — пароль пользователя ClickHouse. diff --git a/docs/ru/query_language/dicts/external_dicts_dict_structure.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md similarity index 90% rename from docs/ru/query_language/dicts/external_dicts_dict_structure.md rename to docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md index 7fa762c063a..f83b7657b54 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_structure.md +++ b/docs/ru/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md @@ -154,14 +154,14 @@ CREATE DICTIONARY somename ( | Тег | Описание | Обязательный | |------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | `name` | Имя столбца. | Да | -| `type` | Тип данных ClickHouse.
ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`. [Nullable](../../data_types/nullable.md) не поддерживается. | Да |
+| `type` | Тип данных ClickHouse.<br/>ClickHouse пытается привести значение из словаря к заданному типу данных. Например, в случае MySQL, в таблице-источнике поле может быть `TEXT`, `VARCHAR`, `BLOB`, но загружено может быть как `String`. [Nullable](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md) не поддерживается. | Да |
| `null_value` | Значение по умолчанию для несуществующего элемента.<br/>В примере это пустая строка. Нельзя указать значение `NULL`. | Да |
-| `expression` | [Выражение](../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.<br/>Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.<br/><br/>Значение по умолчанию: нет выражения. | Нет |
+| `expression` | [Выражение](../../syntax.md#syntax-expressions), которое ClickHouse выполняет со значением.<br/>Выражением может быть имя столбца в удаленной SQL базе. Таким образом, вы можете использовать его для создания псевдонима удаленного столбца.<br/><br/>Значение по умолчанию: нет выражения. | Нет |
| `hierarchical` | Если `true`, то атрибут содержит ключ предка для текущего элемента. Смотрите [Иерархические словари](external_dicts_dict_hierarchical.md).<br/><br/>Default value: `false`. | No |
| `is_object_id` | Признак того, что запрос выполняется к документу MongoDB по `ObjectID`.<br/><br/>
Значение по умолчанию: `false`. | Нет | ## Смотрите также {#smotrite-takzhe} -- [Функции для работы с внешними словарями](../functions/ext_dict_functions.md). +- [Функции для работы с внешними словарями](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md). [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/dicts/external_dicts_dict_structure/) diff --git a/docs/ru/sql_reference/dictionaries/external_dictionaries/index.md b/docs/ru/sql_reference/dictionaries/external_dictionaries/index.md new file mode 100644 index 00000000000..9af8b4f2f12 --- /dev/null +++ b/docs/ru/sql_reference/dictionaries/external_dictionaries/index.md @@ -0,0 +1,5 @@ +--- +toc_folder_title: External Dictionaries +toc_priority: 37 +--- + diff --git a/docs/ru/query_language/dicts/index.md b/docs/ru/sql_reference/dictionaries/index.md similarity index 82% rename from docs/ru/query_language/dicts/index.md rename to docs/ru/sql_reference/dictionaries/index.md index e53b7f394d8..e876b92c9bf 100644 --- a/docs/ru/query_language/dicts/index.md +++ b/docs/ru/sql_reference/dictionaries/index.md @@ -8,7 +8,7 @@ ClickHouse поддерживает специальные функции для ClickHouse поддерживает: -- [Встроенные словари](internal_dicts.md#internal_dicts) со специфическим [набором функций](../functions/ym_dict_functions.md). -- [Подключаемые (внешние) словари](external_dicts.md) с [набором функций](../functions/ext_dict_functions.md). +- [Встроенные словари](internal_dicts.md#internal_dicts) со специфическим [набором функций](../../sql_reference/dictionaries/external_dictionaries/index.md). +- [Подключаемые (внешние) словари](external_dictionaries/external_dicts.md) с [набором функций](../../sql_reference/dictionaries/external_dictionaries/index.md). [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/dicts/) diff --git a/docs/ru/query_language/dicts/internal_dicts.md b/docs/ru/sql_reference/dictionaries/internal_dicts.md similarity index 100% rename from docs/ru/query_language/dicts/internal_dicts.md rename to docs/ru/sql_reference/dictionaries/internal_dicts.md diff --git a/docs/ru/query_language/functions/arithmetic_functions.md b/docs/ru/sql_reference/functions/arithmetic_functions.md similarity index 100% rename from docs/ru/query_language/functions/arithmetic_functions.md rename to docs/ru/sql_reference/functions/arithmetic_functions.md diff --git a/docs/ru/query_language/functions/array_functions.md b/docs/ru/sql_reference/functions/array_functions.md similarity index 97% rename from docs/ru/query_language/functions/array_functions.md rename to docs/ru/sql_reference/functions/array_functions.md index ce757921bf5..36865f9aa79 100644 --- a/docs/ru/query_language/functions/array_functions.md +++ b/docs/ru/sql_reference/functions/array_functions.md @@ -55,7 +55,7 @@ arrayConcat(arrays) **Параметры** -- `arrays` – произвольное количество элементов типа [Array](../../data_types/array.md) +- `arrays` – произвольное количество элементов типа [Array](../../sql_reference/functions/array_functions.md) **Пример** @@ -359,7 +359,7 @@ arrayPushBack(array, single_value) **Параметры** - `array` - Массив. -- `single_value` - Одиночное значение. В массив с числам можно добавить только числа, в массив со строками только строки. При добавлении чисел ClickHouse автоматически приводит тип `single_value` к типу данных массива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../data_types/index.md#data_types)». Может быть равно `NULL`. 
Функция добавит элемент `NULL` в массив, а тип элементов массива преобразует в `Nullable`. +- `single_value` - Одиночное значение. В массив с числам можно добавить только числа, в массив со строками только строки. При добавлении чисел ClickHouse автоматически приводит тип `single_value` к типу данных массива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../sql_reference/functions/array_functions.md#data_types)». Может быть равно `NULL`. Функция добавит элемент `NULL` в массив, а тип элементов массива преобразует в `Nullable`. **Пример** @@ -384,7 +384,7 @@ arrayPushFront(array, single_value) **Параметры** - `array` - Массив. -- `single_value` - Одиночное значение. В массив с числам можно добавить только числа, в массив со строками только строки. При добавлении чисел ClickHouse автоматически приводит тип `single_value` к типу данных массива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../data_types/index.md#data_types)». Может быть равно `NULL`. Функция добавит элемент `NULL` в массив, а тип элементов массива преобразует в `Nullable`. +- `single_value` - Одиночное значение. В массив с числам можно добавить только числа, в массив со строками только строки. При добавлении чисел ClickHouse автоматически приводит тип `single_value` к типу данных массива. Подробнее о типах данных в ClickHouse читайте в разделе «[Типы данных](../../sql_reference/functions/array_functions.md#data_types)». Может быть равно `NULL`. Функция добавит элемент `NULL` в массив, а тип элементов массива преобразует в `Nullable`. **Пример** @@ -881,7 +881,7 @@ flatten(array_of_arrays) **Параметры** -- `array_of_arrays` — [Массивов](../../data_types/array.md) массивов. Например, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Массивов](../../sql_reference/functions/array_functions.md) массивов. Например, `[[1,2,3], [4,5]]`. **Примеры** @@ -907,7 +907,7 @@ arrayCompact(arr) **Параметры** -`arr` — [Массив](../../data_types/array.md) для обхода. +`arr` — [Массив](../../sql_reference/functions/array_functions.md) для обхода. **Возвращаемое значение** diff --git a/docs/ru/query_language/functions/array_join.md b/docs/ru/sql_reference/functions/array_join.md similarity index 100% rename from docs/ru/query_language/functions/array_join.md rename to docs/ru/sql_reference/functions/array_join.md diff --git a/docs/ru/query_language/functions/bit_functions.md b/docs/ru/sql_reference/functions/bit_functions.md similarity index 94% rename from docs/ru/query_language/functions/bit_functions.md rename to docs/ru/sql_reference/functions/bit_functions.md index 53efa9db06b..09eb9d17bc2 100644 --- a/docs/ru/query_language/functions/bit_functions.md +++ b/docs/ru/sql_reference/functions/bit_functions.md @@ -207,7 +207,7 @@ bitCount(x) **Параметры** -- `x` — [Целое число](../../data_types/int_uint.md) или [число с плавающей запятой](../../data_types/float.md). Функция использует представление числа в памяти, что позволяет поддержать числа с плавающей запятой. +- `x` — [Целое число](../../sql_reference/functions/bit_functions.md) или [число с плавающей запятой](../../sql_reference/functions/bit_functions.md). Функция использует представление числа в памяти, что позволяет поддержать числа с плавающей запятой. 
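A one-line check of `bitCount` before the return-value section below:

``` sql
-- 333 is 101001101 in binary, so five bits are set.
SELECT bitCount(333); -- returns 5
```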
**Возвращаемое значение** diff --git a/docs/ru/query_language/functions/bitmap_functions.md b/docs/ru/sql_reference/functions/bitmap_functions.md similarity index 97% rename from docs/ru/query_language/functions/bitmap_functions.md rename to docs/ru/sql_reference/functions/bitmap_functions.md index 90adee46b15..0a6288498be 100644 --- a/docs/ru/query_language/functions/bitmap_functions.md +++ b/docs/ru/sql_reference/functions/bitmap_functions.md @@ -61,8 +61,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Параметры** - `bitmap` – Битмап. [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Начальная точка подмножества. [UInt32](../../data_types/int_uint.md). -- `cardinality_limit` – Верхний предел подмножества. [UInt32](../../data_types/int_uint.md). +- `range_start` – Начальная точка подмножества. [UInt32](../../sql_reference/functions/bitmap_functions.md). +- `cardinality_limit` – Верхний предел подмножества. [UInt32](../../sql_reference/functions/bitmap_functions.md). **Возвращаемое значение** @@ -97,7 +97,7 @@ bitmapContains(haystack, needle) **Параметры** - `haystack` – [объект Bitmap](#bitmap_functions-bitmapbuild), в котором функция ищет значение. -- `needle` – значение, которое функция ищет. Тип — [UInt32](../../data_types/int_uint.md). +- `needle` – значение, которое функция ищет. Тип — [UInt32](../../sql_reference/functions/bitmap_functions.md). **Возвращаемые значения** diff --git a/docs/ru/query_language/functions/comparison_functions.md b/docs/ru/sql_reference/functions/comparison_functions.md similarity index 100% rename from docs/ru/query_language/functions/comparison_functions.md rename to docs/ru/sql_reference/functions/comparison_functions.md diff --git a/docs/ru/query_language/functions/conditional_functions.md b/docs/ru/sql_reference/functions/conditional_functions.md similarity index 98% rename from docs/ru/query_language/functions/conditional_functions.md rename to docs/ru/sql_reference/functions/conditional_functions.md index f2c97330d20..f3c57aac38b 100644 --- a/docs/ru/query_language/functions/conditional_functions.md +++ b/docs/ru/sql_reference/functions/conditional_functions.md @@ -14,7 +14,7 @@ SELECT if(cond, then, else) **Параметры** -- `cond` – Условие, которое может быть равно 0 или нет. Может быть [UInt8](../../data_types/int_uint.md) или `NULL`. +- `cond` – Условие, которое может быть равно 0 или нет. Может быть [UInt8](../../sql_reference/functions/conditional_functions.md) или `NULL`. - `then` - Возвращается результат выражения, если условие `cond` истинно. - `else` - Возвращается результат выражения, если условие `cond` ложно. diff --git a/docs/ru/query_language/functions/date_time_functions.md b/docs/ru/sql_reference/functions/date_time_functions.md similarity index 97% rename from docs/ru/query_language/functions/date_time_functions.md rename to docs/ru/sql_reference/functions/date_time_functions.md index 51a097fb31b..9f2f5b8afd2 100644 --- a/docs/ru/query_language/functions/date_time_functions.md +++ b/docs/ru/sql_reference/functions/date_time_functions.md @@ -215,9 +215,9 @@ dateDiff('unit', startdate, enddate, [timezone]) |quarter | |year | -- `startdate` — Первая дата. [Date](../../data_types/date.md) или [DateTime](../../data_types/datetime.md). +- `startdate` — Первая дата. [Date](../../sql_reference/functions/date_time_functions.md) или [DateTime](../../sql_reference/functions/date_time_functions.md). -- `enddate` — Вторая дата. [Date](../../data_types/date.md) или [DateTime](../../data_types/datetime.md). 
+- `enddate` — Вторая дата. [Date](../../sql_reference/functions/date_time_functions.md) или [DateTime](../../sql_reference/functions/date_time_functions.md). - `timezone` — Опциональный параметр. Если определен, применяется к обоим значениям: `startdate` и `enddate`. Если не определен, используются часовые пояса `startdate` и `enddate`. Если часовые пояса не совпадают, вернется неожидаемый результат. diff --git a/docs/ru/query_language/functions/encoding_functions.md b/docs/ru/sql_reference/functions/encoding_functions.md similarity index 91% rename from docs/ru/query_language/functions/encoding_functions.md rename to docs/ru/sql_reference/functions/encoding_functions.md index e6fbeb133c5..b4c4716d9c7 100644 --- a/docs/ru/query_language/functions/encoding_functions.md +++ b/docs/ru/sql_reference/functions/encoding_functions.md @@ -12,7 +12,7 @@ char(number_1, [number_2, ..., number_n]); **Параметры** -- `number_1, number_2, ..., number_n` — Числовые аргументы, которые интерпретируются как целые числа. Типы: [Int](../../data_types/int_uint.md), [Float](../../data_types/float.md). +- `number_1, number_2, ..., number_n` — Числовые аргументы, которые интерпретируются как целые числа. Типы: [Int](../../sql_reference/functions/encoding_functions.md), [Float](../../sql_reference/functions/encoding_functions.md). **Возвращаемое значение** @@ -104,7 +104,7 @@ Values of floating point and Decimal types are encoded as their representation i **Parameters** -- `arg` — A value to convert to hexadecimal. Types: [String](../../data_types/string.md), [UInt](../../data_types/int_uint.md), [Float](../../data_types/float.md), [Decimal](../../data_types/decimal.md), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md). +- `arg` — A value to convert to hexadecimal. Types: [String](../../sql_reference/functions/encoding_functions.md), [UInt](../../sql_reference/functions/encoding_functions.md), [Float](../../sql_reference/functions/encoding_functions.md), [Decimal](../../sql_reference/functions/encoding_functions.md), [Date](../../sql_reference/functions/encoding_functions.md) or [DateTime](../../sql_reference/functions/encoding_functions.md). **Returned value** diff --git a/docs/ru/query_language/functions/ext_dict_functions.md b/docs/ru/sql_reference/functions/ext_dict_functions.md similarity index 85% rename from docs/ru/query_language/functions/ext_dict_functions.md rename to docs/ru/sql_reference/functions/ext_dict_functions.md index dd7aff463f5..1a1d383e4bb 100644 --- a/docs/ru/query_language/functions/ext_dict_functions.md +++ b/docs/ru/sql_reference/functions/ext_dict_functions.md @@ -1,6 +1,6 @@ # Функции для работы с внешними словарями {#ext_dict_functions} -Информацию о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../dicts/external_dicts.md). +Информацию о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../../sql_reference/functions/ext_dict_functions.md). ## dictGet {#dictget} @@ -15,12 +15,12 @@ dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal). - `attr_name` — имя столбца словаря. [Строковый литерал](../syntax.md#syntax-string-literal). -- `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md) или [Tuple](../../data_types/tuple.md) в зависимости от конфигурации словаря. +- `id_expr` — значение ключа словаря. 
[Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql_reference/functions/ext_dict_functions.md) или [Tuple](../../sql_reference/functions/ext_dict_functions.md) в зависимости от конфигурации словаря. - `default_value_expr` — значение, возвращаемое в том случае, когда словарь не содержит строки с заданным ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращающее значение с типом данных, сконфигурированным для атрибута `attr_name`. **Возвращаемое значение** -- Значение атрибута, соответствующее ключу `id_expr`, если ClickHouse смог привести это значение к [заданному типу данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes). +- Значение атрибута, соответствующее ключу `id_expr`, если ClickHouse смог привести это значение к [заданному типу данных](../../sql_reference/functions/ext_dict_functions.md#ext_dict_structure-attributes). - Если ключа, соответствующего `id_expr` в словаре нет, то: @@ -90,7 +90,7 @@ LIMIT 3 **Смотрите также** -- [Внешние словари](../dicts/external_dicts.md) +- [Внешние словари](../../sql_reference/functions/ext_dict_functions.md) ## dictHas {#dicthas} @@ -103,7 +103,7 @@ dictHas('dict_name', id) **Параметры** - `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal). -- `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md). +- `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql_reference/functions/ext_dict_functions.md). **Возвращаемое значение** @@ -114,7 +114,7 @@ dictHas('dict_name', id) ## dictGetHierarchy {#dictgethierarchy} -Создаёт массив, содержащий цепочку предков для заданного ключа в [иерархическом словаре](../dicts/external_dicts_dict_hierarchical.md). +Создаёт массив, содержащий цепочку предков для заданного ключа в [иерархическом словаре](../dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md). **Синтаксис** @@ -125,13 +125,13 @@ dictGetHierarchy('dict_name', key) **Параметры** - `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal). -- `key` — значение ключа. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md). +- `key` — значение ключа. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql_reference/functions/ext_dict_functions.md). **Возвращаемое значение** - Цепочка предков заданного ключа. -Type: [Array(UInt64)](../../data_types/array.md). +Type: [Array(UInt64)](../../sql_reference/functions/ext_dict_functions.md). ## dictIsIn {#dictisin} @@ -142,8 +142,8 @@ Type: [Array(UInt64)](../../data_types/array.md). **Параметры** - `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal). -- `child_id_expr` — ключ для проверки. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md). -- `ancestor_id_expr` — предполагаемый предок ключа `child_id_expr`. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md). +- `child_id_expr` — ключ для проверки. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql_reference/functions/ext_dict_functions.md). +- `ancestor_id_expr` — предполагаемый предок ключа `child_id_expr`. 
[Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql_reference/functions/ext_dict_functions.md). **Возвращаемое значение** @@ -179,12 +179,12 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — имя словаря. [Строковый литерал](../syntax.md#syntax-string-literal). - `attr_name` — имя столбца словаря. [Строковый литерал](../syntax.md#syntax-string-literal). -- `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../data_types/int_uint.md). +- `id_expr` — значение ключа словаря. [Выражение](../syntax.md#syntax-expressions), возвращающее значение типа [UInt64](../../sql_reference/functions/ext_dict_functions.md). - `default_value_expr` — значение, возвращаемое в том случае, когда словарь не содержит строки с заданным ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращающее значение с типом данных, сконфигурированным для атрибута `attr_name`. **Возвращаемое значение** -- Если ClickHouse успешно обработал атрибут в соответствии с [заданным типом данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), то функции возвращают значение атрибута, соответствующее ключу `id_expr`. +- Если ClickHouse успешно обработал атрибут в соответствии с [заданным типом данных](../../sql_reference/functions/ext_dict_functions.md#ext_dict_structure-attributes), то функции возвращают значение атрибута, соответствующее ключу `id_expr`. - Если запрошенного `id_expr` нет в словаре, то: diff --git a/docs/ru/query_language/functions/functions_for_nulls.md b/docs/ru/sql_reference/functions/functions_for_nulls.md similarity index 97% rename from docs/ru/query_language/functions/functions_for_nulls.md rename to docs/ru/sql_reference/functions/functions_for_nulls.md index 1782a5a0cf2..0d07cbeb62d 100644 --- a/docs/ru/query_language/functions/functions_for_nulls.md +++ b/docs/ru/sql_reference/functions/functions_for_nulls.md @@ -204,7 +204,7 @@ SELECT nullIf(1, 2) ## assumeNotNull {#assumenotnull} -Приводит значение типа [Nullable](../../data_types/nullable.md) к не `Nullable`, если значение не `NULL`. +Приводит значение типа [Nullable](../../sql_reference/functions/functions_for_nulls.md) к не `Nullable`, если значение не `NULL`. ``` sql assumeNotNull(x) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/sql_reference/functions/geo.md similarity index 86% rename from docs/ru/query_language/functions/geo.md rename to docs/ru/sql_reference/functions/geo.md index 07e2e4c1da6..db51ac05166 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/sql_reference/functions/geo.md @@ -107,8 +107,8 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...) **Входные значения** -- `(x, y)` — координаты точки на плоскости. Тип данных — [Tuple](../../data_types/tuple.md) — кортеж из двух чисел. -- `[(a, b), (c, d) ...]` — вершины многоугольника. Тип данных — [Array](../../data_types/array.md). Каждая вершина представлена парой координат `(a, b)`. Вершины следует указывать в порядке обхода по или против часовой стрелки. Минимальное количество вершин — 3. Многоугольник должен быть константным. +- `(x, y)` — координаты точки на плоскости. Тип данных — [Tuple](../../sql_reference/functions/geo.md) — кортеж из двух чисел. +- `[(a, b), (c, d) ...]` — вершины многоугольника. Тип данных — [Array](../../sql_reference/functions/geo.md). Каждая вершина представлена парой координат `(a, b)`. 
Вершины следует указывать в порядке обхода по или против часовой стрелки. Минимальное количество вершин — 3. Многоугольник должен быть константным. - функция поддерживает также многоугольники с дырками (вырезанными кусками). Для этого случая, добавьте многоугольники, описывающие вырезанные куски, дополнительными аргументами функции. Функция не поддерживает не односвязные многоугольники. **Возвращаемые значения** @@ -196,14 +196,14 @@ h3IsValid(h3index) **Входные значения** -- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../data_types/int_uint.md). +- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../sql_reference/functions/geo.md). **Возвращаемые значения** - 0 — число не является H3-индексом - 1 — число является H3-индексом -Тип — [UInt8](../../data_types/int_uint.md). +Тип — [UInt8](../../sql_reference/functions/geo.md). **Пример** @@ -227,14 +227,14 @@ h3GetResolution(h3index) **Входные значения** -- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../data_types/int_uint.md). +- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../sql_reference/functions/geo.md). **Возвращаемые значения** - Разрешение сетки, от 0 до 15. - Для несуществующего идентификатора может быть возвращено произвольное значение, используйте [h3IsValid](#h3isvalid) для проверки идентификаторов -Тип — [UInt8](../../data_types/int_uint.md). +Тип — [UInt8](../../sql_reference/functions/geo.md). **Пример** @@ -258,11 +258,11 @@ h3EdgeAngle(resolution) **Входные значения** -- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значений — `[0, 15]`. +- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../sql_reference/functions/geo.md). Диапазон возможных значений — `[0, 15]`. **Возвращаемые значения** -Средняя длина стороны многоугольника H3 в градусах, тип — [Float64](../../data_types/float.md). +Средняя длина стороны многоугольника H3 в градусах, тип — [Float64](../../sql_reference/functions/geo.md). **Пример** @@ -286,11 +286,11 @@ h3EdgeLengthM(resolution) **Входные значения** -- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значений — `[0, 15]`. +- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../sql_reference/functions/geo.md). Диапазон возможных значений — `[0, 15]`. **Возвращаемые значения** -Средняя длина стороны многоугольника H3 в метрах, тип — [Float64](../../data_types/float.md). +Средняя длина стороны многоугольника H3 в метрах, тип — [Float64](../../sql_reference/functions/geo.md). **Пример** @@ -320,16 +320,16 @@ geoToH3(lon, lat, resolution) **Параметры** -- `lon` — географическая долгота. Тип данных — [Float64](../../data_types/float.md). -- `lat` — географическая широта. Тип данных — [Float64](../../data_types/float.md). -- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значений — `[0, 15]`. +- `lon` — географическая долгота. Тип данных — [Float64](../../sql_reference/functions/geo.md). +- `lat` — географическая широта. Тип данных — [Float64](../../sql_reference/functions/geo.md). +- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../sql_reference/functions/geo.md). Диапазон возможных значений — `[0, 15]`. **Возвращаемые значения** - Порядковый номер шестиугольника. - 0 в случае ошибки. -Тип — [UInt64](../../data_types/int_uint.md). 
+Тип — [UInt64](../../sql_reference/functions/geo.md). **Пример** @@ -357,12 +357,12 @@ h3kRing(h3index, k) **Входные значения** -- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../data_types/int_uint.md). -- `k` — радиус. Тип данных — [целое число](../../data_types/int_uint.md) +- `h3index` — идентификатор шестиугольника. Тип данных — [UInt64](../../sql_reference/functions/geo.md). +- `k` — радиус. Тип данных — [целое число](../../sql_reference/functions/geo.md) **Возвращаемые значения** -[Массив](../../data_types/array.md) из H3-индексов типа [UInt64](../../data_types/int_uint.md). +[Массив](../../sql_reference/functions/geo.md) из H3-индексов типа [UInt64](../../sql_reference/functions/geo.md). **Пример** diff --git a/docs/ru/query_language/functions/hash_functions.md b/docs/ru/sql_reference/functions/hash_functions.md similarity index 88% rename from docs/ru/query_language/functions/hash_functions.md rename to docs/ru/sql_reference/functions/hash_functions.md index 32e701cbd23..62b6566f63f 100644 --- a/docs/ru/query_language/functions/hash_functions.md +++ b/docs/ru/sql_reference/functions/hash_functions.md @@ -4,7 +4,7 @@ ## halfMD5 {#hash-functions-halfmd5} -[Интерпретирует](../../query_language/functions/type_conversion_functions.md#type_conversion_functions-reinterpretAsString) все входные параметры как строки и вычисляет хэш [MD5](https://ru.wikipedia.org/wiki/MD5) для каждой из них. Затем объединяет хэши, берет первые 8 байт хэша результирующей строки и интерпретирует их как значение типа `UInt64` с big-endian порядком байтов. +[Интерпретирует](../../sql_reference/functions/hash_functions.md#type_conversion_functions-reinterpretAsString) все входные параметры как строки и вычисляет хэш [MD5](https://ru.wikipedia.org/wiki/MD5) для каждой из них. Затем объединяет хэши, берет первые 8 байт хэша результирующей строки и интерпретирует их как значение типа `UInt64` с big-endian порядком байтов. ``` sql halfMD5(par1, ...) @@ -15,11 +15,11 @@ halfMD5(par1, ...) **Параметры** -Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../data_types/index.md). +Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -Значение хэша с типом данных [UInt64](../../data_types/int_uint.md). +Значение хэша с типом данных [UInt64](../../sql_reference/functions/hash_functions.md). **Пример** @@ -49,7 +49,7 @@ sipHash64(par1,...) Это криптографическая хэш-функция. Она работает по крайней мере в три раза быстрее, чем функция [MD5](#hash_functions-md5). -Функция [интерпретирует](../../query_language/functions/type_conversion_functions.md#type_conversion_functions-reinterpretAsString) все входные параметры как строки и вычисляет хэш MD5 для каждой из них. Затем комбинирует хэши по следующему алгоритму. +Функция [интерпретирует](../../sql_reference/functions/hash_functions.md#type_conversion_functions-reinterpretAsString) все входные параметры как строки и вычисляет хэш MD5 для каждой из них. Затем комбинирует хэши по следующему алгоритму. 1. После хэширования всех входных параметров функция получает массив хэшей. 2. Функция принимает первый и второй элементы и вычисляет хэш для массива из них. @@ -58,11 +58,11 @@ sipHash64(par1,...) **Параметры** -Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../data_types/index.md). 
+Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -Значение хэша с типом данных [UInt64](../../data_types/int_uint.md). +Значение хэша с типом данных [UInt64](../../sql_reference/functions/hash_functions.md). **Пример** @@ -94,11 +94,11 @@ cityHash64(par1,...) **Параметры** -Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../data_types/index.md). +Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -Значение хэша с типом данных [UInt64](../../data_types/int_uint.md). +Значение хэша с типом данных [UInt64](../../sql_reference/functions/hash_functions.md). **Примеры** @@ -160,11 +160,11 @@ farmHash64(par1, ...) **Параметры** -Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../data_types/index.md). +Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -Значение хэша с типом данных [UInt64](../../data_types/int_uint.md). +Значение хэша с типом данных [UInt64](../../sql_reference/functions/hash_functions.md). **Пример** @@ -288,11 +288,11 @@ metroHash64(par1, ...) **Параметры** -Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../data_types/index.md). +Функция принимает переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -Значение хэша с типом данных [UInt64](../../data_types/int_uint.md). +Значение хэша с типом данных [UInt64](../../sql_reference/functions/hash_functions.md). **Пример** @@ -323,12 +323,12 @@ murmurHash2_64(par1, ...) **Параметры** -Обе функции принимают переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../data_types/index.md). +Обе функции принимают переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -- Функция `murmurHash2_32` возвращает значение типа [UInt32](../../data_types/int_uint.md). -- Функция `murmurHash2_64` возвращает значение типа [UInt64](../../data_types/int_uint.md). +- Функция `murmurHash2_32` возвращает значение типа [UInt32](../../sql_reference/functions/hash_functions.md). +- Функция `murmurHash2_64` возвращает значение типа [UInt64](../../sql_reference/functions/hash_functions.md). **Пример** @@ -353,12 +353,12 @@ murmurHash3_64(par1, ...) **Параметры** -Обе функции принимают переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../data_types/index.md). +Обе функции принимают переменное число входных параметров. Параметры могут быть любого [поддерживаемого типа данных](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -- Функция `murmurHash3_32` возвращает значение типа [UInt32](../../data_types/int_uint.md). -- Функция `murmurHash3_64` возвращает значение типа [UInt64](../../data_types/int_uint.md). 
+- Функция `murmurHash3_32` возвращает значение типа [UInt32](../../sql_reference/functions/hash_functions.md). +- Функция `murmurHash3_64` возвращает значение типа [UInt64](../../sql_reference/functions/hash_functions.md). **Пример** @@ -382,11 +382,11 @@ murmurHash3_128( expr ) **Параметры** -- `expr` — [выражение](../syntax.md#syntax-expressions) возвращающее значение типа[String](../../data_types/string.md). +- `expr` — [выражение](../syntax.md#syntax-expressions) возвращающее значение типа[String](../../sql_reference/functions/hash_functions.md). **Возвращаемое значение** -Хэш-значение типа [FixedString(16)](../../data_types/fixedstring.md). +Хэш-значение типа [FixedString(16)](../../sql_reference/functions/hash_functions.md). **Пример** diff --git a/docs/ru/query_language/functions/higher_order_functions.md b/docs/ru/sql_reference/functions/higher_order_functions.md similarity index 100% rename from docs/ru/query_language/functions/higher_order_functions.md rename to docs/ru/sql_reference/functions/higher_order_functions.md diff --git a/docs/ru/query_language/functions/in_functions.md b/docs/ru/sql_reference/functions/in_functions.md similarity index 93% rename from docs/ru/query_language/functions/in_functions.md rename to docs/ru/sql_reference/functions/in_functions.md index 70c8c1bb6c3..a5cdb1dc217 100644 --- a/docs/ru/query_language/functions/in_functions.md +++ b/docs/ru/sql_reference/functions/in_functions.md @@ -2,7 +2,7 @@ ## in, notIn, globalIn, globalNotIn {#in-functions} -Смотрите раздел [Операторы IN](../select.md#select-in-operators). +Смотрите раздел [Операторы IN](../statements/select.md#select-in-operators). ## tuple(x, y, …), оператор (x, y, …) {#tuplex-y-operator-x-y} diff --git a/docs/ru/query_language/functions/index.md b/docs/ru/sql_reference/functions/index.md similarity index 100% rename from docs/ru/query_language/functions/index.md rename to docs/ru/sql_reference/functions/index.md diff --git a/docs/ru/query_language/functions/introspection.md b/docs/ru/sql_reference/functions/introspection.md similarity index 94% rename from docs/ru/query_language/functions/introspection.md rename to docs/ru/sql_reference/functions/introspection.md index 50b4cbb44bf..41ffb114e09 100644 --- a/docs/ru/query_language/functions/introspection.md +++ b/docs/ru/sql_reference/functions/introspection.md @@ -29,7 +29,7 @@ addressToLine(address_of_binary_instruction) **Параметры** -- `address_of_binary_instruction` ([Тип UInt64](../../data_types/int_uint.md))- Адрес инструкции в запущенном процессе. +- `address_of_binary_instruction` ([Тип UInt64](../../sql_reference/functions/introspection.md))- Адрес инструкции в запущенном процессе. **Возвращаемое значение** @@ -41,7 +41,7 @@ addressToLine(address_of_binary_instruction) - Пустая строка, если адрес не является допустимым. -Тип: [String](../../data_types/string.md). +Тип: [String](../../sql_reference/functions/introspection.md). **Пример** @@ -120,14 +120,14 @@ addressToSymbol(address_of_binary_instruction) **Параметры** -- `address_of_binary_instruction` ([Тип uint64](../../data_types/int_uint.md)) — Адрес инструкции в запущенном процессе. +- `address_of_binary_instruction` ([Тип uint64](../../sql_reference/functions/introspection.md)) — Адрес инструкции в запущенном процессе. **Возвращаемое значение** - Символ из объектных файлов ClickHouse. - Пустая строка, если адрес не является допустимым. -Тип: [String](../../data_types/string.md). +Тип: [String](../../sql_reference/functions/introspection.md). 
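In addition to the full example that follows, a compact sketch of the common usage pattern: resolving every frame of a stack trace and demangling it. This sketch is not part of the original patch and assumes `allow_introspection_functions` is enabled and `system.trace_log` is populated:

``` sql
SET allow_introspection_functions = 1;

-- resolve each address of a trace to a demangled C++ function name
SELECT arrayMap(addr -> demangle(addressToSymbol(addr)), trace) AS symbols
FROM system.trace_log
LIMIT 1;
```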
**Пример** @@ -217,14 +217,14 @@ demangle(symbol) **Параметры** -- `symbol` ([Строка](../../data_types/string.md)) - Символ из объектного файла. +- `symbol` ([Строка](../../sql_reference/functions/introspection.md)) - Символ из объектного файла. **Возвращаемое значение** - Имя функции C++. - Пустая строка, если символ не является допустимым. -Тип: [Строка](../../data_types/string.md). +Тип: [Строка](../../sql_reference/functions/introspection.md). **Пример** diff --git a/docs/ru/query_language/functions/ip_address_functions.md b/docs/ru/sql_reference/functions/ip_address_functions.md similarity index 95% rename from docs/ru/query_language/functions/ip_address_functions.md rename to docs/ru/sql_reference/functions/ip_address_functions.md index 87c1da4114b..57485f6a812 100644 --- a/docs/ru/query_language/functions/ip_address_functions.md +++ b/docs/ru/sql_reference/functions/ip_address_functions.md @@ -176,7 +176,7 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32) ## toIPv4(string) {#toipv4string} -Псевдоним функции `IPv4StringToNum()` которая принимает строку с адресом IPv4 и возвращает значение типа [IPv4](../../data_types/domains/ipv4.md), которое равно значению, возвращаемому функцией `IPv4StringToNum()`. +Псевдоним функции `IPv4StringToNum()` которая принимает строку с адресом IPv4 и возвращает значение типа [IPv4](../../sql_reference/functions/ip_address_functions.md), которое равно значению, возвращаемому функцией `IPv4StringToNum()`. ``` sql WITH @@ -208,7 +208,7 @@ SELECT ## toIPv6(string) {#toipv6string} -Псевдоним функции `IPv6StringToNum()` которая принимает строку с адресом IPv6 и возвращает значение типа [IPv6](../../data_types/domains/ipv6.md), которое равно значению, возвращаемому функцией `IPv6StringToNum()`. +Псевдоним функции `IPv6StringToNum()` которая принимает строку с адресом IPv6 и возвращает значение типа [IPv6](../../sql_reference/functions/ip_address_functions.md), которое равно значению, возвращаемому функцией `IPv6StringToNum()`. ``` sql WITH diff --git a/docs/ru/query_language/functions/json_functions.md b/docs/ru/sql_reference/functions/json_functions.md similarity index 100% rename from docs/ru/query_language/functions/json_functions.md rename to docs/ru/sql_reference/functions/json_functions.md diff --git a/docs/ru/query_language/functions/logical_functions.md b/docs/ru/sql_reference/functions/logical_functions.md similarity index 100% rename from docs/ru/query_language/functions/logical_functions.md rename to docs/ru/sql_reference/functions/logical_functions.md diff --git a/docs/ru/query_language/functions/machine_learning_functions.md b/docs/ru/sql_reference/functions/machine_learning_functions.md similarity index 51% rename from docs/ru/query_language/functions/machine_learning_functions.md rename to docs/ru/sql_reference/functions/machine_learning_functions.md index 8a51d1dd4c6..c5dd27d96af 100644 --- a/docs/ru/query_language/functions/machine_learning_functions.md +++ b/docs/ru/sql_reference/functions/machine_learning_functions.md @@ -6,8 +6,8 @@ ### Stochastic Linear Regression {#stochastic-linear-regression} -Агрегатная функция [stochasticLinearRegression](../agg_functions/reference.md#agg_functions-stochasticlinearregression) реализует стохастический градиентный спуск, использую линейную модель и функцию потерь MSE. 
+Агрегатная функция [stochasticLinearRegression](../../sql_reference/functions/machine_learning_functions.md#agg_functions-stochasticlinearregression) реализует стохастический градиентный спуск, использую линейную модель и функцию потерь MSE. ### Stochastic Logistic Regression {#stochastic-logistic-regression} -Агрегатная функция [stochasticLogisticRegression](../agg_functions/reference.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации. +Агрегатная функция [stochasticLogisticRegression](../../sql_reference/functions/machine_learning_functions.md#agg_functions-stochasticlogisticregression) реализует стохастический градиентный спуск для задачи бинарной классификации. diff --git a/docs/ru/query_language/functions/math_functions.md b/docs/ru/sql_reference/functions/math_functions.md similarity index 100% rename from docs/ru/query_language/functions/math_functions.md rename to docs/ru/sql_reference/functions/math_functions.md diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/sql_reference/functions/other_functions.md similarity index 97% rename from docs/ru/query_language/functions/other_functions.md rename to docs/ru/sql_reference/functions/other_functions.md index e4f7440b7cd..a0568cb262e 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/sql_reference/functions/other_functions.md @@ -48,7 +48,7 @@ basename( expr ) **Параметры** -- `expr` — Выражение, возвращающее значение типа [String](../../data_types/string.md). В результирующем значении все бэкслэши должны быть экранированы. +- `expr` — Выражение, возвращающее значение типа [String](../../sql_reference/functions/other_functions.md). В результирующем значении все бэкслэши должны быть экранированы. **Возвращаемое значение** @@ -186,8 +186,8 @@ SELECT currentUser(); **Параметры** -- `x` — Значение, которое нужно проверить на бесконечность. Тип: [Float\*](../../data_types/float.md). -- `y` — Запасное значение. Тип: [Float\*](../../data_types/float.md). +- `x` — Значение, которое нужно проверить на бесконечность. Тип: [Float\*](../../sql_reference/functions/other_functions.md). +- `y` — Запасное значение. Тип: [Float\*](../../sql_reference/functions/other_functions.md). **Возвращаемые значения** @@ -420,7 +420,7 @@ neighbor(column, offset[, default_value]) **Параметры** - `column` — Имя столбца или скалярное выражение. -- `offset` - Смещение от текущей строки `column`. [Int64](../../data_types/int_uint.md). +- `offset` - Смещение от текущей строки `column`. [Int64](../../sql_reference/functions/other_functions.md). - `default_value` - Опциональный параметр. Значение, которое будет возвращено, если смещение выходит за пределы блока данных. **Возвращаемое значение** @@ -603,7 +603,7 @@ WHERE diff != 1 ## getSizeOfEnumType {#getsizeofenumtype} -Возвращает количество полей в [Enum](../../data_types/enum.md). +Возвращает количество полей в [Enum](../../sql_reference/functions/other_functions.md). ``` sql getSizeOfEnumType(value) @@ -716,7 +716,7 @@ defaultValueOfArgumentType(expression) - `0` для чисел; - Пустая строка для строк; -- `ᴺᵁᴸᴸ` для [Nullable](../../data_types/nullable.md). +- `ᴺᵁᴸᴸ` для [Nullable](../../sql_reference/functions/other_functions.md). **Пример** @@ -791,7 +791,7 @@ filesystemAvailable() - Объём доступного для записи данных места в байтах. -Тип: [UInt64](../../data_types/int_uint.md). +Тип: [UInt64](../../sql_reference/functions/other_functions.md). 
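Besides the example below, the filesystem functions can be combined into a simple capacity report. A hedged sketch, assuming the default data path is configured (not part of the original patch):

``` sql
SELECT
    formatReadableSize(filesystemAvailable()) AS available,
    formatReadableSize(filesystemCapacity()) AS capacity,
    round(100 * filesystemAvailable() / filesystemCapacity(), 2) AS available_percent;
```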
**Пример** @@ -823,7 +823,7 @@ filesystemFree() - Объем свободного места в байтах. -Тип: [UInt64](../../data_types/int_uint.md). +Тип: [UInt64](../../sql_reference/functions/other_functions.md). **Пример** @@ -843,7 +843,7 @@ SELECT formatReadableSize(filesystemFree()) AS "Free space", toTypeName(filesyst ## filesystemCapacity {#filesystemcapacity} -Возвращает информацию о ёмкости файловой системы в байтах. Для оценки должен быть настроен [путь](../../operations/server_settings/settings.md#server_settings-path) к каталогу с данными. +Возвращает информацию о ёмкости файловой системы в байтах. Для оценки должен быть настроен [путь](../../sql_reference/functions/other_functions.md#server_configuration_parameters-path) к каталогу с данными. **Синтаксис** @@ -855,7 +855,7 @@ filesystemCapacity() - Информация о ёмкости файловой системы в байтах. -Тип: [UInt64](../../data_types/int_uint.md). +Тип: [UInt64](../../sql_reference/functions/other_functions.md). **Пример** @@ -883,9 +883,9 @@ SELECT formatReadableSize(filesystemCapacity()) AS "Capacity", toTypeName(filesy ## joinGet {#joinget} -Функция позволяет извлекать данные из таблицы таким же образом как из [словаря](../../query_language/dicts/index.md). +Функция позволяет извлекать данные из таблицы таким же образом как из [словаря](../../sql_reference/functions/other_functions.md). -Получает данные из таблиц [Join](../../operations/table_engines/join.md#creating-a-table) по ключу. +Получает данные из таблиц [Join](../../sql_reference/functions/other_functions.md#creating-a-table) по ключу. Поддерживаются только таблицы, созданные с `ENGINE = Join(ANY, LEFT, )`. @@ -907,7 +907,7 @@ joinGet(join_storage_table_name, `value_column`, join_keys) Если значения не существует в исходной таблице, вернется `0` или `null` в соответствии с настройками [join\_use\_nulls](../../operations/settings/settings.md#join_use_nulls). -Подробнее о настройке `join_use_nulls` в [операциях Join](../../operations/table_engines/join.md). +Подробнее о настройке `join_use_nulls` в [операциях Join](../../sql_reference/functions/other_functions.md). **Пример** @@ -1010,7 +1010,7 @@ randomPrintableASCII(length) - Строка со случайным набором печатных символов [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters). -Тип: [String](../../data_types/string.md) +Тип: [String](../../sql_reference/functions/other_functions.md) **Пример** diff --git a/docs/ru/query_language/functions/random_functions.md b/docs/ru/sql_reference/functions/random_functions.md similarity index 100% rename from docs/ru/query_language/functions/random_functions.md rename to docs/ru/sql_reference/functions/random_functions.md diff --git a/docs/ru/query_language/functions/rounding_functions.md b/docs/ru/sql_reference/functions/rounding_functions.md similarity index 98% rename from docs/ru/query_language/functions/rounding_functions.md rename to docs/ru/sql_reference/functions/rounding_functions.md index 9c5f73815b5..9e7947c109d 100644 --- a/docs/ru/query_language/functions/rounding_functions.md +++ b/docs/ru/sql_reference/functions/rounding_functions.md @@ -30,7 +30,7 @@ round(expression [, decimal_places]) **Параметры:** -- `expression` — Число для округления. Может быть любым [выражением](../syntax.md#syntax-expressions), возвращающим числовой [тип данных](../../data_types/index.md#data_types). +- `expression` — Число для округления. Может быть любым [выражением](../syntax.md#syntax-expressions), возвращающим числовой [тип данных](../../sql_reference/functions/rounding_functions.md#data_types). 
- `decimal-places` — Целое значение. - Если `decimal-places > 0`, то функция округляет значение справа от запятой. - Если `decimal-places < 0` то функция округляет значение слева от запятой. @@ -109,7 +109,7 @@ roundBankers(expression [, decimal_places]) **Параметры** -- `expression` — Число для округления. Может быть любым [выражением](../syntax.md#syntax-expressions), возвращающим числовой [тип данных](../../data_types/index.md#data_types). +- `expression` — Число для округления. Может быть любым [выражением](../syntax.md#syntax-expressions), возвращающим числовой [тип данных](../../sql_reference/functions/rounding_functions.md#data_types). - `decimal-places` — Десятичный разряд. Целое число. - `decimal-places > 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции справа от запятой. Например, `roundBankers(3.55, 1) = 3.6`. - `decimal-places < 0` — Функция округляет значение выражения до ближайшего чётного числа на соответствующей позиции слева от запятой. Например, `roundBankers(24.55, -1) = 20`. diff --git a/docs/ru/query_language/functions/splitting_merging_functions.md b/docs/ru/sql_reference/functions/splitting_merging_functions.md similarity index 100% rename from docs/ru/query_language/functions/splitting_merging_functions.md rename to docs/ru/sql_reference/functions/splitting_merging_functions.md diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/sql_reference/functions/string_functions.md similarity index 97% rename from docs/ru/query_language/functions/string_functions.md rename to docs/ru/sql_reference/functions/string_functions.md index ef2793d2c18..5c51ad7c73c 100644 --- a/docs/ru/query_language/functions/string_functions.md +++ b/docs/ru/sql_reference/functions/string_functions.md @@ -70,7 +70,7 @@ toValidUTF8( input_string ) Параметры: -- input\_string — произвольный набор байтов, представленный как объект типа [String](../../data_types/string.md). +- input\_string — произвольный набор байтов, представленный как объект типа [String](../../sql_reference/functions/string_functions.md). Возвращаемое значение: Корректная строка UTF-8. @@ -98,8 +98,8 @@ repeat(s, n) **Параметры** -- `s` — Строка для повторения. [String](../../data_types/string.md). -- `n` — Количество повторов. [UInt](../../data_types/int_uint.md). +- `s` — Строка для повторения. [String](../../sql_reference/functions/string_functions.md). +- `n` — Количество повторов. [UInt](../../sql_reference/functions/string_functions.md). **Возвращаемое значение** @@ -322,8 +322,8 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) **Параметры** -- `trim_character` — один или несколько символов, подлежащие удалению. [String](../../data_types/string.md). -- `input_string` — строка для обрезки. [String](../../data_types/string.md). +- `trim_character` — один или несколько символов, подлежащие удалению. [String](../../sql_reference/functions/string_functions.md). +- `input_string` — строка для обрезки. [String](../../sql_reference/functions/string_functions.md). **Возвращаемое значение** @@ -361,7 +361,7 @@ trimLeft(input_string) **Параметры** -- `input_string` — строка для обрезки. [String](../../data_types/string.md). +- `input_string` — строка для обрезки. [String](../../sql_reference/functions/string_functions.md). **Возвращаемое значение** @@ -399,7 +399,7 @@ trimRight(input_string) **Параметры** -- `input_string` — строка для обрезки. [String](../../data_types/string.md). +- `input_string` — строка для обрезки. 
[String](../../sql_reference/functions/string_functions.md). **Возвращаемое значение** @@ -437,7 +437,7 @@ trimBoth(input_string) **Параметры** -- `input_string` — строка для обрезки. [String](../../data_types/string.md). +- `input_string` — строка для обрезки. [String](../../sql_reference/functions/string_functions.md). **Возвращаемое значение** diff --git a/docs/ru/query_language/functions/string_replace_functions.md b/docs/ru/sql_reference/functions/string_replace_functions.md similarity index 100% rename from docs/ru/query_language/functions/string_replace_functions.md rename to docs/ru/sql_reference/functions/string_replace_functions.md diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/sql_reference/functions/string_search_functions.md similarity index 100% rename from docs/ru/query_language/functions/string_search_functions.md rename to docs/ru/sql_reference/functions/string_search_functions.md diff --git a/docs/ru/query_language/functions/type_conversion_functions.md b/docs/ru/sql_reference/functions/type_conversion_functions.md similarity index 90% rename from docs/ru/query_language/functions/type_conversion_functions.md rename to docs/ru/sql_reference/functions/type_conversion_functions.md index 00582cd61cb..be4de7e7c89 100644 --- a/docs/ru/query_language/functions/type_conversion_functions.md +++ b/docs/ru/sql_reference/functions/type_conversion_functions.md @@ -8,7 +8,7 @@ ## toInt(8\|16\|32\|64) {#toint8163264} -Преобразует входное значение к типу [Int](../../data_types/int_uint.md). Семейство функций включает: +Преобразует входное значение к типу [Int](../../sql_reference/functions/type_conversion_functions.md). Семейство функций включает: - `toInt8(expr)` — возвращает значение типа `Int8`. - `toInt16(expr)` — возвращает значение типа `Int16`. @@ -25,7 +25,7 @@ Функции используют [округление к нулю](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), т.е. обрезают дробную часть числа. -Поведение функций для аргументов [NaN и Inf](../../data_types/float.md#data_type-float-nan-inf) не определено. При использовании функций помните о возможных проблемах при [преобразовании чисел](#numeric-conversion-issues). +Поведение функций для аргументов [NaN и Inf](../../sql_reference/functions/type_conversion_functions.md#data_type-float-nan-inf) не определено. При использовании функций помните о возможных проблемах при [преобразовании чисел](#numeric-conversion-issues). **Пример** @@ -73,7 +73,7 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123') ## toUInt(8\|16\|32\|64) {#touint8163264} -Преобраует входное значение к типу [UInt](../../data_types/int_uint.md). Семейство функций включает: +Преобраует входное значение к типу [UInt](../../sql_reference/functions/type_conversion_functions.md). Семейство функций включает: - `toUInt8(expr)` — возвращает значение типа `UInt8`. - `toUInt16(expr)` — возвращает значение типа `UInt16`. @@ -90,7 +90,7 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123') Функции используют [округление к нулю](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), т.е. обрезают дробную часть числа. -Поведение функций для аргументов [NaN и Inf](../../data_types/float.md#data_type-float-nan-inf) не определено. Если передать строку, содержащую отрицательное число, например `'-32'`, ClickHouse генерирует исключение. При использовании функций помните о возможных проблемах при [преобразовании чисел](#numeric-conversion-issues). 
+Поведение функций для аргументов [NaN и Inf](../../sql_reference/functions/type_conversion_functions.md#data_type-float-nan-inf) не определено. Если передать строку, содержащую отрицательное число, например `'-32'`, ClickHouse генерирует исключение. При использовании функций помните о возможных проблемах при [преобразовании чисел](#numeric-conversion-issues). **Пример** @@ -128,7 +128,7 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) ## toDecimal(32\|64\|128) {#todecimal3264128} -Преобразует `value` к типу данных [Decimal](../../data_types/decimal.md) с точностью `S`. `value` может быть числом или строкой. Параметр `S` (scale) задаёт число десятичных знаков. +Преобразует `value` к типу данных [Decimal](../../sql_reference/functions/type_conversion_functions.md) с точностью `S`. `value` может быть числом или строкой. Параметр `S` (scale) задаёт число десятичных знаков. - `toDecimal32(value, S)` - `toDecimal64(value, S)` @@ -136,7 +136,7 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) ## toDecimal(32\|64\|128)OrNull {#todecimal3264128ornull} -Преобразует входную строку в значение с типом данных [Nullable (Decimal (P, S))](../../data_types/decimal.md). Семейство функций включает в себя: +Преобразует входную строку в значение с типом данных [Nullable (Decimal (P, S))](../../sql_reference/functions/type_conversion_functions.md). Семейство функций включает в себя: - `toDecimal32OrNull(expr, S)` — Возвращает значение типа `Nullable(Decimal32(S))`. - `toDecimal64OrNull(expr, S)` — Возвращает значение типа `Nullable(Decimal64(S))`. @@ -146,7 +146,7 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) **Параметры** -- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../data_types/string.md). ClickHouse ожидает текстовое представление десятичного числа. Например, `'1.111'`. +- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../sql_reference/functions/type_conversion_functions.md). ClickHouse ожидает текстовое представление десятичного числа. Например, `'1.111'`. - `S` — количество десятичных знаков в результирующем значении. **Возвращаемое значение** @@ -180,7 +180,7 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) ## toDecimal(32\|64\|128)OrZero {#todecimal3264128orzero} -Преобразует тип входного значения в [Decimal (P, S)](../../data_types/decimal.md). Семейство функций включает в себя: +Преобразует тип входного значения в [Decimal (P, S)](../../sql_reference/functions/type_conversion_functions.md). Семейство функций включает в себя: - `toDecimal32OrZero( expr, S)` — возвращает значение типа `Decimal32(S)`. - `toDecimal64OrZero( expr, S)` — возвращает значение типа `Decimal64(S)`. @@ -190,7 +190,7 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) **Параметры** -- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../data_types/string.md). ClickHouse ожидает текстовое представление десятичного числа. Например, `'1.111'`. +- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../sql_reference/functions/type_conversion_functions.md). ClickHouse ожидает текстовое представление десятичного числа. Например, `'1.111'`. - `S` — количество десятичных знаков в результирующем значении. **Возвращаемое значение** @@ -332,7 +332,7 @@ SELECT Преобразование в FixedString(N) работает только для аргументов типа String или FixedString(N). 
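A minimal sketch of the restriction just stated (illustrative values, not from the original docs): conversion to `FixedString(N)` accepts only string arguments, so other types are first converted to `String` explicitly:

``` sql
-- works: the argument is a String of matching length
SELECT CAST('abc' AS FixedString(3));

-- a number must be turned into a String before the cast
SELECT CAST(toString(42) AS FixedString(2));
```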
-Поддержано преобразование к типу [Nullable](../../data_types/nullable.md) и обратно. Пример: +Поддержано преобразование к типу [Nullable](../../sql_reference/functions/type_conversion_functions.md) и обратно. Пример: ``` sql SELECT toTypeName(x) FROM t_null @@ -358,7 +358,7 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval} -Приводит аргумент из числового типа данных к типу данных [IntervalType](../../data_types/special_data_types/interval.md). +Приводит аргумент из числового типа данных к типу данных [IntervalType](../../sql_reference/functions/type_conversion_functions.md). **Синтаксис** @@ -401,7 +401,7 @@ SELECT ## parseDateTimeBestEffort {#parsedatetimebesteffort} -Преобразует дату и время в [строковом](../../data_types/string.md) представлении к типу данных [DateTime](../../data_types/datetime.md#data_type-datetime). +Преобразует дату и время в [строковом](../../sql_reference/functions/type_conversion_functions.md) представлении к типу данных [DateTime](../../sql_reference/functions/type_conversion_functions.md#data_type-datetime). Функция распознаёт форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), формат даты времени ClickHouse's а также некоторые другие форматы. @@ -413,8 +413,8 @@ parseDateTimeBestEffort(time_string[, time_zone]); **Параметры** -- `time_string` — строка, содержащая дату и время для преобразования. [String](../../data_types/string.md). -- `time_zone` — часовой пояс. Функция анализирует `time_string` в соответствии с заданным часовым поясом. [String](../../data_types/string.md). +- `time_string` — строка, содержащая дату и время для преобразования. [String](../../sql_reference/functions/type_conversion_functions.md). +- `time_zone` — часовой пояс. Функция анализирует `time_string` в соответствии с заданным часовым поясом. [String](../../sql_reference/functions/type_conversion_functions.md). **Поддерживаемые нестандартные форматы** diff --git a/docs/ru/query_language/functions/url_functions.md b/docs/ru/sql_reference/functions/url_functions.md similarity index 98% rename from docs/ru/query_language/functions/url_functions.md rename to docs/ru/sql_reference/functions/url_functions.md index 6dc62b9c193..f024cfdcfd9 100644 --- a/docs/ru/query_language/functions/url_functions.md +++ b/docs/ru/sql_reference/functions/url_functions.md @@ -20,7 +20,7 @@ domain(url) **Параметры** -- `url` — URL. Тип — [String](../../data_types/string.md). +- `url` — URL. Тип — [String](../../sql_reference/functions/url_functions.md). URL может быть указан со схемой или без неё. Примеры: @@ -71,7 +71,7 @@ topLevelDomain(url) **Параметры** -- `url` — URL. Тип — [String](../../data_types/string.md). +- `url` — URL. Тип — [String](../../sql_reference/functions/url_functions.md). URL может быть указан со схемой или без неё. Примеры: diff --git a/docs/ru/query_language/functions/uuid_functions.md b/docs/ru/sql_reference/functions/uuid_functions.md similarity index 92% rename from docs/ru/query_language/functions/uuid_functions.md rename to docs/ru/sql_reference/functions/uuid_functions.md index 1f52ba27e31..960e2b10e80 100644 --- a/docs/ru/query_language/functions/uuid_functions.md +++ b/docs/ru/sql_reference/functions/uuid_functions.md @@ -10,7 +10,7 @@ generateUUIDv4() **Возвращаемое значение** -Значение типа [UUID](../../data_types/uuid.md). 
+Значение типа [UUID](../../sql_reference/functions/uuid_functions.md). **Пример использования** @@ -56,7 +56,7 @@ SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid ## UUIDStringToNum {#uuidstringtonum} -Принимает строку, содержащую 36 символов в формате `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, и возвращает в виде набора байт в [FixedString(16)](../../data_types/fixedstring.md). +Принимает строку, содержащую 36 символов в формате `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, и возвращает в виде набора байт в [FixedString(16)](../../sql_reference/functions/uuid_functions.md). ``` sql UUIDStringToNum(String) @@ -82,7 +82,7 @@ SELECT ## UUIDNumToString {#uuidnumtostring} -Принимает значение типа [FixedString(16)](../../data_types/fixedstring.md). Возвращает строку из 36 символов в текстовом виде. +Принимает значение типа [FixedString(16)](../../sql_reference/functions/uuid_functions.md). Возвращает строку из 36 символов в текстовом виде. ``` sql UUIDNumToString(FixedString(16)) diff --git a/docs/ru/query_language/functions/ym_dict_functions.md b/docs/ru/sql_reference/functions/ym_dict_functions.md similarity index 98% rename from docs/ru/query_language/functions/ym_dict_functions.md rename to docs/ru/sql_reference/functions/ym_dict_functions.md index d5e11658a4f..5d7aece88b9 100644 --- a/docs/ru/query_language/functions/ym_dict_functions.md +++ b/docs/ru/sql_reference/functions/ym_dict_functions.md @@ -113,8 +113,8 @@ regionToTopContinent(id[, geobase]); **Параметры** -- `id` — Идентификатор региона из геобазы Яндекса. [UInt32](../../data_types/int_uint.md). -- `geobase` — Ключ словаря. Смотрите [Множественные геобазы](#multiple-geobases). [String](../../data_types/string.md). Опциональный параметр. +- `id` — Идентификатор региона из геобазы Яндекса. [UInt32](../../sql_reference/functions/ym_dict_functions.md). +- `geobase` — Ключ словаря. Смотрите [Множественные геобазы](#multiple-geobases). [String](../../sql_reference/functions/ym_dict_functions.md). Опциональный параметр. **Возвращаемое значение** diff --git a/docs/ru/sql_reference/index.md b/docs/ru/sql_reference/index.md new file mode 100644 index 00000000000..ca35c14a547 --- /dev/null +++ b/docs/ru/sql_reference/index.md @@ -0,0 +1,9 @@ +# Справка по SQL {#spravka-po-sql} + +- [SELECT](statements/select.md) +- [INSERT INTO](statements/insert_into.md) +- [CREATE](statements/create.md) +- [ALTER](statements/alter.md) +- [Прочие виды запросов](statements/misc.md) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/) diff --git a/docs/ru/query_language/operators.md b/docs/ru/sql_reference/operators.md similarity index 89% rename from docs/ru/query_language/operators.md rename to docs/ru/sql_reference/operators.md index 670990b0967..2cff1ad6aed 100644 --- a/docs/ru/query_language/operators.md +++ b/docs/ru/sql_reference/operators.md @@ -55,7 +55,7 @@ ## Операторы для работы с множествами {#operatory-dlia-raboty-s-mnozhestvami} -*Смотрите раздел [Операторы IN](select.md#select-in-operators).* +*Смотрите раздел [Операторы IN](../sql_reference/statements/select.md#select-in-operators).* `a IN ...` - функция `in(a, b)` @@ -86,7 +86,7 @@ EXTRACT(part FROM date); Эти значения могут быть указаны также в нижнем регистре (`day`, `month`). -В параметре `date` указывается исходная дата. Поддерживаются типы [Date](../data_types/date.md) и [DateTime](../data_types/datetime.md). +В параметре `date` указывается исходная дата. 
Поддерживаются типы [Date](../sql_reference/data_types/date.md) и [DateTime](../sql_reference/data_types/datetime.md). Примеры: @@ -133,7 +133,7 @@ FROM test.Orders; ### INTERVAL {#operator-interval} -Создаёт значение типа [Interval](../data_types/special_data_types/interval.md) которое должно использоваться в арифметических операциях со значениями типов [Date](../data_types/date.md) и [DateTime](../data_types/datetime.md). +Создаёт значение типа [Interval](../sql_reference/operators.md) которое должно использоваться в арифметических операциях со значениями типов [Date](../sql_reference/operators.md) и [DateTime](../sql_reference/operators.md). Типы интервалов: - `SECOND` @@ -162,8 +162,8 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL **Смотрите также** -- Тип данных [Interval](../data_types/special_data_types/interval.md) -- Функции преобразования типов [toInterval](functions/type_conversion_functions.md#function-tointerval) +- Тип данных [Interval](../sql_reference/operators.md) +- Функции преобразования типов [toInterval](../sql_reference/operators.md#function-tointerval) ## Оператор логического отрицания {#operator-logicheskogo-otritsaniia} @@ -183,7 +183,7 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL Примечание: -Условный оператор сначала вычисляет значения b и c, затем проверяет выполнение условия a, и только после этого возвращает соответствующее значение. Если в качестве b или с выступает функция [arrayJoin()](functions/array_join.md#functions_arrayjoin), то размножение каждой строки произойдет вне зависимости от условия а. +Условный оператор сначала вычисляет значения b и c, затем проверяет выполнение условия a, и только после этого возвращает соответствующее значение. Если в качестве b или с выступает функция [arrayJoin()](../sql_reference/operators.md#functions_arrayjoin), то размножение каждой строки произойдет вне зависимости от условия а. ## Условное выражение {#operator_case} @@ -232,7 +232,7 @@ ClickHouse поддерживает операторы `IS NULL` и `IS NOT NULL ### IS NULL {#operator-is-null} -- Для значений типа [Nullable](../data_types/nullable.md) оператор `IS NULL` возвращает: +- Для значений типа [Nullable](../sql_reference/operators.md) оператор `IS NULL` возвращает: - `1`, если значение — `NULL`. - `0` в обратном случае. - Для прочих значений оператор `IS NULL` всегда возвращает `0`. @@ -251,7 +251,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL ### IS NOT NULL {#is-not-null} -- Для значений типа [Nullable](../data_types/nullable.md) оператор `IS NOT NULL` возвращает: +- Для значений типа [Nullable](../sql_reference/operators.md) оператор `IS NOT NULL` возвращает: - `0`, если значение — `NULL`. - `1`, в обратном случае. - Для прочих значений оператор `IS NOT NULL` всегда возвращает `1`. diff --git a/docs/ru/query_language/alter.md b/docs/ru/sql_reference/statements/alter.md similarity index 93% rename from docs/ru/query_language/alter.md rename to docs/ru/sql_reference/statements/alter.md index 401d7e3bcbc..bc06fe074d2 100644 --- a/docs/ru/query_language/alter.md +++ b/docs/ru/sql_reference/statements/alter.md @@ -33,7 +33,7 @@ ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] Если указано `IF NOT EXISTS`, запрос не будет возвращать ошибку, если столбец уже существует. Если указано `AFTER name_after` (имя другого столбца), то столбец добавляется (в список столбцов таблицы) после указанного. Иначе, столбец добавляется в конец таблицы. 
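A short sketch of the clause described above; the table and column names here are illustrative, not from the original docs:

``` sql
-- add a column with a default value, placed right after an existing column
ALTER TABLE visits
    ADD COLUMN IF NOT EXISTS browser String DEFAULT '' AFTER user_id;
```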
Обратите внимание, ClickHouse не позволяет добавлять столбцы в начало таблицы. Для цепочки действий, `name_after` может быть именем столбца, который добавляется в одном из предыдущих действий.

-Добавление столбца всего лишь меняет структуру таблицы, и не производит никаких действий с данными - соответствующие данные не появляются на диске после ALTER-а. При чтении из таблицы, если для какого-либо столбца отсутствуют данные, то он заполняется значениями по умолчанию (выполняя выражение по умолчанию, если такое есть, или нулями, пустыми строками). Также, столбец появляется на диске при слиянии кусков данных (см. [MergeTree](../operations/table_engines/mergetree.md)).
+Добавление столбца всего лишь меняет структуру таблицы, и не производит никаких действий с данными - соответствующие данные не появляются на диске после ALTER-а. При чтении из таблицы, если для какого-либо столбца отсутствуют данные, то он заполняется значениями по умолчанию (выполняя выражение по умолчанию, если такое есть, или нулями, пустыми строками). Также, столбец появляется на диске при слиянии кусков данных (см. [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md)).

Такая схема позволяет добиться мгновенной работы запроса `ALTER` и отсутствия необходимости увеличивать объём старых данных.

@@ -107,11 +107,11 @@ MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL]

- TTL

-        Примеры изменения TTL столбца смотрите в разделе [TTL столбца](../operations/table_engines/mergetree.md#mergetree-column-ttl).
+        Примеры изменения TTL столбца смотрите в разделе [TTL столбца](../../engines/table_engines/mergetree_family/mergetree.md#mergetree-column-ttl).

Если указано `IF EXISTS`, запрос не возвращает ошибку, если столбца не существует.

-При изменении типа, значения преобразуются так, как если бы к ним была применена функция [toType](functions/type_conversion_functions.md). Если изменяется только выражение для умолчания, запрос не делает никакой сложной работы и выполняется мгновенно.
+При изменении типа, значения преобразуются так, как если бы к ним была применена функция [toType](../../sql_reference/functions/type_conversion_functions.md). Если изменяется только выражение для умолчания, запрос не делает никакой сложной работы и выполняется мгновенно.

Пример запроса:

@@ -139,11 +139,11 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)

Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`).

-Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](insert_into.md#insert_query_insert-select), затем поменять таблицы местами с помощью запроса [RENAME](misc.md#misc_operations-rename), и удалить старую таблицу.
В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../operations/utils/clickhouse-copier.md).

Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть, если на момент запроса `ALTER`, выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время, все новые запросы к той же таблице, будут ждать, пока завершится этот `ALTER`.

-Для таблиц, которые не хранят данные самостоятельно (типа [Merge](../operations/table_engines/merge.md) и [Distributed](../operations/table_engines/distributed.md)), `ALTER` всего лишь меняет структуру таблицы, но не меняет структуру подчинённых таблиц. Для примера, при ALTER-е таблицы типа `Distributed`, вам также потребуется выполнить запрос `ALTER` для таблиц на всех удалённых серверах.
+Для таблиц, которые не хранят данные самостоятельно (типа [Merge](../../engines/table_engines/special/merge.md) и [Distributed](../../engines/table_engines/special/distributed.md)), `ALTER` всего лишь меняет структуру таблицы, но не меняет структуру подчинённых таблиц. Для примера, при ALTER-е таблицы типа `Distributed`, вам также потребуется выполнить запрос `ALTER` для таблиц на всех удалённых серверах.

### Манипуляции с ключевыми выражениями таблиц {#manipuliatsii-s-kliuchevymi-vyrazheniiami-tablits}

@@ -153,8 +153,8 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
MODIFY ORDER BY new_expression
```

-Работает только для таблиц семейства [`MergeTree`](../operations/table_engines/mergetree.md) (в том числе [реплицированных](../operations/table_engines/replication.md)). После выполнения запроса
-[ключ сортировки](../operations/table_engines/mergetree.md) таблицы
+Работает только для таблиц семейства [`MergeTree`](../../engines/table_engines/mergetree_family/mergetree.md) (в том числе [реплицированных](../../engines/table_engines/mergetree_family/replication.md)). После выполнения запроса
+[ключ сортировки](../../engines/table_engines/mergetree_family/mergetree.md) таблицы
заменяется на `new_expression` (выражение или кортеж выражений). Первичный ключ при этом остаётся прежним.

Операция затрагивает только метаданные. Чтобы сохранить свойство упорядоченности кусков данных по ключу

@@ -197,7 +197,7 @@ ALTER TABLE [db].name DROP CONSTRAINT constraint_name;

### Манипуляции с партициями и кусками {#alter_manipulations-with-partitions}

-Для работы с [партициями](../operations/table_engines/custom_partitioning_key.md) доступны следующие операции:
+Для работы с [партициями](../../engines/table_engines/mergetree_family/custom_partitioning_key.md) доступны следующие операции:

- [DETACH PARTITION](#alter_detach-partition) – перенести партицию в директорию `detached`;
- [DROP PARTITION](#alter_drop-partition) – удалить партицию;

@@ -229,7 +229,7 @@ ALTER TABLE visits DETACH PARTITION 201901

После того как запрос будет выполнен, вы сможете производить любые операции с данными в директории `detached`. Например, можно удалить их из файловой системы.

-Запрос реплицируется — данные будут перенесены в директорию `detached` и забыты на всех репликах. Обратите внимание, запрос может быть отправлен только на реплику-лидер.
Чтобы узнать, является ли реплика лидером, выполните запрос `SELECT` к системной таблице [system.replicas](../../operations/system_tables.md#system_tables-replicas). Либо можно выполнить запрос `DETACH` на всех репликах — тогда на всех репликах, кроме реплики-лидера, запрос вернет ошибку.

#### DROP PARTITION {#alter_drop-partition}

@@ -355,7 +355,7 @@ ALTER TABLE table_name FREEZE [PARTITION partition_expr]

- `N` — инкрементальный номер резервной копии.

!!! note "Примечание"
-    При использовании [нескольких дисков для хранения данных таблицы](../operations/table_engines/mergetree.md#table_engine-mergetree-multiple-volumes) директория `shadow/N` появляется на каждом из дисков, на которых были куски, попавшие под выражение `PARTITION`.
+    При использовании [нескольких дисков для хранения данных таблицы](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes) директория `shadow/N` появляется на каждом из дисков, на которых были куски, попавшие под выражение `PARTITION`.

Структура директорий внутри резервной копии такая же, как внутри `/var/lib/clickhouse/`. Запрос выполнит ‘chmod’ для всех файлов, запрещая запись в них.

@@ -373,7 +373,7 @@ ALTER TABLE table_name FREEZE [PARTITION partition_expr]

Восстановление данных из резервной копии не требует остановки сервера.

-Подробнее о резервном копировании и восстановлении данных читайте в разделе [Резервное копирование данных](../operations/backup.md).
+Подробнее о резервном копировании и восстановлении данных читайте в разделе [Резервное копирование данных](../../operations/backup.md).

#### FETCH PARTITION {#alter_fetch-partition}

@@ -406,7 +406,7 @@ ALTER TABLE users ATTACH PARTITION 201902;

#### MOVE PARTITION\|PART {#alter_move-partition}

-Перемещает партицию или кусок данных на другой том или диск для таблиц с движком `MergeTree`. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](../operations/table_engines/mergetree.md#table_engine-mergetree-multiple-volumes).
+Перемещает партицию или кусок данных на другой том или диск для таблиц с движком `MergeTree`. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes).

``` sql
ALTER TABLE table_name MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'

@@ -429,10 +429,10 @@ ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd'

Чтобы задать нужную партицию в запросах `ALTER ... PARTITION`, можно использовать:

-- Имя партиции. Посмотреть имя партиции можно в столбце `partition` системной таблицы [system.parts](../operations/system_tables.md#system_tables-parts). Например, `ALTER TABLE visits DETACH PARTITION 201901`.
+- Имя партиции. Посмотреть имя партиции можно в столбце `partition` системной таблицы [system.parts](../../operations/system_tables.md#system_tables-parts). Например, `ALTER TABLE visits DETACH PARTITION 201901`.
- Произвольное выражение из столбцов исходной таблицы. Также поддерживаются константы и константные выражения. Например, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- Строковый идентификатор партиции. Идентификатор партиции используется для именования кусков партиции на файловой системе и в ZooKeeper. В запросах `ALTER` идентификатор партиции нужно указывать в секции `PARTITION ID`, в одинарных кавычках. Например, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
-- Для запросов [ATTACH PART](#alter_attach-partition) и [DROP DETACHED PART](#alter_drop-detached): чтобы задать имя куска партиции, используйте строковой литерал со значением из столбца `name` системной таблицы [system.detached\_parts](../operations/system_tables.md#system_tables-detached_parts). Например, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
+- Для запросов [ATTACH PART](#alter_attach-partition) и [DROP DETACHED PART](#alter_drop-detached): чтобы задать имя куска партиции, используйте строковый литерал со значением из столбца `name` системной таблицы [system.detached\_parts](../../operations/system_tables.md#system_tables-detached_parts). Например, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

Использование кавычек в имени партиций зависит от типа данных столбца, по которому задано партиционирование. Например, для столбца с типом `String` имя партиции необходимо указывать в кавычках (одинарных). Для типов `Date` и `Int*` кавычки указывать не нужно.

@@ -448,7 +448,7 @@ OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;

### Манипуляции с TTL таблицы {#manipuliatsii-s-ttl-tablitsy}

-Вы можете изменить [TTL для таблицы](../operations/table_engines/mergetree.md#mergetree-table-ttl) запросом следующего вида:
+Вы можете изменить [TTL для таблицы](../../engines/table_engines/mergetree_family/mergetree.md#mergetree-table-ttl) запросом следующего вида:

``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression
```

@@ -493,7 +493,7 @@ ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name

Мутации линейно упорядочены между собой и накладываются на каждый кусок в порядке добавления. Мутации также упорядочены со вставками - гарантируется, что данные, вставленные в таблицу до начала выполнения запроса мутации, будут изменены, а данные, вставленные после окончания запроса мутации, изменены не будут. При этом мутации никак не блокируют вставки.

-Запрос завершается немедленно после добавления информации о мутации (для реплицированных таблиц - в ZooKeeper, для нереплицированных - на файловую систему). Сама мутация выполняется асинхронно, используя настройки системного профиля. Следить за ходом её выполнения можно по таблице [`system.mutations`](../operations/system_tables.md#system_tables-mutations). Добавленные мутации будут выполняться до конца даже в случае перезапуска серверов ClickHouse. Откатить мутацию после её добавления нельзя, но если мутация по какой-то причине не может выполниться до конца, её можно остановить с помощью запроса [`KILL MUTATION`](misc.md#kill-mutation).
+Запрос завершается немедленно после добавления информации о мутации (для реплицированных таблиц - в ZooKeeper, для нереплицированных - на файловую систему). Сама мутация выполняется асинхронно, используя настройки системного профиля. Следить за ходом её выполнения можно по таблице [`system.mutations`](../../operations/system_tables.md#system_tables-mutations). Добавленные мутации будут выполняться до конца даже в случае перезапуска серверов ClickHouse. Откатить мутацию после её добавления нельзя, но если мутация по какой-то причине не может выполниться до конца, её можно остановить с помощью запроса [`KILL MUTATION`](misc.md#kill-mutation).

Записи о последних выполненных мутациях удаляются не сразу (количество сохраняемых мутаций определяется параметром движка таблиц `finished_mutations_to_keep`). Более старые записи удаляются.
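Для наглядности — небольшой набросок к описанным выше мутациям (имя таблицы `visits` и условие в `WHERE` здесь условные):

``` sql
-- Мутация: асинхронно удаляет строки по условию.
ALTER TABLE visits DELETE WHERE EventDate < '2018-01-01';

-- Ход выполнения отслеживается по таблице system.mutations.
SELECT mutation_id, command, is_done
FROM system.mutations
WHERE table = 'visits';

-- Незавершённую мутацию можно остановить.
KILL MUTATION WHERE database = 'default' AND table = 'visits';
```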
diff --git a/docs/ru/query_language/create.md b/docs/ru/sql_reference/statements/create.md
similarity index 94%
rename from docs/ru/query_language/create.md
rename to docs/ru/sql_reference/statements/create.md
index dfaae11a359..7a5e3c9dc08 100644
--- a/docs/ru/query_language/create.md
+++ b/docs/ru/sql_reference/statements/create.md
@@ -20,11 +20,11 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(..

- `ENGINE`

-    - [MySQL](../database_engines/mysql.md)
+    - [MySQL](../../engines/database_engines/mysql.md)

Позволяет получать данные с удаленного сервера MySQL.

-    По умолчанию ClickHouse использует собственный [движок баз данных](../database_engines/index.md).
+    По умолчанию ClickHouse использует собственный [движок баз данных](../../engines/database_engines/index.md).

## CREATE TABLE {#create-table-query}

@@ -65,7 +65,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...

Во всех случаях, если указано `IF NOT EXISTS`, то запрос не будет возвращать ошибку, если таблица уже существует. В этом случае, запрос будет ничего не делать.

-После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../operations/table_engines/index.md#table_engines).
+После секции `ENGINE` в запросе могут использоваться и другие секции в зависимости от движка. Подробную документацию по созданию таблиц смотрите в описаниях [движков таблиц](../../engines/table_engines/index.md#table_engines).

### Значения по умолчанию {#create-default-values}

@@ -123,11 +123,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

### Выражение для TTL {#vyrazhenie-dlia-ttl}

-Определяет время хранения значений. Может быть указано только для таблиц семейства MergeTree. Подробнее смотрите в [TTL для столбцов и таблиц](../operations/table_engines/mergetree.md#table_engine-mergetree-ttl).
+Определяет время хранения значений. Может быть указано только для таблиц семейства MergeTree. Подробнее смотрите в [TTL для столбцов и таблиц](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-ttl).

### Кодеки сжатия столбцов {#codecs}

-По умолчанию, ClickHouse применяет к столбцу метод сжатия, определённый в [конфигурации сервера](../operations/server_settings/settings.md#compression). Кроме этого, можно задать метод сжатия для каждого отдельного столбца в запросе `CREATE TABLE`.
+По умолчанию, ClickHouse применяет к столбцу метод сжатия, определённый в [конфигурации сервера](../../operations/server_configuration_parameters/settings.md#compression). Кроме этого, можно задать метод сжатия для каждого отдельного столбца в запросе `CREATE TABLE`.

``` sql
CREATE TABLE codec_example
(
@@ -149,10 +149,10 @@ ENGINE =

Сжатие поддерживается для следующих движков таблиц:

-- [MergeTree family](../operations/table_engines/mergetree.md)
-- [Log family](../operations/table_engines/log_family.md)
-- [Set](../operations/table_engines/set.md)
-- [Join](../operations/table_engines/join.md)
+- [MergeTree family](../../engines/table_engines/mergetree_family/mergetree.md)
+- [Log family](../../engines/table_engines/log_family/log_family.md)
+- [Set](../../engines/table_engines/special/set.md)
+- [Join](../../engines/table_engines/special/join.md)

ClickHouse поддерживает кодеки общего назначения и специализированные кодеки.
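К сказанному о кодеках — минимальный набросок того, как кодек задаётся для отдельного столбца (схема и имена условные; `Delta` обычно хорошо сочетается с монотонно растущими значениями, а `ZSTD` дожимает результат):

``` sql
CREATE TABLE codec_sketch
(
    ts DateTime CODEC(Delta, ZSTD),   -- дельта-кодирование + сжатие ZSTD
    value Float64 CODEC(Gorilla),     -- специализированный кодек для float-значений
    comment String CODEC(LZ4)         -- кодек общего назначения
)
ENGINE = MergeTree()
ORDER BY ts;
```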
@@ -213,7 +213,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name

В большинстве случаев, временные таблицы создаются не вручную, а при использовании внешних данных для запроса, или при распределённом `(GLOBAL) IN`. Подробнее см. соответствующие разделы

-Вместо временных можно использовать обычные таблицы с [ENGINE = Memory](../operations/table_engines/memory.md).
+Вместо временных можно использовать обычные таблицы с [ENGINE = Memory](../../engines/table_engines/special/memory.md).

## Распределенные DDL запросы (секция ON CLUSTER) {#raspredelennye-ddl-zaprosy-sektsiia-on-cluster}

@@ -289,12 +289,12 @@ LAYOUT(LAYOUT_NAME([param_name param_value]))
LIFETIME([MIN val1] MAX val2)
```

-Создаёт [внешний словарь](dicts/external_dicts.md) с заданной [структурой](dicts/external_dicts_dict_structure.md), [источником](dicts/external_dicts_dict_sources.md), [способом размещения в памяти](dicts/external_dicts_dict_layout.md) и [периодом обновления](dicts/external_dicts_dict_lifetime.md).
+Создаёт [внешний словарь](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md) с заданной [структурой](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md), [источником](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md), [способом размещения в памяти](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md) и [периодом обновления](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md).

Структура внешнего словаря состоит из атрибутов. Атрибуты словаря задаются как столбцы таблицы. Единственным обязательным свойством атрибута является его тип, все остальные свойства могут иметь значения по умолчанию.

-В зависимости от [способа размещения словаря в памяти](dicts/external_dicts_dict_layout.md), ключами словаря могут быть один и более атрибутов.
+В зависимости от [способа размещения словаря в памяти](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md), ключами словаря могут быть один и более атрибутов.

-Смотрите [Внешние словари](dicts/external_dicts.md).
+Смотрите [Внешние словари](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md).

[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/create/)
diff --git a/docs/ru/sql_reference/statements/index.md b/docs/ru/sql_reference/statements/index.md
new file mode 100644
index 00000000000..1adf93a153e
--- /dev/null
+++ b/docs/ru/sql_reference/statements/index.md
@@ -0,0 +1,5 @@
+---
+toc_folder_title: Statements
+toc_priority: 31
+---
+
diff --git a/docs/ru/query_language/insert_into.md b/docs/ru/sql_reference/statements/insert_into.md
similarity index 88%
rename from docs/ru/query_language/insert_into.md
rename to docs/ru/sql_reference/statements/insert_into.md
index 7a9540e059c..ed07d6d3b1c 100644
--- a/docs/ru/query_language/insert_into.md
+++ b/docs/ru/sql_reference/statements/insert_into.md
@@ -13,9 +13,9 @@ INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), .

- Значения, вычисляемые из `DEFAULT` выражений, указанных в определении таблицы.
- Нули и пустые строки, если `DEFAULT` не определены.

-Если [strict\_insert\_defaults=1](../operations/settings/settings.md), то столбцы, для которых не определены `DEFAULT`, необходимо перечислить в запросе.
+Если [strict\_insert\_defaults=1](../../operations/settings/settings.md), то столбцы, для которых не определены `DEFAULT`, необходимо перечислить в запросе.

-В INSERT можно передавать данные любого [формата](../interfaces/formats.md#formats), который поддерживает ClickHouse. Для этого формат необходимо указать в запросе в явном виде:
+В INSERT можно передавать данные любого [формата](../../interfaces/formats.md#formats), который поддерживает ClickHouse.
Для этого формат необходимо указать в запросе в явном виде: ``` sql INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set @@ -37,7 +37,7 @@ INSERT INTO t FORMAT TabSeparated 22 Qwerty ``` -С помощью консольного клиента или HTTP интерфейса можно вставлять данные отдельно от запроса. Как это сделать, читайте в разделе «[Интерфейсы](../interfaces/index.md#interfaces)». +С помощью консольного клиента или HTTP интерфейса можно вставлять данные отдельно от запроса. Как это сделать, читайте в разделе «[Интерфейсы](../../interfaces/index.md#interfaces)». ### Ограничения (constraints) {#ogranicheniia-constraints} @@ -56,7 +56,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... Не поддерживаются другие запросы на модификацию части данных: `UPDATE`, `DELETE`, `REPLACE`, `MERGE`, `UPSERT`, `INSERT UPDATE`. Вы можете удалять старые данные с помощью запроса `ALTER TABLE ... DROP PARTITION`. -Для табличной функции [input()](table_functions/input.md) после секции `SELECT` должна следовать +Для табличной функции [input()](../table_functions/input.md) после секции `SELECT` должна следовать секция `FORMAT`. ### Замечания о производительности {#zamechaniia-o-proizvoditelnosti} diff --git a/docs/ru/query_language/misc.md b/docs/ru/sql_reference/statements/misc.md similarity index 87% rename from docs/ru/query_language/misc.md rename to docs/ru/sql_reference/statements/misc.md index 476c57d34a5..d8278534d46 100644 --- a/docs/ru/query_language/misc.md +++ b/docs/ru/sql_reference/statements/misc.md @@ -25,17 +25,17 @@ CHECK TABLE [db.]name Запрос `CHECK TABLE` сравнивает текущие размеры файлов (в которых хранятся данные из колонок) с ожидаемыми значениями. Если значения не совпадают, данные в таблице считаются поврежденными. Искажение возможно, например, из-за сбоя при записи данных. -Ответ содержит колонку `result`, содержащую одну строку с типом [Boolean](../data_types/boolean.md). Допустимые значения: +Ответ содержит колонку `result`, содержащую одну строку с типом [Boolean](../../sql_reference/data_types/boolean.md). Допустимые значения: - 0 - данные в таблице повреждены; - 1 - данные не повреждены. Запрос `CHECK TABLE` поддерживает следующие движки таблиц: -- [Log](../operations/table_engines/log.md) -- [TinyLog](../operations/table_engines/tinylog.md) -- [StripeLog](../operations/table_engines/stripelog.md) -- [Семейство MergeTree](../operations/table_engines/mergetree.md) +- [Log](../../engines/table_engines/log_family/log.md) +- [TinyLog](../../engines/table_engines/log_family/tinylog.md) +- [StripeLog](../../engines/table_engines/log_family/stripelog.md) +- [Семейство MergeTree](../../engines/table_engines/mergetree_family/index.md) При попытке выполнить запрос с таблицами с другими табличными движками, ClickHouse генерирует исключение. @@ -48,7 +48,7 @@ CHECK TABLE [db.]name В этом случае можно скопировать оставшиеся неповрежденные данные в другую таблицу. Для этого: 1. Создайте новую таблицу с такой же структурой, как у поврежденной таблицы. Для этого выполните запрос `CREATE TABLE AS `. -2. Установите значение параметра [max\_threads](../operations/settings/settings.md#settings-max_threads) в 1. Это нужно для того, чтобы выполнить следующий запрос в одном потоке. Установить значение параметра можно через запрос: `SET max_threads = 1`. +2. Установите значение параметра [max\_threads](../../operations/settings/settings.md#settings-max_threads) в 1. Это нужно для того, чтобы выполнить следующий запрос в одном потоке. Установить значение параметра можно через запрос: `SET max_threads = 1`. 
3. Выполните запрос `INSERT INTO SELECT * FROM `. В результате неповрежденные данные будут скопированы в другую таблицу. Обратите внимание, будут скопированы только те данные, которые следуют до поврежденного участка.
4. Перезапустите `clickhouse-client`, чтобы вернуть предыдущее значение параметра `max_threads`.

@@ -153,7 +153,7 @@ KILL MUTATION [ON CLUSTER cluster]
  [FORMAT format]
```

-Пытается остановить выполняющиеся в данный момент [мутации](alter.md#alter-mutations). Мутации для остановки выбираются из таблицы [`system.mutations`](../../operations/system_tables.md#system_tables-mutations) с помощью условия, указанного в секции `WHERE` запроса `KILL`.

Тестовый вариант запроса (`TEST`) только проверяет права пользователя и выводит список запросов для остановки.

@@ -177,11 +177,11 @@ KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = '
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE]
```

-Запрос пытается запустить внеплановый мёрж кусков данных для таблиц семейства [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md). Другие движки таблиц не поддерживаются.

-Если `OPTIMIZE` применяется к таблицам семейства [ReplicatedMergeTree](../../engines/table_engines/mergetree_family/replication.md), ClickHouse создаёт задачу на мёрж и ожидает её исполнения на всех узлах (если активирована настройка `replication_alter_partitions_sync`).

-- Если `OPTIMIZE` не выполняет мёрж по любой причине, ClickHouse не оповещает об этом клиента. Чтобы включить оповещения, используйте настройку [optimize\_throw\_if\_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop).
- Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter.md#alter-how-to-specify-part-expr).
- Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске.
- Если указать `DEDUPLICATE`, то произойдет схлопывание полностью одинаковых строк (сравниваются значения во всех колонках), имеет смысл только для движка MergeTree.

@@ -205,7 +205,7 @@ RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ...
SET param = value
```

-Устанавливает значение `value` для [настройки](../../operations/settings/index.md) `param` в текущей сессии.
[Конфигурационные параметры сервера](../../operations/server_configuration_parameters/settings.md) нельзя изменить подобным образом.

Можно одним запросом установить все настройки из заданного профиля настроек.

``` sql
SET profile = 'profile-name-from-the-settings-file'
```

-Подробности смотрите в разделе [Настройки](../operations/settings/settings.md).
+Подробности смотрите в разделе [Настройки](../../operations/settings/settings.md).

## TRUNCATE {#truncate}

``` sql
TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```

Удаляет все данные из таблицы. Если условие `IF EXISTS` не указано, запрос вернет ошибку, если таблицы не существует.

-Запрос `TRUNCATE` не поддерживается для следующих движков: [View](../operations/table_engines/view.md), [File](../operations/table_engines/file.md), [URL](../operations/table_engines/url.md) и [Null](../operations/table_engines/null.md).
+Запрос `TRUNCATE` не поддерживается для следующих движков: [View](../../engines/table_engines/special/view.md), [File](../../engines/table_engines/special/file.md), [URL](../../engines/table_engines/special/url.md) и [Null](../../engines/table_engines/special/null.md).

## USE {#use}

diff --git a/docs/ru/query_language/select.md b/docs/ru/sql_reference/statements/select.md
similarity index 91%
rename from docs/ru/query_language/select.md
rename to docs/ru/sql_reference/statements/select.md
index 759a65f155a..bd9dc21e2aa 100644
--- a/docs/ru/query_language/select.md
+++ b/docs/ru/sql_reference/statements/select.md
@@ -107,7 +107,7 @@ Cекция `FROM` определяет источник данных:

- Таблица
- Подзапрос
-- [Табличная функция](table_functions/index.md)
+- [Табличная функция](../table_functions/index.md)

Также могут присутствовать `ARRAY JOIN` и обычный `JOIN` (смотрите ниже).

@@ -117,17 +117,17 @@ Cекция `FROM` определяет источник данных:

Для выполнения запроса, из соответствующей таблицы, вынимаются все столбцы, перечисленные в запросе. Из подзапросов выкидываются столбцы, не нужные для внешнего запроса. Если в запросе не перечислено ни одного столбца (например, `SELECT count() FROM t`), то из таблицы всё равно вынимается один какой-нибудь столбец (предпочитается самый маленький), для того, чтобы можно было посчитать количество строк.

-Модификатор `FINAL` может быть использован в запросе `SELECT` из таблиц семейства [MergeTree](../operations/table_engines/mergetree.md). При указании `FINAL`, данные будут выбираться полностью «домерженными». Стоит учитывать, что использование `FINAL` приводит к чтению также столбцов, относящихся к первичному ключу. Также, запрос будет выполняться в один поток, и при выполнении запроса будет выполняться слияние данных. Это приводит к тому, что при использовании `FINAL`, запрос выполняется медленнее. В большинстве случаев, следует избегать использования `FINAL`.
+Модификатор `FINAL` может быть использован в запросе `SELECT` из таблиц семейства [MergeTree](../../engines/table_engines/mergetree_family/index.md). При указании `FINAL`, данные будут выбираться полностью «домерженными». Стоит учитывать, что использование `FINAL` приводит к чтению также столбцов, относящихся к первичному ключу. Также, запрос будет выполняться в один поток, и при выполнении запроса будет выполняться слияние данных. Это приводит к тому, что при использовании `FINAL`, запрос выполняется медленнее. В большинстве случаев, следует избегать использования `FINAL`.
Модификатор `FINAL` может быть использован для всех таблиц семейства `MergeTree`, которые производят преобразования данных в процессе фоновых слияний (кроме GraphiteMergeTree).

#### FINAL Modifier {#select-from-final}

-Применим при выборке данных из таблиц с движками таблиц семейства [MergeTree](../operations/table_engines/mergetree.md), кроме `GraphiteMergeTree`. Если в запросе используется `FINAL`, то ClickHouse полностью мёржит данные перед выдачей результата, таким образом выполняя все преобразования данных, которые производятся движком таблиц при мёржах.
+Применим при выборке данных из таблиц с движками таблиц семейства [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md), кроме `GraphiteMergeTree`. Если в запросе используется `FINAL`, то ClickHouse полностью мёржит данные перед выдачей результата, таким образом выполняя все преобразования данных, которые производятся движком таблиц при мёржах.

Также поддержан для движков:

-- [Replicated](../operations/table_engines/replication.md)-версий `MergeTree`.
-- [View](../operations/table_engines/view.md), [Buffer](../operations/table_engines/buffer.md), [Distributed](../operations/table_engines/distributed.md), и [MaterializedView](../operations/table_engines/materializedview.md), которые работают поверх других движков, если они созданы для таблиц с движками семейства `MergeTree`.
+- [Replicated](../../engines/table_engines/mergetree_family/replication.md)-версий `MergeTree`.
+- [View](../../engines/table_engines/special/view.md), [Buffer](../../engines/table_engines/special/buffer.md), [Distributed](../../engines/table_engines/special/distributed.md), и [MaterializedView](../../engines/table_engines/special/materializedview.md), которые работают поверх других движков, если они созданы для таблиц с движками семейства `MergeTree`.

Запросы, использующие `FINAL` исполняются медленнее аналогичных запросов без `FINAL`, поскольку:

@@ -153,9 +153,9 @@ Cекция `FROM` определяет источник данных:

- Сэмплирование работает детерминированно. При многократном выполнении одного и того же запроса `SELECT .. SAMPLE`, результат всегда будет одинаковым.
- Сэмплирование поддерживает консистентность для разных таблиц. Имеется в виду, что для таблиц с одним и тем же ключом сэмплирования, подмножество данных в выборках будет одинаковым (выборки при этом должны быть сформированы для одинаковой доли данных). Например, выборка по идентификаторам посетителей выберет из разных таблиц строки с одинаковым подмножеством всех возможных идентификаторов. Это свойство позволяет использовать выборки в подзапросах в секции [IN](#select-in-operators), а также объединять выборки с помощью [JOIN](#select-join).
-- Сэмплирование позволяет читать меньше данных с диска. Обратите внимание, для этого необходимо корректно указать ключ сэмплирования. Подробнее см. в разделе [Создание таблицы MergeTree](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table).
+- Сэмплирование позволяет читать меньше данных с диска. Обратите внимание, для этого необходимо корректно указать ключ сэмплирования. Подробнее см. в разделе [Создание таблицы MergeTree](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-creating-a-table).

-Сэмплирование поддерживается только таблицами семейства [MergeTree](../operations/table_engines/mergetree.md) и только в том случае, если для таблиц был указан ключ сэмплирования (выражение, на основе которого должна производиться выборка). Подробнее см.
в разделе [Создание таблиц MergeTree](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table).
+Сэмплирование поддерживается только таблицами семейства [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) и только в том случае, если для таблиц был указан ключ сэмплирования (выражение, на основе которого должна производиться выборка). Подробнее см. в разделе [Создание таблиц MergeTree](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-creating-a-table).

Выражение `SAMPLE` в запросе можно задать следующими способами:

@@ -198,7 +198,7 @@ ORDER BY PageViews DESC LIMIT 1000

При выполнении `SAMPLE n` коэффициент сэмплирования заранее неизвестен (то есть нет информации о том, относительно какого количества данных будет сформирована выборка). Чтобы узнать коэффициент сэмплирования, используйте столбец `_sample_factor`.

-Виртуальный столбец `_sample_factor` автоматически создается в тех таблицах, для которых задано выражение `SAMPLE BY` (подробнее см. в разделе [Создание таблицы MergeTree](../operations/table_engines/mergetree.md#table_engine-mergetree-creating-a-table)). В столбце содержится коэффициент сэмплирования для таблицы – он рассчитывается динамически по мере добавления данных в таблицу. Ниже приведены примеры использования столбца `_sample_factor`.
+Виртуальный столбец `_sample_factor` автоматически создается в тех таблицах, для которых задано выражение `SAMPLE BY` (подробнее см. в разделе [Создание таблицы MergeTree](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-creating-a-table)). В столбце содержится коэффициент сэмплирования для таблицы – он рассчитывается динамически по мере добавления данных в таблицу. Ниже приведены примеры использования столбца `_sample_factor`.

Предположим, у нас есть таблица, в которой ведется статистика посещений сайта. Пример ниже показывает, как рассчитать суммарное число просмотров:

@@ -252,7 +252,7 @@ SAMPLE 1/10 OFFSET 1/2

### Секция ARRAY JOIN {#select-array-join-clause}

-Позволяет выполнить `JOIN` с массивом или вложенной структурой данных. Смысл похож на функцию [arrayJoin](functions/array_join.md#functions_arrayjoin), но функциональность более широкая.
+Позволяет выполнить `JOIN` с массивом или вложенной структурой данных. Смысл похож на функцию [arrayJoin](../../sql_reference/functions/array_join.md#functions_arrayjoin), но функциональность более широкая.

``` sql
SELECT
FROM
```

- `ARRAY JOIN` — в этом случае результат `JOIN` не будет содержать пустые массивы;
- `LEFT ARRAY JOIN` — пустые массивы попадут в результат выполнения `JOIN`. В качестве значения для пустых массивов устанавливается значение по умолчанию. Обычно это 0, пустая строка или NULL, в зависимости от типа элементов массива.

-Рассмотрим примеры использования `ARRAY JOIN` и `LEFT ARRAY JOIN`.
Для начала создадим таблицу, содержащую столбец с типом [Array](../../sql_reference/data_types/array.md), и добавим в него значение:

``` sql
CREATE TABLE arrays_test

@@ -389,7 +389,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS ma
└───────┴─────────┴───┴─────┴────────┘
```

-В примере ниже используется функция [arrayEnumerate](../../sql_reference/functions/array_functions.md#array_functions-arrayenumerate):

``` sql
SELECT s, arr, a, num, arrayEnumerate(arr)

@@ -409,7 +409,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num;

#### ARRAY JOIN с вложенными структурами данных {#array-join-s-vlozhennymi-strukturami-dannykh}

-`ARRAY JOIN` также работает с [вложенными структурами данных](../../sql_reference/data_types/nested_data_structures/nested.md). Пример:

``` sql
CREATE TABLE nested_test

@@ -502,7 +502,7 @@ ARRAY JOIN nest AS n;
└───────┴─────┴─────┴─────────┴────────────┘
```

-Пример использования функции [arrayEnumerate](../../sql_reference/functions/array_functions.md#array_functions-arrayenumerate):

``` sql
SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num

@@ -534,7 +534,7 @@ FROM
(ON )|(USING ) ...
```

-Вместо `` и `` можно указать имена таблиц. Это эквивалентно подзапросу `SELECT * FROM table`, за исключением особого случая таблицы с движком [Join](../../engines/table_engines/special/join.md) – массива, подготовленного для присоединения.

#### Поддерживаемые типы `JOIN` {#select-join-types}

@@ -618,9 +618,9 @@ USING (equi_column1, ... equi_columnN, asof_column)

`ASOF JOIN` принимает метку времени пользовательского события из `table_1` и находит такое событие в `table_2` метка времени которого наиболее близка к метке времени события из `table_1` в соответствии с условием на ближайшее совпадение. При этом столбец `user_id` используется для объединения по равенству, а столбец `ev_time` для объединения по ближайшему совпадению. В нашем примере `event_1_1` может быть объединено с `event_2_1`, `event_1_2` может быть объединено с `event_2_3`, а `event_2_2` не объединяется.

!!! note "Примечание"
    `ASOF JOIN` не поддержан для движка таблиц [Join](../../engines/table_engines/special/join.md).

-Чтобы задать значение строгости по умолчанию, используйте сессионный параметр [join\_default\_strictness](../../operations/settings/settings.md#settings-join_default_strictness).

#### GLOBAL JOIN {#global-join}

@@ -684,27 +684,27 @@ LIMIT 10

«Правая» таблица (результат подзапроса) располагается в оперативной памяти. Если её не хватает, вы не сможете выполнить `JOIN`.

-Каждый раз для выполнения запроса с одинаковым `JOIN`, подзапрос выполняется заново — результат не кэшируется.
Это можно избежать, используя специальный движок таблиц [Join](../operations/table_engines/join.md), представляющий собой подготовленное множество для соединения, которое всегда находится в оперативке.
+Каждый раз для выполнения запроса с одинаковым `JOIN`, подзапрос выполняется заново — результат не кэшируется. Этого можно избежать, используя специальный движок таблиц [Join](../../engines/table_engines/special/join.md), представляющий собой подготовленное множество для соединения, которое всегда находится в оперативке.

В некоторых случаях более эффективно использовать `IN` вместо `JOIN`. Среди разных типов `JOIN`, наиболее эффективен `ANY LEFT JOIN`, следующий по эффективности `ANY INNER JOIN`. Наименее эффективны `ALL LEFT JOIN` и `ALL INNER JOIN`.

-Если `JOIN` необходим для соединения с таблицами измерений (dimension tables - сравнительно небольшие таблицы, которые содержат свойства измерений - например, имена для рекламных кампаний), то использование `JOIN` может быть не очень удобным из-за громоздкости синтаксиса, а также из-за того, что правая таблица читается заново при каждом запросе. Специально для таких случаев существует функциональность «Внешние словари», которую следует использовать вместо `JOIN`. Дополнительные сведения смотрите в разделе [Внешние словари](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md).

**Ограничения по памяти**

ClickHouse использует алгоритм [hash join](https://en.wikipedia.org/wiki/Hash_join). ClickHouse принимает `` и создает для него хэш-таблицу в RAM. Чтобы ограничить потребление памяти операцией `JOIN`, используйте следующие параметры:

-- [max\_rows\_in\_join](../operations/settings/query_complexity.md#settings-max_rows_in_join) — ограничивает количество строк в хэш-таблице.
-- [max\_bytes\_in\_join](../operations/settings/query_complexity.md#settings-max_bytes_in_join) — ограничивает размер хэш-таблицы.
+- [max\_rows\_in\_join](../../operations/settings/query_complexity.md#settings-max_rows_in_join) — ограничивает количество строк в хэш-таблице.
+- [max\_bytes\_in\_join](../../operations/settings/query_complexity.md#settings-max_bytes_in_join) — ограничивает размер хэш-таблицы.

-По достижении любого из этих ограничений, ClickHouse действует в соответствии с настройкой [join\_overflow\_mode](../operations/settings/query_complexity.md#settings-join_overflow_mode).
+По достижении любого из этих ограничений, ClickHouse действует в соответствии с настройкой [join\_overflow\_mode](../../operations/settings/query_complexity.md#settings-join_overflow_mode).

#### Обработка пустых ячеек и NULL {#obrabotka-pustykh-iacheek-i-null}

-При слиянии таблиц могут появляться пустые ячейки. То, каким образом ClickHouse заполняет эти ячейки, определяется настройкой [join\_use\_nulls](../operations/settings/settings.md#join_use_nulls).
+При слиянии таблиц могут появляться пустые ячейки. То, каким образом ClickHouse заполняет эти ячейки, определяется настройкой [join\_use\_nulls](../../operations/settings/settings.md#join_use_nulls).
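Небольшой набросок к настройке `join_use_nulls` (имена таблиц `table_a` и `table_b` условные): при значении `1` несматченные ячейки правой таблицы заполняются `NULL`, а не значениями по умолчанию.

``` sql
-- Для строк table_a без пары в table_b столбец b.payload будет NULL.
SELECT a.id, b.payload
FROM table_a AS a
LEFT JOIN table_b AS b ON a.id = b.id
SETTINGS join_use_nulls = 1;
```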
-Если ключами `JOIN` выступают поля типа [Nullable](../data_types/nullable.md), то строки, где хотя бы один из ключей имеет значение [NULL](syntax.md#null-literal), не соединяются.
+Если ключами `JOIN` выступают поля типа [Nullable](../../sql_reference/data_types/nullable.md), то строки, где хотя бы один из ключей имеет значение [NULL](../syntax.md#null-literal), не соединяются.

#### Ограничения синтаксиса {#ogranicheniia-sintaksisa}

@@ -723,9 +723,9 @@

Результат выражения должен иметь тип `UInt8`.

-ClickHouse использует в выражении индексы, если это позволяет [движок таблицы](../operations/table_engines/index.md).
+ClickHouse использует в выражении индексы, если это позволяет [движок таблицы](../../engines/table_engines/index.md).

-Если в секции необходимо проверить [NULL](syntax.md#null-literal), то используйте операторы [IS NULL](operators.md#operator-is-null) и [IS NOT NULL](operators.md#is-not-null), а также соответствующие функции `isNull` и `isNotNull`. В противном случае выражение будет считаться всегда не выполненным.
+Если в секции необходимо проверить [NULL](../syntax.md#null-literal), то используйте операторы [IS NULL](../operators.md#operator-is-null) и [IS NOT NULL](../operators.md#is-not-null), а также соответствующие функции `isNull` и `isNotNull`. В противном случае выражение будет считаться всегда не выполненным.

Пример проверки на `NULL`:

@@ -796,7 +796,7 @@ GROUP BY вычисляет для каждого встретившегося

#### Обработка NULL {#obrabotka-null}

-При группировке, ClickHouse рассматривает [NULL](syntax.md) как значение, причём `NULL=NULL`.
+При группировке, ClickHouse рассматривает [NULL](../syntax.md) как значение, причём `NULL=NULL`.

Рассмотрим, что это значит на примере.

@@ -854,11 +854,11 @@ GROUP BY вычисляет для каждого встретившегося

#### GROUP BY во внешней памяти {#select-group-by-in-external-memory}

Можно включить сброс временных данных на диск, чтобы ограничить потребление оперативной памяти при выполнении `GROUP BY`.
-Настройка [max\_bytes\_before\_external\_group\_by](../operations/settings/settings.md#settings-max_bytes_before_external_group_by) определяет пороговое значение потребления RAM, по достижении которого временные данные `GROUP BY` сбрасываются в файловую систему. Если равно 0 (по умолчанию) - значит выключено.
+Настройка [max\_bytes\_before\_external\_group\_by](../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) определяет пороговое значение потребления RAM, по достижении которого временные данные `GROUP BY` сбрасываются в файловую систему. Если равно 0 (по умолчанию) - значит выключено.

При использовании `max_bytes_before_external_group_by`, рекомендуем выставить `max_memory_usage` приблизительно в два раза больше. Это следует сделать, потому что агрегация выполняется в две стадии: чтение и формирование промежуточных данных (1) и слияние промежуточных данных (2). Сброс данных на файловую систему может производиться только на стадии 1. Если сброса временных данных не было, то на стадии 2 может потребляться до такого же объёма памяти, как на стадии 1.

-Например, если [max\_memory\_usage](../operations/settings/settings.md#settings_max_memory_usage) было выставлено в 10000000000, и вы хотите использовать внешнюю агрегацию, то имеет смысл выставить `max_bytes_before_external_group_by` в 10000000000, а max\_memory\_usage в 20000000000.
При срабатывании внешней агрегации (если был хотя бы один сброс временных данных в файловую систему) максимальное потребление оперативки будет лишь чуть-чуть больше `max_bytes_before_external_group_by`. +Например, если [max\_memory\_usage](../../operations/settings/settings.md#settings_max_memory_usage) было выставлено в 10000000000, и вы хотите использовать внешнюю агрегацию, то имеет смысл выставить `max_bytes_before_external_group_by` в 10000000000, а max\_memory\_usage в 20000000000. При срабатывании внешней агрегации (если был хотя бы один сброс временных данных в файловую систему) максимальное потребление оперативки будет лишь чуть-чуть больше `max_bytes_before_external_group_by`. При распределённой обработке запроса внешняя агрегация производится на удалённых серверах. Для того чтобы на сервере-инициаторе запроса использовалось немного оперативки, нужно выставить настройку `distributed_aggregation_memory_efficient` в 1. @@ -870,7 +870,7 @@ GROUP BY вычисляет для каждого встретившегося ### Секция LIMIT BY {#sektsiia-limit-by} -Запрос с секцией `LIMIT n BY expressions` выбирает первые `n` строк для каждого отличного значения `expressions`. Ключ `LIMIT BY` может содержать любое количество [выражений](syntax.md#syntax-expressions). +Запрос с секцией `LIMIT n BY expressions` выбирает первые `n` строк для каждого отличного значения `expressions`. Ключ `LIMIT BY` может содержать любое количество [выражений](../syntax.md#syntax-expressions). ClickHouse поддерживает следующий синтаксис: @@ -936,7 +936,7 @@ LIMIT 100 Запрос выберет топ 5 рефереров для каждой пары `domain, device_type`, но не более 100 строк (`LIMIT n BY + LIMIT`). -`LIMIT n BY` работает с [NULL](syntax.md) как если бы это было конкретное значение. Т.е. в результате запроса пользователь получит все комбинации полей, указанных в `BY`. +`LIMIT n BY` работает с [NULL](../syntax.md) как если бы это было конкретное значение. Т.е. в результате запроса пользователь получит все комбинации полей, указанных в `BY`. ### Секция HAVING {#sektsiia-having} @@ -1007,7 +1007,7 @@ WHERE и HAVING отличаются тем, что WHERE выполняется ### Секция SELECT {#select-select} -[Выражения](syntax.md#syntax-expressions) указанные в секции `SELECT` анализируются после завершения всех вычислений из секций, описанных выше. Вернее, анализируются выражения, стоящие над агрегатными функциями, если есть агрегатные функции. +[Выражения](../syntax.md#syntax-expressions) указанные в секции `SELECT` анализируются после завершения всех вычислений из секций, описанных выше. Вернее, анализируются выражения, стоящие над агрегатными функциями, если есть агрегатные функции. Сами агрегатные функции и то, что под ними, вычисляются при агрегации (`GROUP BY`). Эти выражения работают так, как будто применяются к отдельным строкам результата. Если в результат необходимо включить все столбцы, используйте символ звёздочка (`*`). Например, `SELECT * FROM ...`. @@ -1080,7 +1080,7 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of `DISTINCT` не поддерживается, если в `SELECT` присутствует хотя бы один столбец типа массив. -`DISTINCT` работает с [NULL](syntax.md) как если бы `NULL` был конкретным значением, причём `NULL=NULL`. Т.е. в результате `DISTINCT` разные комбинации с `NULL` встретятся только по одному разу. +`DISTINCT` работает с [NULL](../syntax.md) как если бы `NULL` был конкретным значением, причём `NULL=NULL`. Т.е. в результате `DISTINCT` разные комбинации с `NULL` встретятся только по одному разу. 
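Небольшой набросок к сказанному о `DISTINCT` и `NULL` (данные сгенерированы прямо в запросе):

``` sql
-- NULL считается конкретным значением (NULL = NULL),
-- поэтому в результате встретится только один раз.
SELECT DISTINCT value
FROM (SELECT arrayJoin([1, NULL, 1, NULL]) AS value);
```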
ClickHouse поддерживает использование в одном запросе секций `DISTINCT` и `ORDER BY` для разных столбцов. Секция `DISTINCT` исполняется перед секцией `ORDER BY`. @@ -1237,7 +1237,7 @@ ORDER BY EventDate ASC #### Обработка NULL {#obrabotka-null-1} -При обработке запроса оператор IN будет считать, что результат операции с [NULL](syntax.md) всегда равен `0`, независимо от того, находится `NULL` в правой или левой части оператора. Значения `NULL` не входят ни в какое множество, не соответствуют друг другу и не могут сравниваться. +При обработке запроса оператор IN будет считать, что результат операции с [NULL](../syntax.md) всегда равен `0`, независимо от того, находится `NULL` в правой или левой части оператора. Значения `NULL` не входят ни в какое множество, не соответствуют друг другу и не могут сравниваться. Рассмотрим для примера таблицу `t_null`: @@ -1275,7 +1275,7 @@ FROM t_null Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса. !!! attention "Attention" - Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../operations/settings/settings.md) `distributed_product_mode`. + Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`. @@ -1375,7 +1375,7 @@ SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL Вы можете получить в дополнение к результату также минимальные и максимальные значения по столбцам результата. Для этого выставите настройку **extremes** в 1. Минимумы и максимумы считаются для числовых типов, дат, дат-с-временем. Для остальных столбцов будут выведены значения по умолчанию. -Вычисляются дополнительные две строчки - минимумы и максимумы, соответственно. Эти две дополнительные строки выводятся в [форматах](../interfaces/formats.md) `JSON*`, `TabSeparated*`, и `Pretty*` отдельно от остальных строчек. В остальных форматах они не выводится. +Вычисляются дополнительные две строчки - минимумы и максимумы, соответственно. Эти две дополнительные строки выводятся в [форматах](../../interfaces/formats.md) `JSON*`, `TabSeparated*`, и `Pretty*` отдельно от остальных строчек. В остальных форматах они не выводится. Во форматах `JSON*`, экстремальные значения выводятся отдельным полем ‘extremes’. В форматах `TabSeparated*`, строка выводится после основного результата и после ‘totals’ если есть. Перед ней (после остальных данных) вставляется пустая строка. В форматах `Pretty*`, строка выводится отдельной таблицей после основного результата и после `totals` если есть. diff --git a/docs/ru/query_language/show.md b/docs/ru/sql_reference/statements/show.md similarity index 90% rename from docs/ru/query_language/show.md rename to docs/ru/sql_reference/statements/show.md index 03f99378ccc..545301d6166 100644 --- a/docs/ru/query_language/show.md +++ b/docs/ru/sql_reference/statements/show.md @@ -23,7 +23,7 @@ SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] ``` -Выводит содержимое таблицы [system.processes](../operations/system_tables.md#system_tables-processes), которая содержит список запросов, выполняющихся в данный момент времени, кроме самих запросов `SHOW PROCESSLIST`. 
+Выводит содержимое таблицы [system.processes](../../operations/system_tables.md#system_tables-processes), которая содержит список запросов, выполняющихся в данный момент времени, кроме самих запросов `SHOW PROCESSLIST`.

Запрос `SELECT * FROM system.processes` возвращает данные обо всех текущих запросах.

@@ -66,7 +66,7 @@ SHOW TABLES FROM system LIKE '%co%' LIMIT 2

## SHOW DICTIONARIES {#show-dictionaries}

-Выводит список [внешних словарей](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md).

``` sql
SHOW DICTIONARIES [FROM ] [LIKE ''] [LIMIT ] [INTO OUTFILE ] [FORMAT ]
```

diff --git a/docs/ru/query_language/system.md b/docs/ru/sql_reference/statements/system.md
similarity index 80%
rename from docs/ru/query_language/system.md
rename to docs/ru/sql_reference/statements/system.md
index 12909c12ce2..2a4acd15e7f 100644
--- a/docs/ru/query_language/system.md
+++ b/docs/ru/sql_reference/statements/system.md
@@ -17,7 +17,7 @@

## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}

Перегружает все словари, которые были успешно загружены до этого.
-По умолчанию включена ленивая загрузка [dictionaries\_lazy\_load](../operations/server_settings/settings.md#dictionaries-lazy-load), поэтому словари не загружаются автоматически при старте, а только при первом обращении через dictGet или SELECT к ENGINE=Dictionary. После этого такие словари (LOADED) будут перегружаться командой `system reload dictionaries`.
+По умолчанию включена ленивая загрузка [dictionaries\_lazy\_load](../../operations/server_configuration_parameters/settings.md#dictionaries-lazy-load), поэтому словари не загружаются автоматически при старте, а только при первом обращении через dictGet или SELECT к ENGINE=Dictionary. После этого такие словари (LOADED) будут перегружаться командой `system reload dictionaries`.
Всегда возвращает `Ok.`, вне зависимости от результата обновления словарей.

## RELOAD DICTIONARY dictionary\_name {#query_language-system-reload-dictionary}

@@ -58,7 +58,7 @@ SELECT name, status FROM system.dictionaries;

## Управление распределёнными таблицами {#query-language-system-distributed}

-ClickHouse может оперировать [распределёнными](../operations/table_engines/distributed.md) таблицами. Когда пользователь вставляет данные в эти таблицы, ClickHouse сначала формирует очередь из данных, которые должны быть отправлены на узлы кластера, а затем асинхронно отправляет подготовленные данные. Вы можете управлять очередью с помощью запросов [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) и [FLUSH DISTRIBUTED](#query_language-system-flush-distributed). Также есть возможность синхронно вставлять распределенные данные с помощью настройки `insert_distributed_sync`.
+ClickHouse может оперировать [распределёнными](../../engines/table_engines/special/distributed.md) таблицами. Когда пользователь вставляет данные в эти таблицы, ClickHouse сначала формирует очередь из данных, которые должны быть отправлены на узлы кластера, а затем асинхронно отправляет подготовленные данные. Вы можете управлять очередью с помощью запросов [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) и [FLUSH DISTRIBUTED](#query_language-system-flush-distributed). Также есть возможность синхронно вставлять распределенные данные с помощью настройки `insert_distributed_sync`.
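Сводный набросок к подразделам ниже — управление очередью отправки для распределённой таблицы (имя `db.dist` условное):

``` sql
-- Остановить фоновую отправку данных на узлы кластера.
SYSTEM STOP DISTRIBUTED SENDS db.dist;

-- Возобновить фоновую отправку.
SYSTEM START DISTRIBUTED SENDS db.dist;

-- Принудительно отправить накопленные в очереди данные.
SYSTEM FLUSH DISTRIBUTED db.dist;
```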
### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends}

diff --git a/docs/ru/query_language/syntax.md b/docs/ru/sql_reference/syntax.md
similarity index 97%
rename from docs/ru/query_language/syntax.md
rename to docs/ru/sql_reference/syntax.md
index 0219de90685..b9576e48a59 100644
--- a/docs/ru/query_language/syntax.md
+++ b/docs/ru/sql_reference/syntax.md
@@ -68,13 +68,13 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
- Otherwise, it is an error.

The corresponding value will have the smallest type that the value fits in.
-For example, 1 is parsed as `UInt8`, but 256 as `UInt16`. For more information, see [Data types](../data_types/index.md).
+For example, 1 is parsed as `UInt8`, but 256 as `UInt16`. For more information, see [Data types](data_types/index.md).

Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`.

### String {#syntax-string-literal}

-Only string literals in single quotes are supported. The characters inside can be escaped with a backslash. The following escape sequences have a special meaning: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, a sequence of the form `\c`, where `c` is any character, is converted to `c`. Thus, the sequences `\'` and `\\` can be used. The value will have the [String](../data_types/string.md) type.
+Only string literals in single quotes are supported. The characters inside can be escaped with a backslash. The following escape sequences have a special meaning: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, a sequence of the form `\c`, where `c` is any character, is converted to `c`. Thus, the sequences `\'` and `\\` can be used. The value will have the [String](data_types/string.md) type.

The minimum set of characters that you need to escape in string literals: `'` and `\`. A single quote can be escaped with a single quote; the literals `'It\'s'` and `'It''s'` are equivalent.

@@ -83,13 +83,13 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')

Constructions are supported for arrays: `[1, 2, 3]`, and for tuples: `(1, 'Hello, world!', 2)`. Actually, these are not literals, but expressions with the array creation operator and the tuple creation operator, respectively. An array must consist of at least one item, and a tuple must contain at least two items.
-Tuples have a special purpose: they are used in the `IN` clause of a `SELECT` query. Tuples can be obtained as the result of a query, but they can't be saved to a database (with the exception of [Memory](../operations/table_engines/memory.md) tables).
+Tuples have a special purpose: they are used in the `IN` clause of a `SELECT` query. Tuples can be obtained as the result of a query, but they can't be saved to a database (with the exception of [Memory](../engines/table_engines/special/memory.md) tables).

### NULL {#null-literal}

Indicates that the value is missing.

-In order to store `NULL` in a table field, it must be of the [Nullable](../data_types/nullable.md) type.
+In order to store `NULL` in a table field, it must be of the [Nullable](data_types/nullable.md) type.

Depending on the data format (input or output), `NULL` may have a different representation. For details, see the documentation for [data formats](../interfaces/formats.md#formats).
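A small sketch of `NULL` with a `Nullable` column, reusing the `t_null` table from the examples above (the `TinyLog` engine here is just for illustration):

``` sql
CREATE TABLE t_null (x Int8, y Nullable(Int8)) ENGINE = TinyLog;

INSERT INTO t_null VALUES (1, NULL), (2, 3);

SELECT x FROM t_null WHERE y IS NULL;  -- returns 1: NULL is matched only by IS NULL, not by comparisons
```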
@@ -123,7 +123,7 @@ expr AS alias

For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.

-    In the [CAST](functions/type_conversion_functions.md#type_conversion_function-cast) function, the `AS` keyword has a different meaning. See the description of the function.
+    In the [CAST](functions/type_conversion_functions.md#type_conversion_function-cast) function, the `AS` keyword has a different meaning. See the description of the function.

- `expr` — any expression supported by ClickHouse.

diff --git a/docs/ru/query_language/table_functions/file.md b/docs/ru/sql_reference/table_functions/file.md
similarity index 92%
rename from docs/ru/query_language/table_functions/file.md
rename to docs/ru/sql_reference/table_functions/file.md
index d415b20858b..e0da3ddc15f 100644
--- a/docs/ru/query_language/table_functions/file.md
+++ b/docs/ru/sql_reference/table_functions/file.md
@@ -8,7 +8,7 @@ file(path, format, structure)

**Input parameters**

-- `path` — the relative path to the file from [user\_files\_path](../../operations/server_settings/settings.md#server_settings-user_files_path). The path to the file supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}`, where `N`, `M` are numbers and `'abc', 'def'` are strings.
+- `path` — the relative path to the file from [user\_files\_path](../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-user_files_path). The path to the file supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}`, where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `format` — the [format](../../interfaces/formats.md#formats) of the file.
- `structure` — the table structure, in the format `'column1_name column1_type, column2_name column2_type, ...'`.

diff --git a/docs/ru/query_language/table_functions/generate.md b/docs/ru/sql_reference/table_functions/generate.md
similarity index 100%
rename from docs/ru/query_language/table_functions/generate.md
rename to docs/ru/sql_reference/table_functions/generate.md
diff --git a/docs/ru/query_language/table_functions/hdfs.md b/docs/ru/sql_reference/table_functions/hdfs.md
similarity index 100%
rename from docs/ru/query_language/table_functions/hdfs.md
rename to docs/ru/sql_reference/table_functions/hdfs.md
diff --git a/docs/ru/query_language/table_functions/index.md b/docs/ru/sql_reference/table_functions/index.md
similarity index 62%
rename from docs/ru/query_language/table_functions/index.md
rename to docs/ru/sql_reference/table_functions/index.md
index 446aa554243..f0e465c5cb0 100644
--- a/docs/ru/query_language/table_functions/index.md
+++ b/docs/ru/sql_reference/table_functions/index.md
@@ -4,11 +4,11 @@

Table functions can be used in:

-- The [FROM](../select.md#select-from) clause of a `SELECT` query.
+- The [FROM](../statements/select.md#select-from) clause of a `SELECT` query.

   This is a way to create a temporary table that is available only in the current query.

-- The [CREATE TABLE AS \<table_function()\>](../create.md#create-table-query) query.
+- The [CREATE TABLE AS \<table_function()\>](../statements/create.md#create-table-query) query.

   This is one of the ways to create a table.

@@ -17,14 +17,14 @@

| Function | Description |
|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------|
-| [file](file.md) | Creates a table with the [File](../../operations/table_engines/file.md) engine. |
-| [merge](merge.md) | Creates a table with the [Merge](../../operations/table_engines/merge.md) engine. |
+| [file](file.md) | Creates a table with the [File](../../engines/table_engines/special/file.md) engine. |
+| [merge](merge.md) | Creates a table with the [Merge](../../engines/table_engines/special/merge.md) engine. |
| [numbers](numbers.md) | Creates a table with a single column filled with integer numbers. |
-| [remote](remote.md) | Provides access to remote servers without creating a table with the [Distributed](../../operations/table_engines/distributed.md) engine. |
-| [url](url.md) | Creates a table with the [Url](../../operations/table_engines/url.md) engine. |
-| [mysql](mysql.md) | Creates a table with the [MySQL](../../operations/table_engines/mysql.md) engine. |
-| [jdbc](jdbc.md) | Creates a table with the [JDBC](../../operations/table_engines/jdbc.md) engine. |
-| [odbc](odbc.md) | Creates a table with the [ODBC](../../operations/table_engines/odbc.md) engine. |
-| [hdfs](hdfs.md) | Creates a table with the [HDFS](../../operations/table_engines/hdfs.md) engine. |
+| [remote](remote.md) | Provides access to remote servers without creating a table with the [Distributed](../../engines/table_engines/special/distributed.md) engine. |
+| [url](url.md) | Creates a table with the [Url](../../engines/table_engines/special/url.md) engine. |
+| [mysql](mysql.md) | Creates a table with the [MySQL](../../engines/table_engines/integrations/mysql.md) engine. |
+| [jdbc](jdbc.md) | Creates a table with the [JDBC](../../engines/table_engines/integrations/jdbc.md) engine. |
+| [odbc](odbc.md) | Creates a table with the [ODBC](../../engines/table_engines/integrations/odbc.md) engine. |
+| [hdfs](hdfs.md) | Creates a table with the [HDFS](../../engines/table_engines/integrations/hdfs.md) engine. |

[Original article](https://clickhouse.tech/docs/ru/query_language/table_functions/)

diff --git a/docs/ru/query_language/table_functions/input.md b/docs/ru/sql_reference/table_functions/input.md
similarity index 100%
rename from docs/ru/query_language/table_functions/input.md
rename to docs/ru/sql_reference/table_functions/input.md
diff --git a/docs/ru/query_language/table_functions/jdbc.md b/docs/ru/sql_reference/table_functions/jdbc.md
similarity index 100%
rename from docs/ru/query_language/table_functions/jdbc.md
rename to docs/ru/sql_reference/table_functions/jdbc.md
diff --git a/docs/ru/query_language/table_functions/merge.md b/docs/ru/sql_reference/table_functions/merge.md
similarity index 100%
rename from docs/ru/query_language/table_functions/merge.md
rename to docs/ru/sql_reference/table_functions/merge.md
diff --git a/docs/ru/query_language/table_functions/mysql.md b/docs/ru/sql_reference/table_functions/mysql.md
similarity index 93%
rename from docs/ru/query_language/table_functions/mysql.md
rename to docs/ru/sql_reference/table_functions/mysql.md
index 228b0bbf38e..a26f89ced2f 100644
--- a/docs/ru/query_language/table_functions/mysql.md
+++ b/docs/ru/sql_reference/table_functions/mysql.md
@@ -73,7 +73,7 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')

## See also {#smotrite-takzhe}

-- [The 'MySQL' table engine](../../operations/table_engines/mysql.md)
-- [Using MySQL as a source of external dictionaries](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-mysql)
+- [The 'MySQL' table engine](../../engines/table_engines/integrations/mysql.md)
+- [Using MySQL as a source of external dictionaries](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-mysql)

[Original article](https://clickhouse.tech/docs/ru/query_language/table_functions/mysql/)

diff --git
a/docs/ru/query_language/table_functions/numbers.md b/docs/ru/sql_reference/table_functions/numbers.md
similarity index 100%
rename from docs/ru/query_language/table_functions/numbers.md
rename to docs/ru/sql_reference/table_functions/numbers.md
diff --git a/docs/ru/query_language/table_functions/odbc.md b/docs/ru/sql_reference/table_functions/odbc.md
similarity index 95%
rename from docs/ru/query_language/table_functions/odbc.md
rename to docs/ru/sql_reference/table_functions/odbc.md
index bff2c23cf47..0d277b2b26d 100644
--- a/docs/ru/query_language/table_functions/odbc.md
+++ b/docs/ru/sql_reference/table_functions/odbc.md
@@ -95,7 +95,7 @@ SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test')

## See also {#smotrite-takzhe}

-- [ODBC external dictionaries](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc)
-- [ODBC table engine](../../operations/table_engines/odbc.md).
+- [ODBC external dictionaries](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc)
+- [ODBC table engine](../../engines/table_engines/integrations/odbc.md).

[Original article](https://clickhouse.tech/docs/ru/query_language/table_functions/odbc/)

diff --git a/docs/ru/query_language/table_functions/remote.md b/docs/ru/sql_reference/table_functions/remote.md
similarity index 100%
rename from docs/ru/query_language/table_functions/remote.md
rename to docs/ru/sql_reference/table_functions/remote.md
diff --git a/docs/ru/query_language/table_functions/url.md b/docs/ru/sql_reference/table_functions/url.md
similarity index 100%
rename from docs/ru/query_language/table_functions/url.md
rename to docs/ru/sql_reference/table_functions/url.md
diff --git a/docs/ru/whats_new/changelog/2017.md b/docs/ru/whats_new/changelog/2017.md
new file mode 100644
index 00000000000..1c820453901
--- /dev/null
+++ b/docs/ru/whats_new/changelog/2017.md
@@ -0,0 +1,266 @@
+---
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
+---
+
+### ClickHouse release 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21}
+
+This release contains bug fixes for the previous release 1.1.54318:
+
+- Fixed a bug with a possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows in the logs in warning messages like `Part ... from own log doesn't exist.` The issue is relevant even if you don't see these messages in the logs.
+
+### ClickHouse release 1.1.54318, 2017-11-30 {#clickhouse-release-1-1-54318-2017-11-30}
+
+This release contains bug fixes for the previous release 1.1.54310:
+
+- Fixed incorrect row deletions during merges in the SummingMergeTree engine
+- Fixed a memory leak in unreplicated MergeTree engines
+- Fixed performance degradation with frequent inserts in MergeTree engines
+- Fixed an issue that was causing the replication queue to stop running
+- Fixed rotation and archiving of server logs
+
+### ClickHouse release 1.1.54310, 2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01}
+
+#### New features: {#new-features}
+
+- Custom partitioning key for the MergeTree family of table engines.
+- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine.
+- Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse.
+- Added support for time zones with non-integer offsets from UTC.
+- Added support for arithmetic operations with time intervals.
+- The range of values for the Date and DateTime types is extended to the year 2105.
+- Added the `CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view).
+- Added the `ATTACH TABLE` query without arguments.
+- The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted into the sumMap aggregate function. You can now specify such columns explicitly.
+- The maximum size of the IP trie dictionary is increased to 128M entries.
+- Added the getSizeOfEnumType function.
+- Added the sumWithOverflow aggregate function.
+- Added support for the Cap'n Proto input format.
+- You can now customize the compression level when using the zstd algorithm.
+
+#### Backward incompatible changes: {#backward-incompatible-changes}
+
+- Creation of temporary tables with an engine other than Memory is not allowed.
+- Explicit creation of tables with the View or MaterializedView engine is not allowed.
+- During table creation, a new check verifies that the sampling key expression is included in the primary key.
+
+#### Bug fixes: {#bug-fixes}
+
+- Fixed hangups when synchronously inserting into a Distributed table.
+- Fixed nonatomic adding and removing of parts in Replicated tables.
+- Data inserted into a materialized view is not subjected to unnecessary deduplication.
+- Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable no longer results in an error.
+- Users don't need access permissions to the `default` database to create temporary tables anymore.
+- Fixed crashing when specifying the Array type without arguments.
+- Fixed hangups when the disk volume containing server logs is full.
+- Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch.
+
+#### Build improvements: {#build-improvements}
+
+- Several third-party libraries (notably Poco) were updated and converted to git submodules.
+
+### ClickHouse release 1.1.54304, 2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19}
+
+#### New features: {#new-features-1}
+
+- TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml`).
+
+#### Bug fixes: {#bug-fixes-1}
+
+- `ALTER` for replicated tables now tries to start running as soon as possible.
+- Fixed crashing when reading data with the setting `preferred_block_size_bytes=0.`
+- Fixed crashes of `clickhouse-client` when pressing `Page Down`
+- Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL`
+- `FREEZE PARTITION` always works atomically now.
+- Empty POST requests now return a response with code 411.
+- Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8)).`
+- Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables.
+- Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b`
+- Users are updated correctly with an invalid `users.xml`
+- Correct handling when an executable dictionary returns a non-zero response code.
+
+### ClickHouse release 1.1.54292, 2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20}
+
+#### New features: {#new-features-2}
+
+- Added the `pointInPolygon` function for working with coordinates on a coordinate plane.
+- Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`.
+- Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers.
+- The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting `compile = 1`, which is not used by default).
+- Reduced the time needed for dynamic compilation of queries.
+
+#### Bug fixes: {#bug-fixes-2}
+
+- Fixed an error that sometimes produced `part ... intersects previous part` messages and weakened replica consistency.
+- Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown.
+- Removed excessive logging when restoring replicas.
+- Fixed an error in the UNION ALL implementation.
+- Fixed an error in the concat function that occurred if the first column in a block has the Array type.
+- Progress is now displayed correctly in the system.merges table.
+
+### ClickHouse release 1.1.54289, 2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13}
+
+#### New features: {#new-features-3}
+
+- `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`.
+- Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`.
+- Added the `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster.
+- Added the aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`).
+- External dictionaries can be loaded from MySQL by specifying a socket in the file system.
+- External dictionaries can be loaded from MySQL over SSL (the `ssl_cert`, `ssl_key`, and `ssl_ca` parameters).
+- Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth used by queries per user.
+- Support for `DROP TABLE` for temporary tables.
+- Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats.
+- Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes).
+- FIFO locking is used during ALTER: an ALTER query isn't blocked indefinitely for continuously running queries.
+- Option to set `umask` in the config file.
+- Improved performance for queries with `DISTINCT`.
+
+#### Bug fixes: {#bug-fixes-3}
+
+- Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn't get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things.
+- Fixed randomization when choosing hosts for the connection to ZooKeeper.
+- Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost.
+- Fixed an error when a data part in a `ReplicatedMergeTree` table could be broken after running `ALTER MODIFY` on an element in a `Nested` structure.
+- Fixed an error that could cause SELECT queries to «hang».
+- Improvements to distributed DDL queries.
+- Fixed the query `CREATE TABLE ... AS <materialized view>`.
+- Resolved a deadlock in the `ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables.
+- Fixed the invalid default value for `Enum`s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats.
+- Resolved the appearance of zombie processes when using a dictionary with an `executable` source.
+- Fixed a segfault for the HEAD query.
+
+#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse}
+
+- You can use `pbuilder` to build ClickHouse.
+- You can use `libc++` instead of `libstdc++` for builds on Linux.
+- Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`.
+
+#### Please note when upgrading: {#please-note-when-upgrading}
+
+- There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 gigabytes to 150 gigabytes. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts. As a result, INSERT queries will fail with the message «Merges are processing significantly slower than inserts.» Use the `SELECT * FROM system.merges` query to monitor the situation. You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don't need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the `<merge_tree>` section in config.xml, set `<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool>`, and restart the server.
+
+### ClickHouse release 1.1.54284, 2017-08-29 {#clickhouse-release-1-1-54284-2017-08-29}
+
+- This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper.
+
+### ClickHouse release 1.1.54282, 2017-08-23 {#clickhouse-release-1-1-54282-2017-08-23}
+
+This release contains bug fixes for the previous release 1.1.54276:
+
+- Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table.
+- Fixed parsing when inserting in RowBinary format if the input data begins with ';'.
+- Fixed errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`).
+
+### ClickHouse release 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16}
+
+#### New features: {#new-features-4}
+
+- Added an optional WITH section for the SELECT query.
Example query: `WITH 1+1 AS a SELECT a, a*a`
+- INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert\_distributed\_sync=1.
+- Added the UUID data type for working with 16-byte identifiers.
+- Added aliases of CHAR, FLOAT, and other types for compatibility with Tableau.
+- Added the toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss functions for converting time into numbers.
+- You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries.
+- Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len).`
+- Added the max\_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance.
+
+#### Main changes: {#main-changes}
+
+- Security improvements: all server files are created with 0640 permissions (can be changed via the `<umask>` config parameter).
+- Improved error messages for queries with invalid syntax.
+- Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data.
+- Significantly increased the performance of data merges for the ReplacingMergeTree engine.
+- Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed\_directory\_monitor\_batch\_inserts=1.
+
+#### Backward incompatible changes: {#backward-incompatible-changes-1}
+
+- Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays.
+
+#### Complete list of changes: {#complete-list-of-changes}
+
+- Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format.
+- Optimized stream allocation when reading from a Distributed table.
+- Settings can be configured in readonly mode if the value doesn't change.
+- Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred\_block\_size\_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns.
+- Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op constexpr.`
+- Added new settings for MergeTree engines (the merge\_tree section in config.xml):
+  - replicated\_deduplication\_window\_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables.
+  - cleanup\_delay\_period sets how often to start cleanup to remove outdated data.
+  - replicated\_can\_become\_leader can prevent a replica from becoming the leader (and assigning merges).
+- Accelerated cleanup to remove outdated data from ZooKeeper.
+- Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed\_ddl\_task\_timeout, which limits the time to wait for a response from the servers in the cluster. If a DDL request has not been performed on all hosts, the response will contain a timeout error, and the request will be executed in asynchronous mode.
+- Improved display of stack traces in the server logs.
+- Added the «none» value for the compression method.
+- You can use multiple dictionaries\_config sections in config.xml.
+- It is possible to connect to MySQL through a socket in the file system.
+- The system.parts table has a new column with information about the size of marks, in bytes.
+
+#### Bug fixes: {#bug-fixes-4}
+
+- Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field.
+- Fixed a rare race condition in ReplicatedMergeTree when checking data parts.
+- Fixed possible freezing on «leader election» when starting a server.
+- The max\_replica\_delay\_for\_distributed\_queries setting was ignored when using a local replica of the data source. This has been fixed.
+- Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column.
+- Fixed an exception in the multiIf function when using empty arrays or strings.
+- Fixed excessive memory allocations when deserializing the Native format.
+- Fixed incorrect auto-update of Trie dictionaries.
+- Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE.
+- Fixed a crash of GROUP BY when using distributed\_aggregation\_memory\_efficient=1.
+- Now you can specify the database.table in the right side of IN and JOIN.
+- Too many threads were used for parallel aggregation. This has been fixed.
+- Fixed how the «if» function works with FixedString arguments.
+- SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed.
+- Running `CREATE VIEW IF EXISTS` no longer causes crashes.
+- Fixed incorrect behavior when input\_format\_skip\_unknown\_fields=1 is set and there are negative numbers.
+- Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary.
+- Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables.
+- Fixed an incorrect interpretation of a SELECT query from Dictionary tables.
+- Fixed the «Cannot mremap» error when using arrays in IN and JOIN clauses with more than 2 billion elements.
+- Fixed the failover for dictionaries with MySQL as the source.
+
+#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse-1}
+
+- Builds can be assembled in Arcadia.
+- You can use gcc 7 to compile ClickHouse.
+- Parallel builds using ccache+distcc are faster now.
+
+### ClickHouse release 1.1.54245, 2017-07-04 {#clickhouse-release-1-1-54245-2017-07-04}
+
+#### New features: {#new-features-5}
+
+- Distributed DDL (for example, `CREATE TABLE ON CLUSTER`); a short sketch follows this list.
+- The replicated query `ALTER TABLE CLEAR COLUMN IN PARTITION.`
+- The engine for Dictionary tables (access to dictionary data in the form of a table).
+- The Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries).
+- You can check for updates to the dictionary by sending a request to the source.
+- Qualified column names.
+- Quoting identifiers using double quotation marks.
+- Sessions in the HTTP interface.
+- The OPTIMIZE query for a Replicated table can run not only on the leader.
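+A minimal sketch of the distributed DDL feature from the list above; the cluster name `example_cluster` and the table schema are placeholders, and the old-style `MergeTree` parameters match that era:
+
+``` sql
+-- Runs the CREATE on every server of the cluster defined in the server config
+CREATE TABLE default.events ON CLUSTER example_cluster
+(
+    EventDate Date,
+    EventID UInt64
+) ENGINE = MergeTree(EventDate, (EventID, EventDate), 8192);
+```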
+ +#### Назад несовместимые изменения: {#backward-incompatible-changes-2} + +- Удалить набор глобальных. + +#### Несущественные изменения: {#minor-changes} + +- Теперь после срабатывания предупреждения журнал печатает полную трассировку стека. +- Ослаблена проверка количества поврежденных / лишних частей данных при запуске (было слишком много ложных срабатываний). + +#### Устранение ошибок: {#bug-fixes-5} + +- Исправлена плохая связь «sticking» при вставке в распределенную таблицу. +- GLOBAL IN теперь работает для запроса из таблицы слияния, которая смотрит на распределенную таблицу. +- Неверное количество ядер было обнаружено на виртуальной машине Google Compute Engine. Это было исправлено. +- Изменения в том, как работает исполняемый источник кэшированных внешних словарей. +- Исправлено сравнение строк, содержащих нулевые символы. +- Исправлено сравнение полей первичного ключа Float32 с константами. +- Ранее неверная оценка размера поля могла привести к чрезмерно большим распределениям. +- Исправлена ошибка, при отправке запроса столбец допускает значения NULL в таблицу с помощью инструкции Alter. +- Исправлена ошибка при сортировке по нулевому столбцу, если количество строк меньше предельного. +- Исправлен порядок по подзапросу, состоящему только из постоянных значений. +- Ранее реплицированная таблица могла оставаться в недопустимом состоянии после неудачного удаления таблицы. +- Псевдонимы для скалярных подзапросов с пустыми результатами больше не теряются. +- Теперь запрос, который использовал компиляцию, не завершается ошибкой, если файл .so поврежден. diff --git a/docs/ru/whats_new/changelog/2018.md b/docs/ru/whats_new/changelog/2018.md new file mode 100644 index 00000000000..5de3ba68437 --- /dev/null +++ b/docs/ru/whats_new/changelog/2018.md @@ -0,0 +1,1061 @@ +--- +machine_translated: true +machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 +--- + +## ClickHouse релиз 18.16 {#clickhouse-release-18-16} + +### ClickHouse релиз 18.16.1, 2018-12-21 {#clickhouse-release-18-16-1-2018-12-21} + +#### Устранение ошибок: {#bug-fixes} + +- Исправлена ошибка, которая приводила к проблемам с обновлением словарей с источником ODBC. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) +- JIT-компиляция агрегатных функций теперь работает с колонками LowCardinality. [\#3838](https://github.com/ClickHouse/ClickHouse/issues/3838) + +#### Улучшения: {#improvements} + +- Добавил тот `low_cardinality_allow_in_native_format` настройка (включена по умолчанию). Если этот параметр отключен, столбцы с низким коэффициентом полезности будут преобразованы в обычные столбцы для запросов SELECT, а обычные столбцы будут ожидаться для запросов INSERT. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) + +#### Улучшения сборки: {#build-improvements} + +- Исправления для сборок на macOS и ARM. + +### ClickHouse релиз 18.16.0, 2018-12-14 {#clickhouse-release-18-16-0-2018-12-14} + +#### Новые средства: {#new-features} + +- `DEFAULT` выражения вычисляются для пропущенных полей при загрузке данных в полуструктурированные входные форматы (`JSONEachRow`, `TSKV`). Эта функция включена с помощью `insert_sample_with_metadata` установка. [\#3555](https://github.com/ClickHouse/ClickHouse/pull/3555) +- То `ALTER TABLE` запрос теперь имеет следующее значение `MODIFY ORDER BY` действие для изменения ключа сортировки при добавлении или удалении столбца таблицы. 
Это полезно для таблиц в `MergeTree` семейство, выполняющее дополнительные задачи при слиянии на основе этого ключа сортировки, например `SummingMergeTree`, `AggregatingMergeTree` и так далее. [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) [\#3755](https://github.com/ClickHouse/ClickHouse/pull/3755) +- Для столиков в центре города `MergeTree` семья, теперь вы можете указать другой ключ сортировки (`ORDER BY`) и индекс (`PRIMARY KEY`). Ключ сортировки может быть длиннее индекса. [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) +- Добавил тот `hdfs` функция таблицы и `HDFS` механизм таблиц для импорта и экспорта данных в HDFS. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/3617) +- Добавлены функции для работы с base64: `base64Encode`, `base64Decode`, `tryBase64Decode`. [Александр Крашенинников](https://github.com/ClickHouse/ClickHouse/pull/3350) +- Теперь вы можете использовать параметр для настройки точности `uniqCombined` агрегатная функция (выбор количества ячеек Гиперлога). [\#3406](https://github.com/ClickHouse/ClickHouse/pull/3406) +- Добавил тот `system.contributors` таблица, содержащая имена всех, кто совершил коммиты в ClickHouse. [\#3452](https://github.com/ClickHouse/ClickHouse/pull/3452) +- Добавлена возможность опустить Раздел для `ALTER TABLE ... FREEZE` запрос для резервного копирования всех разделов сразу. [\#3514](https://github.com/ClickHouse/ClickHouse/pull/3514) +- Добавлен `dictGet` и `dictGetOrDefault` функции, которые не требуют указания типа возвращаемого значения. Тип определяется автоматически из описания словаря. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3564) +- Теперь вы можете указать комментарии для столбца в описании таблицы и изменить его с помощью `ALTER`. [\#3377](https://github.com/ClickHouse/ClickHouse/pull/3377) +- Чтение поддерживается для `Join` введите таблицы с простыми ключами. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3728) +- Теперь вы можете указать следующие параметры `join_use_nulls`, `max_rows_in_join`, `max_bytes_in_join`, и `join_overflow_mode` при создании `Join` типизированная таблица. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3728) +- Добавил тот `joinGet` функция, которая позволяет вам использовать a `Join` введите таблицу, как словарь. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3728) +- Добавил тот `partition_key`, `sorting_key`, `primary_key`, и `sampling_key` колонны в сторону `system.tables` таблица для того, чтобы предоставить информацию о ключах таблицы. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) +- Добавил тот `is_in_partition_key`, `is_in_sorting_key`, `is_in_primary_key`, и `is_in_sampling_key` колонны в сторону `system.columns` стол. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) +- Добавил тот `min_time` и `max_time` колонны в сторону `system.parts` стол. Эти столбцы заполняются, когда ключ секционирования является выражением, состоящим из `DateTime` столбцы. [Emmanuel Donin de Rosière](https://github.com/ClickHouse/ClickHouse/pull/3800) + +#### Устранение ошибок: {#bug-fixes-1} + +- Исправления и улучшения производительности для `LowCardinality` тип данных. `GROUP BY` с помощью `LowCardinality(Nullable(...))`. Получение значений `extremes`. Обработка функций высокого порядка. `LEFT ARRAY JOIN`. Распределенный `GROUP BY`. Функции, которые возвращают `Array`. Исполнение приказа `ORDER BY`. Написание в адрес `Distributed` таблицы (nicelulu). 
Обратная совместимость для `INSERT` запросы от старых клиентов, которые реализуют `Native` протокол. Поддержка `LowCardinality` для `JOIN`. Улучшена производительность при работе в одном потоке. [\#3823](https://github.com/ClickHouse/ClickHouse/pull/3823) [\#3803](https://github.com/ClickHouse/ClickHouse/pull/3803) [\#3799](https://github.com/ClickHouse/ClickHouse/pull/3799) [\#3769](https://github.com/ClickHouse/ClickHouse/pull/3769) [\#3744](https://github.com/ClickHouse/ClickHouse/pull/3744) [\#3681](https://github.com/ClickHouse/ClickHouse/pull/3681) [\#3651](https://github.com/ClickHouse/ClickHouse/pull/3651) [\#3649](https://github.com/ClickHouse/ClickHouse/pull/3649) [\#3641](https://github.com/ClickHouse/ClickHouse/pull/3641) [\#3632](https://github.com/ClickHouse/ClickHouse/pull/3632) [\#3568](https://github.com/ClickHouse/ClickHouse/pull/3568) [\#3523](https://github.com/ClickHouse/ClickHouse/pull/3523) [\#3518](https://github.com/ClickHouse/ClickHouse/pull/3518) +- Исправлено как то `select_sequential_consistency` вариант работает. Ранее, когда этот параметр был включен, неполный результат иногда возвращался после начала записи в новый раздел. [\#2863](https://github.com/ClickHouse/ClickHouse/pull/2863) +- Базы данных правильно задаются при выполнении DDL `ON CLUSTER` запросы и `ALTER UPDATE/DELETE`. [\#3772](https://github.com/ClickHouse/ClickHouse/pull/3772) [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) +- Базы данных правильно задаются для вложенных запросов внутри представления. [\#3521](https://github.com/ClickHouse/ClickHouse/pull/3521) +- Исправлена ошибка в работе `PREWHERE` с `FINAL` для `VersionedCollapsingMergeTree`. [7167bfd7](https://github.com/ClickHouse/ClickHouse/commit/7167bfd7b365538f7a91c4307ad77e552ab4e8c1) +- Теперь вы можете использовать `KILL QUERY` чтобы отменить запросы, которые еще не начались, потому что они ждут блокировки таблицы. [\#3517](https://github.com/ClickHouse/ClickHouse/pull/3517) +- Исправлены расчеты даты и времени, если часы были перенесены назад в полночь (это происходит в Иране, а произошло в Москве с 1981 по 1983 год). Ранее это приводило к тому, что время сбрасывалось на день раньше необходимого, а также вызывало неправильное форматирование даты и времени в текстовом формате. [\#3819](https://github.com/ClickHouse/ClickHouse/pull/3819) +- Исправлены ошибки в некоторых случаях `VIEW` и подзапросы, которые опускают базу данных. [Зимний Чжан](https://github.com/ClickHouse/ClickHouse/pull/3521) +- Исправлено состояние гонки при одновременном чтении из `MATERIALIZED VIEW` и удаление `MATERIALIZED VIEW` из-за того, что внутренняя дверь не запирается `MATERIALIZED VIEW`. [\#3404](https://github.com/ClickHouse/ClickHouse/pull/3404) [\#3694](https://github.com/ClickHouse/ClickHouse/pull/3694) +- Исправлена ошибка `Lock handler cannot be nullptr.` [\#3689](https://github.com/ClickHouse/ClickHouse/pull/3689) +- Исправлена обработка запросов, когда `compile_expressions` опция включена (она включена по умолчанию). Недетерминированные постоянные выражения, такие как `now` функции больше не разворачиваются. [\#3457](https://github.com/ClickHouse/ClickHouse/pull/3457) +- Исправлена ошибка при указании непостоянного аргумента масштаба в `toDecimal32/64/128` функции. +- Исправлена ошибка при попытке вставить массив с помощью `NULL` элементы в системе `Values` форматирование в столбец типа `Array` без `Nullable` (если `input_format_values_interpret_expressions` = 1). 
[\#3487](https://github.com/ClickHouse/ClickHouse/pull/3487) [\#3503](https://github.com/ClickHouse/ClickHouse/pull/3503) +- Исправлена непрерывная ошибка входа в систему `DDLWorker` если смотритель зоопарка не доступен. [8f50c620](https://github.com/ClickHouse/ClickHouse/commit/8f50c620334988b28018213ec0092fe6423847e2) +- Исправлен тип возврата для `quantile*` функции от `Date` и `DateTime` тип аргумента. [\#3580](https://github.com/ClickHouse/ClickHouse/pull/3580) +- Исправлена ошибка `WITH` предложение, если оно указывает простой псевдоним без выражений. [\#3570](https://github.com/ClickHouse/ClickHouse/pull/3570) +- Исправлена обработка запросов с именованными подзапросами и квалифицированными именами столбцов, когда `enable_optimize_predicate_expression` это включено. [Зимний Чжан](https://github.com/ClickHouse/ClickHouse/pull/3588) +- Исправлена ошибка `Attempt to attach to nullptr thread group` при работе с материализованными представлениями. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3623) +- Исправлена ошибка при передаче некоторых неверных аргументов в систему `arrayReverse` функция. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) +- Исправлено переполнение буфера в системе `extractURLParameter` функция. Повышение производительности. Добавлена корректная обработка строк, содержащих ноль байт. [141e9799](https://github.com/ClickHouse/ClickHouse/commit/141e9799e49201d84ea8e951d1bed4fb6d3dacb5) +- Исправлено переполнение буфера в системе `lowerUTF8` и `upperUTF8` функции. Удалена возможность выполнения этих функций сверх `FixedString` аргумент типа. [\#3662](https://github.com/ClickHouse/ClickHouse/pull/3662) +- Исправлено редкое состояние гонки при удалении `MergeTree` таблицы. [\#3680](https://github.com/ClickHouse/ClickHouse/pull/3680) +- Исправлено состояние гонки при чтении с `Buffer` таблицы и одновременно выполнять `ALTER` или `DROP` на целевых столах. [\#3719](https://github.com/ClickHouse/ClickHouse/pull/3719) +- Исправлен сегфолт, если `max_temporary_non_const_columns` лимит был превышен. [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) + +#### Улучшения: {#improvements-1} + +- Сервер не записывает обработанные конфигурационные файлы в систему. `/etc/clickhouse-server/` каталог. Вместо этого он спасает их в будущем. `preprocessed_configs` каталог внутри `path`. Это означает, что `/etc/clickhouse-server/` Директория не имеет доступа на запись для `clickhouse` пользователь, что повышает безопасность. [\#2443](https://github.com/ClickHouse/ClickHouse/pull/2443) +- То `min_merge_bytes_to_use_direct_io` по умолчанию параметр установлен на 10 гигабайт. Слияние, которое образует большие части таблиц из семейства MergeTree, будет выполнено в `O_DIRECT` режим, который предотвращает чрезмерное вытеснение кэша страниц. [\#3504](https://github.com/ClickHouse/ClickHouse/pull/3504) +- Ускоренный запуск сервера при наличии очень большого количества таблиц. [\#3398](https://github.com/ClickHouse/ClickHouse/pull/3398) +- Добавлен пул соединений и HTTP `Keep-Alive` для связи между репликами. [\#3594](https://github.com/ClickHouse/ClickHouse/pull/3594) +- Если синтаксис запроса неверен, то `400 Bad Request` код возвращается в виде `HTTP` интерфейс (ранее было возвращено 500). [31bc680a](https://github.com/ClickHouse/ClickHouse/commit/31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab) +- То `join_default_strictness` параметр установлен в значение `ALL` по умолчанию для обеспечения совместимости. 
[120e2cbe](https://github.com/ClickHouse/ClickHouse/commit/120e2cbe2ff4fbad626c28042d9b28781c805afe) +- Удалено ведение журнала в `stderr` из `re2` библиотека для недопустимых или сложных регулярных выражений. [\#3723](https://github.com/ClickHouse/ClickHouse/pull/3723) +- Добавлено в `Kafka` механизм таблиц: проверяет наличие подписок перед началом чтения из Kafka; параметр kafka\_max\_block\_size для таблицы. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3396) +- То `cityHash64`, `farmHash64`, `metroHash64`, `sipHash64`, `halfMD5`, `murmurHash2_32`, `murmurHash2_64`, `murmurHash3_32`, и `murmurHash3_64` функции теперь работают для любого количества аргументов и для аргументов в виде кортежей. [\#3451](https://github.com/ClickHouse/ClickHouse/pull/3451) [\#3519](https://github.com/ClickHouse/ClickHouse/pull/3519) +- То `arrayReverse` функция теперь работает с любыми типами массивов. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) +- Добавлен необязательный параметр: размер слота для `timeSlots` функция. [Кирилл Шваков](https://github.com/ClickHouse/ClickHouse/pull/3724) +- Для `FULL` и `RIGHT JOIN`, этот `max_block_size` настройка используется для потока несвязанных данных из правой таблицы. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3699) +- Добавил тот `--secure` параметр командной строки в `clickhouse-benchmark` и `clickhouse-performance-test` чтобы включить TLS. [\#3688](https://github.com/ClickHouse/ClickHouse/pull/3688) [\#3690](https://github.com/ClickHouse/ClickHouse/pull/3690) +- Тип преобразования, когда структура a `Buffer` таблица типов не соответствует структуре целевой таблицы. [Виталий Баранов](https://github.com/ClickHouse/ClickHouse/pull/3603) +- Добавил тот `tcp_keep_alive_timeout` опция для включения пакетов keep-alive после бездействия в течение заданного интервала времени. [\#3441](https://github.com/ClickHouse/ClickHouse/pull/3441) +- Удалены ненужные кавычки значений для ключа раздела В разделе `system.parts` таблица, если она состоит из одного столбца. [\#3652](https://github.com/ClickHouse/ClickHouse/pull/3652) +- Функция по модулю работает для `Date` и `DateTime` тип данных. [\#3385](https://github.com/ClickHouse/ClickHouse/pull/3385) +- Добавлены синонимы для этого `POWER`, `LN`, `LCASE`, `UCASE`, `REPLACE`, `LOCATE`, `SUBSTR`, и `MID` функции. [\#3774](https://github.com/ClickHouse/ClickHouse/pull/3774) [\#3763](https://github.com/ClickHouse/ClickHouse/pull/3763) Некоторые имена функций не зависят от регистра для обеспечения совместимости со стандартом SQL. Добавлен синтаксический сахар `SUBSTRING(expr FROM start FOR length)` для совместимости с SQL. [\#3804](https://github.com/ClickHouse/ClickHouse/pull/3804) +- Добавлена возможность `mlock` страницы памяти, соответствующие `clickhouse-server` исполняемый код, чтобы предотвратить его вытеснение из памяти. По умолчанию эта функция отключена. [\#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) +- Улучшенная производительность при чтении с `O_DIRECT` (с помощью `min_bytes_to_use_direct_io` опция включена). [\#3405](https://github.com/ClickHouse/ClickHouse/pull/3405) +- Улучшенная производительность системы `dictGet...OrDefault` функция для постоянного ключевого аргумента и непостоянного аргумента по умолчанию. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3563) +- То `firstSignificantSubdomain` функция теперь обрабатывает Домены `gov`, `mil`, и `edu`. 
[Игорь Hatarist](https://github.com/ClickHouse/ClickHouse/pull/3601) Повышение производительности. [\#3628](https://github.com/ClickHouse/ClickHouse/pull/3628) +- Возможность указать пользовательские переменные среды для запуска `clickhouse-server` с помощью `SYS-V init.d` сценарий по определению `CLICKHOUSE_PROGRAM_ENV` в `/etc/default/clickhouse`. + [Павел Башинский](https://github.com/ClickHouse/ClickHouse/pull/3612) +- Правильный код возврата для сценария clickhouse-server init. [\#3516](https://github.com/ClickHouse/ClickHouse/pull/3516) +- То `system.metrics` таблица теперь имеет `VersionInteger` метрика и `system.build_options` есть ли добавленная строка `VERSION_INTEGER`, который содержит числовую форму версии ClickHouse, например `18016000`. [\#3644](https://github.com/ClickHouse/ClickHouse/pull/3644) +- Удалена возможность сравнения `Date` введите с номером, чтобы избежать потенциальных ошибок, таких как `date = 2018-12-17`, где кавычки вокруг даты опущены по ошибке. [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) +- Исправлено поведение статусных функций, таких как `rowNumberInAllBlocks`. Ранее они выводили результат, который был на одно число больше из-за запуска во время анализа запроса. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3729) +- Если `force_restore_data` файл не может быть удален, отображается сообщение об ошибке. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3794) + +#### Улучшения сборки: {#build-improvements-1} + +- Обновлено приложение `jemalloc` библиотека, которая исправляет потенциальную утечку памяти. [Амос Птица](https://github.com/ClickHouse/ClickHouse/pull/3557) +- Профилирование с помощью `jemalloc` включен по умолчанию для отладки сборок. [2cc82f5c](https://github.com/ClickHouse/ClickHouse/commit/2cc82f5cbe266421cd4c1165286c2c47e5ffcb15) +- Добавлена возможность запуска интеграционных тестов только тогда, когда `Docker` устанавливается в системе. [\#3650](https://github.com/ClickHouse/ClickHouse/pull/3650) +- Добавлен тест выражения fuzz в запросах SELECT. [\#3442](https://github.com/ClickHouse/ClickHouse/pull/3442) +- Добавлен стресс-тест для коммитов, который выполняет функциональные тесты параллельно и в случайном порядке, чтобы обнаружить больше условий гонки. [\#3438](https://github.com/ClickHouse/ClickHouse/pull/3438) +- Улучшен метод запуска clickhouse-сервера в образе Docker. [Эльгазал Ахмед](https://github.com/ClickHouse/ClickHouse/pull/3663) +- Для Docker образ, добавлена поддержка для инициализации базы данных с помощью файлов в `/docker-entrypoint-initdb.d` каталог. [Константин Лебедев](https://github.com/ClickHouse/ClickHouse/pull/3695) +- Исправления опирается на руку. [\#3709](https://github.com/ClickHouse/ClickHouse/pull/3709) + +#### Назад несовместимые изменения: {#backward-incompatible-changes} + +- Удалена возможность сравнения `Date` тип с номером. Вместо `toDate('2018-12-18') = 17883`, вы должны использовать явное преобразование типов `= toDate(17883)` [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) + +## ClickHouse релиз 18.14 {#clickhouse-release-18-14} + +### ClickHouse релиз 18.14.19, 2018-12-19 {#clickhouse-release-18-14-19-2018-12-19} + +#### Устранение ошибок: {#bug-fixes-2} + +- Исправлена ошибка, которая привела к проблемам с обновлением словарей с источником ODBC. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) +- Базы данных правильно задаются при выполнении DDL `ON CLUSTER` запросы. 
[\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) +- Исправлен сегфолт, если `max_temporary_non_const_columns` лимит был превышен. [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) + +#### Улучшения сборки: {#build-improvements-2} + +- Исправления опирается на руку. + +### ClickHouse релиз 18.14.18, 2018-12-04 {#clickhouse-release-18-14-18-2018-12-04} + +#### Устранение ошибок: {#bug-fixes-3} + +- Исправлена ошибка в работе `dictGet...` функция для словарей типа `range`, если один из аргументов является постоянным, а другой-нет. [\#3751](https://github.com/ClickHouse/ClickHouse/pull/3751) +- Исправлена ошибка, приводившая к появлению сообщений `netlink: '...': attribute type 1 has an invalid length` чтобы быть напечатанным в журнале ядра Linux, это происходило только на достаточно свежих версиях ядра Linux. [\#3749](https://github.com/ClickHouse/ClickHouse/pull/3749) +- Исправлена обработка выхода онлайн / оффлайн в функции `empty` для аргументации из `FixedString` тип. [Дэниел, Дао Куанг Мин](https://github.com/ClickHouse/ClickHouse/pull/3703) +- Исправлено чрезмерное выделение памяти при использовании большого значения `max_query_size` настройка (фрагмент памяти из `max_query_size` байты были предварительно распределены сразу). [\#3720](https://github.com/ClickHouse/ClickHouse/pull/3720) + +#### Изменения в сборке: {#build-changes} + +- Исправлена сборка с библиотеками LLVM/Clang версии 7 из пакетов ОС (эти библиотеки используются для компиляции запросов во время выполнения). [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) + +### ClickHouse релиз 18.14.17, 2018-11-30 {#clickhouse-release-18-14-17-2018-11-30} + +#### Устранение ошибок: {#bug-fixes-4} + +- Исправлены случаи, когда процесс моста ODBC не завершался с основным серверным процессом. [\#3642](https://github.com/ClickHouse/ClickHouse/pull/3642) +- Исправлено одновременное включение в `Distributed` таблица со списком столбцов, который отличается от списка столбцов удаленной таблицы. [\#3673](https://github.com/ClickHouse/ClickHouse/pull/3673) +- Исправлено редкое состояние гонки, которое может привести к аварии при падении таблицы MergeTree. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) +- Исправлена взаимоблокировка запросов в случае сбоя при создании потока запросов с помощью `Resource temporarily unavailable` ошибка. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) +- Исправлен разбор текста `ENGINE` п. Когда `CREATE AS table` был использован синтаксис, а также `ENGINE` оговорка была указана еще до того, как `AS table` (ошибка привела к игнорированию указанного движка). [\#3692](https://github.com/ClickHouse/ClickHouse/pull/3692) + +### ClickHouse релиз 18.14.15, 2018-11-21 {#clickhouse-release-18-14-15-2018-11-21} + +#### Устранение ошибок: {#bug-fixes-5} + +- Размер блока памяти был завышен при десериализации столбца типа `Array(String)` это приводит к тому, что «Memory limit exceeded» ошибки. Проблема появилась в версии 18.12.13. [\#3589](https://github.com/ClickHouse/ClickHouse/issues/3589) + +### ClickHouse релиз 18.14.14, 2018-11-20 {#clickhouse-release-18-14-14-2018-11-20} + +#### Устранение ошибок: {#bug-fixes-6} + +- Исправлено `ON CLUSTER` запросы, когда кластер настроен как безопасный (флаг ``). 
[\#3599](https://github.com/ClickHouse/ClickHouse/pull/3599)
+
+#### Build changes: {#build-changes-1}
+
+- Fixed build problems (llvm-7 from the system, macos) [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582)
+
+### ClickHouse release 18.14.13, 2018-11-08 {#clickhouse-release-18-14-13-2018-11-08}
+
+#### Bug fixes: {#bug-fixes-7}
+
+- Fixed the `Block structure mismatch in MergingSorted stream` error. [\#3162](https://github.com/ClickHouse/ClickHouse/issues/3162)
+- Fixed `ON CLUSTER` queries in case of secure connections turned on in the cluster config (the `<secure>` flag). [\#3465](https://github.com/ClickHouse/ClickHouse/pull/3465)
+- Fixed an error in queries that used `SAMPLE`, `PREWHERE` and alias columns. [\#3543](https://github.com/ClickHouse/ClickHouse/pull/3543)
+- Fixed the rare `unknown compression method` error when the `min_bytes_to_use_direct_io` setting was enabled. [3544](https://github.com/ClickHouse/ClickHouse/pull/3544)
+
+#### Performance improvements: {#performance-improvements}
+
+- Fixed a performance regression of queries with `GROUP BY` of columns of UInt16 or Date type when executing on AMD EPYC processors. [Igor Lapko](https://github.com/ClickHouse/ClickHouse/pull/3512)
+- Fixed a performance regression of queries that process long strings. [\#3530](https://github.com/ClickHouse/ClickHouse/pull/3530)
+
+#### Build improvements: {#build-improvements-3}
+
+- Improvements for simplifying the Arcadia build. [\#3475](https://github.com/ClickHouse/ClickHouse/pull/3475), [\#3535](https://github.com/ClickHouse/ClickHouse/pull/3535)
+
+### ClickHouse release 18.14.12, 2018-11-02 {#clickhouse-release-18-14-12-2018-11-02}
+
+#### Bug fixes: {#bug-fixes-8}
+
+- Fixed a crash when joining two unnamed subqueries. [\#3505](https://github.com/ClickHouse/ClickHouse/pull/3505)
+- Fixed the generation of incorrect queries (with an empty `WHERE` clause) when querying external databases. [hotid](https://github.com/ClickHouse/ClickHouse/pull/3477)
+- Fixed using an incorrect timeout value in ODBC dictionaries. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3511)
+
+### ClickHouse release 18.14.11, 2018-10-29 {#clickhouse-release-18-14-11-2018-10-29}
+
+#### Bug fixes: {#bug-fixes-9}
+
+- Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [\#2156](https://github.com/ClickHouse/ClickHouse/issues/2156)
+- Fixed errors when merging data in tables containing arrays inside Nested structures. [\#3397](https://github.com/ClickHouse/ClickHouse/pull/3397)
+- Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [\#3429](https://github.com/ClickHouse/ClickHouse/pull/3429)
+- Fixed an error on inserts to a Distributed table in Native format. [\#3411](https://github.com/ClickHouse/ClickHouse/issues/3411)
+
+### ClickHouse release 18.14.10, 2018-10-23 {#clickhouse-release-18-14-10-2018-10-23}
+
+- The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [\#3410](https://github.com/ClickHouse/ClickHouse/pull/3410)
+- The `enable_optimize_predicate_expression` setting is disabled by default.
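+For reference, either setting can still be turned back on per session; a sketch with plain `SET` (the setting names are taken from the two items above):
+
+``` sql
+SET compile_expressions = 1;                  -- re-enable JIT compilation of expressions
+SET enable_optimize_predicate_expression = 1; -- re-enable predicate optimization
+```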
+
+### ClickHouse release 18.14.9, 2018-10-16 {#clickhouse-release-18-14-9-2018-10-16}
+
+#### New features: {#new-features-1}
+
+- The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available; see the sketch after this list). [\#3172](https://github.com/ClickHouse/ClickHouse/pull/3172)
+- Added the `formatDateTime` function. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2770)
+- Added the `JDBC` table engine and the `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210)
+- Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [\#3146](https://github.com/ClickHouse/ClickHouse/pull/3146)
+- Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362)
+- Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/ClickHouse/ClickHouse/pull/3144)
+- Parallel processing is available for multiple `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3208)
+- Added the `SYSTEM FLUSH LOGS` query (a forced flush of logs to system tables such as `query_log`). [\#3321](https://github.com/ClickHouse/ClickHouse/pull/3321)
+- Now you can use predefined `database` and `table` macros when declaring `Replicated` tables. [\#3251](https://github.com/ClickHouse/ClickHouse/pull/3251)
+- Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). [\#3153](https://github.com/ClickHouse/ClickHouse/pull/3153)
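+
+A minimal sketch of the `WITH CUBE` modifier from the first entry above (the column names and data are illustrative only):
+
+``` sql
+SELECT k1, k2, sum(v)
+FROM (SELECT number % 2 AS k1, number % 3 AS k2, number AS v FROM numbers(12))
+GROUP BY k1, k2 WITH CUBE;
+
+-- The alternative syntax mentioned above:
+-- GROUP BY CUBE(k1, k2)
+```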
+
+#### Experimental features: {#experimental-features}
+
+- Optimization of the GROUP BY clause for `LowCardinality` data types. [\#3138](https://github.com/ClickHouse/ClickHouse/pull/3138)
+- Optimized calculation of expressions for `LowCardinality` data types. [\#3200](https://github.com/ClickHouse/ClickHouse/pull/3200)
+
+#### Improvements: {#improvements-2}
+
+- Significantly reduced memory consumption for queries with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205)
+- In the absence of a `JOIN` type (`LEFT`, `INNER`, …), `INNER JOIN` is assumed. [\#3147](https://github.com/ClickHouse/ClickHouse/pull/3147)
+- Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3202)
+- The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of the remote database. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210)
+- The `compile_expressions` setting (JIT compilation of expressions) is enabled by default.
+- Fixed behavior for simultaneously dropping a database/table if it exists and creating a database/table if it doesn't exist. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message "File ... already exists", and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [\#3101](https://github.com/ClickHouse/ClickHouse/pull/3101)
+- LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182)
+- Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182)
+- Correct calculation of row width in the terminal for `Pretty` formats, including strings with hieroglyphs. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3257).
+- `ON CLUSTER` can be specified for `ALTER UPDATE` queries.
+- Improved performance for reading data in `JSONEachRow` format. [\#3332](https://github.com/ClickHouse/ClickHouse/pull/3332)
+- Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive (see the sketch after this list). [\#3306](https://github.com/ClickHouse/ClickHouse/pull/3306)
+- Added the `TIMESTAMP` synonym for the `DateTime` type. [\#3390](https://github.com/ClickHouse/ClickHouse/pull/3390)
+- There is always space reserved for query\_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools.
+- Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205)
+- Added a compatibility mode for the case when a client library that uses the Native protocol mistakenly sends fewer columns than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, it caused the server to crash. [\#3171](https://github.com/ClickHouse/ClickHouse/pull/3171)
+- In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). This is useful if the partitioning scheme changes during copying, but only changes slightly. [\#3166](https://github.com/ClickHouse/ClickHouse/pull/3166)
+- The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215).
+- Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto` format. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3216)
+- The list of top-level domains for the `firstSignificantSubdomain` function now includes the `biz` domain. [decaseal](https://github.com/ClickHouse/ClickHouse/pull/3219)
+- In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [\#3330](https://github.com/ClickHouse/ClickHouse/pull/3330)
+- Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/ClickHouse/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264)
+- Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [\#3281](https://github.com/ClickHouse/ClickHouse/pull/3281)
+- Support for the `Decimal` data type in external dictionaries. [\#3324](https://github.com/ClickHouse/ClickHouse/pull/3324)
+- Support for the `Decimal` data type in `SummingMergeTree` tables. [\#3348](https://github.com/ClickHouse/ClickHouse/pull/3348)
+- Added specializations for `UUID` in `if`. [\#3366](https://github.com/ClickHouse/ClickHouse/pull/3366)
+- Reduced the number of `open` and `close` system calls when reading from a `MergeTree table`. [\#3283](https://github.com/ClickHouse/ClickHouse/pull/3283)
+- A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3375)
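+
+A small sketch of the compatibility synonyms and the `Decimal` arithmetic mentioned in this list (output values are omitted, since exact result types can vary by version):
+
+``` sql
+SELECT
+    LENGTH('abc'),                               -- synonym of length()
+    CHARACTER_LENGTH('abc'),                     -- likewise
+    concat('Click', 'House'),                    -- CONCAT is no longer case-sensitive
+    CAST('2018-10-16 00:00:00' AS TIMESTAMP),    -- TIMESTAMP as a DateTime synonym
+    intDiv(toDecimal32(7.5, 1), 2);              -- integer division over Decimal
+```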
+
+#### Bug fixes: {#bug-fixes-10}
+
+- Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [\#1702](https://github.com/ClickHouse/ClickHouse/pull/1702)
+- Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362)
+- Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [\#3331](https://github.com/ClickHouse/ClickHouse/pull/3331) [\#3341](https://github.com/ClickHouse/ClickHouse/pull/3341)
+- If after merging data parts the checksum for the resulting part differs from the result of the same merge on another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn't be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [\#3194](https://github.com/ClickHouse/ClickHouse/pull/3194)
+- Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3344)
+- Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...`. This error occurred in version 18.12.13. [\#3247](https://github.com/ClickHouse/ClickHouse/pull/3247)
+- Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the query if the `JOIN` is only performed on remote servers. [\#3340](https://github.com/ClickHouse/ClickHouse/pull/3340)
+- Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215)
+- For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/ClickHouse/ClickHouse/pull/3150)
+- If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses another server to connect to. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn't start. [8218cf3a](https://github.com/ClickHouse/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9)
+- If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/ClickHouse/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29)
+- Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numeric value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results (see the sketch after this list). [\#3229](https://github.com/ClickHouse/ClickHouse/pull/3229)
+- Fixed type conversion between `Decimal` and integers. [\#3211](https://github.com/ClickHouse/ClickHouse/pull/3211)
+- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3231)
+- Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;`. [\#3155](https://github.com/ClickHouse/ClickHouse/pull/3155)
+- Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/3163)
+- Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [\#3165](https://github.com/ClickHouse/ClickHouse/pull/3165)
+- Fixed bugs in the `ALTER UPDATE` query.
+- Fixed bugs in the `odbc` table function that appeared in version 18.12. [\#3197](https://github.com/ClickHouse/ClickHouse/pull/3197)
+- Fixed the operation of aggregate functions with `StateArray` combinators. [\#3188](https://github.com/ClickHouse/ClickHouse/pull/3188)
+- Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/ClickHouse/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179)
+- Fixed output of types for operations using `Decimal` and integer arguments. [\#3224](https://github.com/ClickHouse/ClickHouse/pull/3224)
+- Fixed the segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/ClickHouse/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a)
+- The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [\#3241](https://github.com/ClickHouse/ClickHouse/pull/3241)
+- Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/ClickHouse/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664)
+- Fixed the compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [\#3254](https://github.com/ClickHouse/ClickHouse/pull/3254)
+- Fixed handling of substitutions in `clickhouse-performance-test` if the query contains only part of the substitutions declared in the test. [\#3263](https://github.com/ClickHouse/ClickHouse/pull/3263)
+- Fixed an error when using `FINAL` with `PREWHERE`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
+- Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
+- Added a check for the absence of `arrayJoin` in `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [\#3337](https://github.com/ClickHouse/ClickHouse/pull/3337)
+- Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [\#3357](https://github.com/ClickHouse/ClickHouse/pull/3357)
+- Fixed a segfault that could occur in rare cases after an optimization that replaced AND chains from equality evaluations with the corresponding IN expression. [liuyimin-bytedance](https://github.com/ClickHouse/ClickHouse/pull/3339)
+- Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of executed queries is calculated more accurately when shutting down and for limiting the number of iterations. [\#3351](https://github.com/ClickHouse/ClickHouse/pull/3351) [\#3352](https://github.com/ClickHouse/ClickHouse/pull/3352)
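+
+A minimal sketch of the `VALUES` conversion from the fix above (the table name is an assumption for illustration):
+
+``` sql
+CREATE TABLE date_demo (d Date, dt DateTime) ENGINE = Memory;
+
+SET input_format_values_interpret_expressions = 1;
+
+-- A Date expression inserted into the DateTime column is now converted
+-- as a calendar date, not as a raw day number:
+INSERT INTO date_demo VALUES (toDate('2018-10-23'), toDate('2018-10-23'));
+
+SELECT * FROM date_demo;
+```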
+
+#### Backward incompatible changes: {#backward-incompatible-changes-1}
+
+- Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available for use by default. [\#3329](https://github.com/ClickHouse/ClickHouse/pull/3329)
+
+## ClickHouse release 18.12 {#clickhouse-release-18-12}
+
+### ClickHouse release 18.12.17, 2018-09-16 {#clickhouse-release-18-12-17-2018-09-16}
+
+#### New features: {#new-features-2}
+
+- `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [\#3126](https://github.com/ClickHouse/ClickHouse/pull/3126)
+- Added the ability to use the `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of the ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/ClickHouse/ClickHouse/pull/3123)
+- The `Decimal` type now supports `var*` and `stddev*` aggregate functions (see the sketch after this list). [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
+- The `Decimal` type now supports mathematical functions (`exp`, `sin`, and so on). [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
+- The `system.part_log` table now has the `partition_id` column. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)
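+
+A small sketch of the new `Decimal` support described above (the data is synthetic; exact result types may vary by version):
+
+``` sql
+-- var*/stddev* aggregate functions over a Decimal column:
+SELECT varPop(x), stddevPop(x)
+FROM (SELECT toDecimal64(number, 2) AS x FROM numbers(10));
+
+-- Mathematical functions over Decimal arguments:
+SELECT exp(toDecimal32(1, 3)), sin(toDecimal32(0.5, 3));
+```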
+
+#### Bug fixes: {#bug-fixes-11}
+
+- `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3159)
+- Fixed incompatibility (an unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [\#3130](https://github.com/ClickHouse/ClickHouse/pull/3130)
+- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)
+- Fixed a minor issue with backward compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn't happen). [\#3122](https://github.com/ClickHouse/ClickHouse/pull/3122)
+
+#### Backward incompatible changes: {#backward-incompatible-changes-2}
+
+- The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)
+
+### ClickHouse release 18.12.14, 2018-09-13 {#clickhouse-release-18-12-14-2018-09-13}
+
+#### New features: {#new-features-3}
+
+- Added support for `ALTER UPDATE` queries. [\#3035](https://github.com/ClickHouse/ClickHouse/pull/3035)
+- Added the `allow_ddl` option, which restricts the user's access to DDL queries. [\#3104](https://github.com/ClickHouse/ClickHouse/pull/3104)
+- Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using O\_DIRECT). [\#3117](https://github.com/ClickHouse/ClickHouse/pull/3117)
+- The `system.merges` system table now contains the `partition_id` column. [\#3099](https://github.com/ClickHouse/ClickHouse/pull/3099)
+
+#### Improvements {#improvements-3}
+
+- If a data part remains unchanged during a mutation, it isn't downloaded by replicas. [\#3103](https://github.com/ClickHouse/ClickHouse/pull/3103)
+- Autocomplete is available for names of settings when working with `clickhouse-client`. [\#3106](https://github.com/ClickHouse/ClickHouse/pull/3106)
+
+#### Bug fixes: {#bug-fixes-12}
+
+- Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [\#3118](https://github.com/ClickHouse/ClickHouse/pull/3118)
+- Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13.
+- Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3098)
+- Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3097)
+
+### ClickHouse release 18.12.13, 2018-09-10 {#clickhouse-release-18-12-13-2018-09-10}
+
+#### New features: {#new-features-4}
+
+- Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the setting `allow_experimental_decimal_type`. [\#2846](https://github.com/ClickHouse/ClickHouse/pull/2846) [\#2970](https://github.com/ClickHouse/ClickHouse/pull/2970) [\#3008](https://github.com/ClickHouse/ClickHouse/pull/3008) [\#3047](https://github.com/ClickHouse/ClickHouse/pull/3047)
+- New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`). [\#2948](https://github.com/ClickHouse/ClickHouse/pull/2948)
+- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2787)
+- Added support for JOIN with table functions. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
+- Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/ClickHouse/ClickHouse/pull/2447)
+- Ctrl+C in clickhouse-client clears a query that was entered. [\#2877](https://github.com/ClickHouse/ClickHouse/pull/2877)
+- Added the `join_default_strictness` setting (options: `"`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [\#2982](https://github.com/ClickHouse/ClickHouse/pull/2982)
+- Each line of the server log related to query processing shows the query ID. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- Added `ProfileEvents` counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and recorded, both with and without page cache). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- Added the `system.query_thread_log` table, which contains information about each query execution thread. Added the `log_query_threads` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
+- The `system.metrics` and `system.events` tables now have built-in documentation. [\#3016](https://github.com/ClickHouse/ClickHouse/pull/3016)
+- Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2975)
+- Added the `arrayCumSumNonNegative` and `arrayDifference` functions (see the sketch after this list). [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/2942)
+- Added the `retention` aggregate function. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2887)
+- Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [\#3062](https://github.com/ClickHouse/ClickHouse/pull/3062) [\#3034](https://github.com/ClickHouse/ClickHouse/pull/3034)
+- Tables in the MergeTree family now have the virtual column `_partition_id`. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)
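+
+A one-query sketch of the new array functions listed above (expected results are shown as comments):
+
+``` sql
+SELECT
+    arrayDifference([1, 3, 6, 10]),          -- [0, 2, 3, 4]
+    arrayCumSumNonNegative([1, -2, 3]),      -- [1, 0, 3]
+    arrayEnumerateDense([10, 20, 10]);       -- [1, 2, 1]
+```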
+
+#### Experimental features: {#experimental-features-1}
+
+- Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary (see the sketch below). [\#2830](https://github.com/ClickHouse/ClickHouse/pull/2830)
+- Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT-compile expressions, enable the `compile_expressions` setting. [\#2990](https://github.com/ClickHouse/ClickHouse/pull/2990) [\#3077](https://github.com/ClickHouse/ClickHouse/pull/3077)
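+
+A minimal sketch of declaring a `LowCardinality(String)` column (the table name is illustrative; the setting name for enabling the experimental type is an assumption for this release):
+
+``` sql
+SET allow_experimental_low_cardinality_type = 1;  -- the type was experimental at this point
+
+CREATE TABLE lc_demo
+(
+    s LowCardinality(String)
+)
+ENGINE = MergeTree ORDER BY tuple();
+```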
+
+#### Improvements: {#improvements-4}
+
+- Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag.
+- Improved performance of `GROUP BY` with multiple aggregation fields when one of them is string and the others are fixed length.
+- Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`.
+- Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2977) [\#2980](https://github.com/ClickHouse/ClickHouse/pull/2980)
+- Improved performance of reading strings and arrays in binary formats. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2955)
+- Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [\#2953](https://github.com/ClickHouse/ClickHouse/pull/2953)
+- Fixed a performance problem in the case of a large stream of queries that result in an error (the `_dl_addr` function is visible in `perf top`, but the server isn't using much CPU). [\#2938](https://github.com/ClickHouse/ClickHouse/pull/2938)
+- Conditions are pushed down into the view (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
+- Improvements to the functionality for the `UUID` data type. [\#3074](https://github.com/ClickHouse/ClickHouse/pull/3074) [\#2985](https://github.com/ClickHouse/ClickHouse/pull/2985)
+- The `UUID` data type is supported in external dictionaries. [\#2822](https://github.com/ClickHouse/ClickHouse/pull/2822)
+- The `visitParamExtractRaw` function works correctly with nested structures. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2974)
+- When the `input_format_skip_unknown_fields` setting is enabled, object fields in `JSONEachRow` format are skipped correctly. [BlahGeek](https://github.com/ClickHouse/ClickHouse/pull/2958)
+- For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [\#2920](https://github.com/ClickHouse/ClickHouse/pull/2920)
+- The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/ClickHouse/ClickHouse/pull/2971)
+- You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m` (see the sketch after this list). [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
+- You can use the `SELECT TOP n` syntax as an alternative for `LIMIT`. [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
+- Increased the size of the queue to write to system tables, so the `SystemLog parameter queue is full` error doesn't happen as often.
+- The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2801)
+- Duplicate columns can be used in a `USING` clause for `JOIN`. [\#3006](https://github.com/ClickHouse/ClickHouse/pull/3006)
+- `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [\#3003](https://github.com/ClickHouse/ClickHouse/pull/3003)
+- The `odbc` table function now allows you to specify the name of a database/schema. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2885)
+- Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/ClickHouse/ClickHouse/pull/2909)
+- The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`.
+- `ALTER DELETE` queries work for materialized views.
+- Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there is a very large number of `ReplicatedMergeTree` tables.
+- Support for `ATTACH TABLE ... ON CLUSTER` queries. [\#3025](https://github.com/ClickHouse/ClickHouse/pull/3025)
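+
+A short sketch of the equivalent `LIMIT` spellings noted above (using the built-in `system.numbers` table):
+
+``` sql
+SELECT number FROM system.numbers LIMIT 5, 10;        -- rows 5..14
+SELECT number FROM system.numbers LIMIT 10 OFFSET 5;  -- the same rows
+SELECT TOP 10 number FROM system.numbers;             -- alternative to LIMIT 10
+```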
+
+#### Bug fixes: {#bug-fixes-13}
+
+- Fixed an issue with `Dictionary` tables (the `Size of offsets doesn't match size of column` or `Unknown compression method` exception is thrown). This bug appeared in version 18.10.3. [\#2913](https://github.com/ClickHouse/ClickHouse/issues/2913)
+- Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during a merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [\#3049](https://github.com/ClickHouse/ClickHouse/pull/3049)
+- Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [\#3038](https://github.com/ClickHouse/ClickHouse/pull/3038)
+- Fixed the possibility of data loss when inserting into `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [\#2939](https://github.com/ClickHouse/ClickHouse/pull/2939) [\#2949](https://github.com/ClickHouse/ClickHouse/pull/2949) [\#2964](https://github.com/ClickHouse/ClickHouse/pull/2964)
+- Fixed a segfault with `JOIN ... ON`. [\#3000](https://github.com/ClickHouse/ClickHouse/pull/3000)
+- Fixed an error searching column names when the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [\#2994](https://github.com/ClickHouse/ClickHouse/pull/2994)
+- Fixed the "Not found column" error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. [\#3087](https://github.com/ClickHouse/ClickHouse/pull/3087)
+- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and the optimization of the move to `PREWHERE` is triggered. [\#2226](https://github.com/ClickHouse/ClickHouse/pull/2226) [\#3037](https://github.com/ClickHouse/ClickHouse/pull/3037) [\#3055](https://github.com/ClickHouse/ClickHouse/pull/3055) [\#3065](https://github.com/ClickHouse/ClickHouse/pull/3065) [\#3073](https://github.com/ClickHouse/ClickHouse/pull/3073) [\#3090](https://github.com/ClickHouse/ClickHouse/pull/3090) [\#3093](https://github.com/ClickHouse/ClickHouse/pull/3093)
+- Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [\#2910](https://github.com/ClickHouse/ClickHouse/pull/2910)
+- Fixed the incorrect result when comparing `nan` with integers. [\#3024](https://github.com/ClickHouse/ClickHouse/pull/3024)
+- Fixed an error in the `zlib-ng` library that could lead to a segfault in rare cases. [\#2854](https://github.com/ClickHouse/ClickHouse/pull/2854)
+- Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insert request results in multiple small blocks. [\#3084](https://github.com/ClickHouse/ClickHouse/pull/3084)
+- Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously.
+- Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [\#2989](https://github.com/ClickHouse/ClickHouse/pull/2989)
+- Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2960)
+- Fixed an error in the `arrayDistinct` function for `Nullable` array elements. [\#2845](https://github.com/ClickHouse/ClickHouse/pull/2845) [\#2937](https://github.com/ClickHouse/ClickHouse/pull/2937)
+- The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2929)
+- Fixed the segfault when re-initializing the ZooKeeper session. [\#2917](https://github.com/ClickHouse/ClickHouse/pull/2917)
+- Fixed potential blocking when working with ZooKeeper.
+- Fixed incorrect code for adding nested data structures in a `SummingMergeTree`.
+- When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2808)
+
+#### Security fix: {#security-fix}
+
+- Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. [\#2828](https://github.com/ClickHouse/ClickHouse/pull/2828) [\#2879](https://github.com/ClickHouse/ClickHouse/pull/2879) [\#2886](https://github.com/ClickHouse/ClickHouse/pull/2886) [\#2893](https://github.com/ClickHouse/ClickHouse/pull/2893) [\#2921](https://github.com/ClickHouse/ClickHouse/pull/2921)
+- Fixed incorrect validation of the file path in the `catBoostPool` table function. [\#2894](https://github.com/ClickHouse/ClickHouse/pull/2894)
+- The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user's configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2856)
+
+#### Backward incompatible changes: {#backward-incompatible-changes-3}
+
+- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level.
+
+#### Build changes: {#build-changes-2}
+
+- Most integration tests can now be run by commit.
+- Code style checks can also be run by commit.
+- The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/ClickHouse/ClickHouse/pull/2912)
+- When using clang to build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall -Wextra -Werror`. [\#2957](https://github.com/ClickHouse/ClickHouse/pull/2957)
+- Debugging the build uses the `jemalloc` debug option.
+- The interface of the library for interacting with ZooKeeper is declared abstract. [\#2950](https://github.com/ClickHouse/ClickHouse/pull/2950)
+
+## ClickHouse release 18.10 {#clickhouse-release-18-10}
+
+### ClickHouse release 18.10.3, 2018-08-13 {#clickhouse-release-18-10-3-2018-08-13}
+
+#### New features: {#new-features-5}
+
+- HTTPS can be used for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
+- Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32` (see the sketch after this list). [\#2791](https://github.com/ClickHouse/ClickHouse/pull/2791)
+- Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [\#2834](https://github.com/ClickHouse/ClickHouse/pull/2834)
+- Support for `UUID` in the key columns.
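+
+A one-query sketch of the hash-function family mentioned above (the input string is arbitrary):
+
+``` sql
+SELECT
+    murmurHash2_32('example'),
+    murmurHash2_64('example'),
+    murmurHash3_32('example'),
+    murmurHash3_64('example'),
+    murmurHash3_128('example');   -- returns a FixedString(16) value
+```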
+
+#### Improvements: {#improvements-5}
+
+- Clusters can be removed without restarting the server when they are deleted from the config files. [\#2777](https://github.com/ClickHouse/ClickHouse/pull/2777)
+- External dictionaries can be removed without restarting the server when they are deleted from config files. [\#2779](https://github.com/ClickHouse/ClickHouse/pull/2779)
+- Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/ClickHouse/ClickHouse/pull/2781)
+- Improvements for the `UUID` data type (not yet complete). [\#2618](https://github.com/ClickHouse/ClickHouse/pull/2618)
+- Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [\#2815](https://github.com/ClickHouse/ClickHouse/pull/2815)
+- Old records of completed mutations are deleted (`ALTER DELETE`). [\#2784](https://github.com/ClickHouse/ClickHouse/pull/2784)
+- Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/2841)
+- The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2851)
+- Added the `max_partition_size_to_drop` config option. [\#2782](https://github.com/ClickHouse/ClickHouse/pull/2782)
+- Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2812)
+- Added the `max_fetch_partition_retries_count` setting. [\#2831](https://github.com/ClickHouse/ClickHouse/pull/2831)
+- Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [\#2832](https://github.com/ClickHouse/ClickHouse/pull/2832)
+- The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2855)
+
+#### Bug fixes: {#bug-fixes-14}
+
+- Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0.
+- Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [\#2814](https://github.com/ClickHouse/ClickHouse/pull/2814)
+- Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [\#2930](https://github.com/ClickHouse/ClickHouse/pull/2930)
+- Fixed an error for concurrent `Set` or `Join` usage. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2823)
+- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a subquery if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2094)
+- Fixed a memory leak if an exception occurred when connecting to a MySQL server.
+- Fixed incorrect clickhouse-client response code in case of a query error.
+- Fixed incorrect behavior of materialized views containing DISTINCT. [\#2795](https://github.com/ClickHouse/ClickHouse/issues/2795)
+
+#### Backward incompatible changes {#backward-incompatible-changes-4}
+
+- Removed support for CHECK TABLE queries for Distributed tables.
+
+#### Build changes: {#build-changes-3}
+
+- The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed up to 20%. However, there are queries that have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. With highly competitive loads, CPU usage in userspace and in system shows just a slight increase. [\#2773](https://github.com/ClickHouse/ClickHouse/pull/2773)
+- Use of libressl from a submodule. [\#1983](https://github.com/ClickHouse/ClickHouse/pull/1983) [\#2807](https://github.com/ClickHouse/ClickHouse/pull/2807)
+- Use of unixodbc from a submodule. [\#2789](https://github.com/ClickHouse/ClickHouse/pull/2789)
+- Use of mariadb-connector-c from a submodule. [\#2785](https://github.com/ClickHouse/ClickHouse/pull/2785)
+- Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself).
+
+## ClickHouse release 18.6 {#clickhouse-release-18-6}
+
+### ClickHouse release 18.6.0, 2018-08-02 {#clickhouse-release-18-6-0-2018-08-02}
+
+#### New features: {#new-features-6}
+
+- Added support for ON expressions for the JOIN ON syntax:
+  `JOIN ON Expr([table.]column ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]`
+  The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [\#2742](https://github.com/ClickHouse/ClickHouse/pull/2742)
+- HTTPS can be enabled for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
+
+#### Improvements: {#improvements-6}
+
+- The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [\#2646](https://github.com/ClickHouse/ClickHouse/pull/2646)
+
+## ClickHouse release 18.5 {#clickhouse-release-18-5}
+
+### ClickHouse release 18.5.1, 2018-07-31 {#clickhouse-release-18-5-1-2018-07-31}
+
+#### New features: {#new-features-7}
+
+- Added the hash function `murmurHash2_32`. [\#2756](https://github.com/ClickHouse/ClickHouse/pull/2756)
+
+#### Improvements: {#improvements-7}
+
+- Now you can use the `from_env` attribute to set values in config files from environment variables. [\#2741](https://github.com/ClickHouse/ClickHouse/pull/2741)
+- Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions. [\#2752](https://github.com/ClickHouse/ClickHouse/pull/2752)
+
+#### Bug fixes: {#bug-fixes-15}
+
+- Fixed a possible bug when starting a replica. [\#2759](https://github.com/ClickHouse/ClickHouse/pull/2759)
+
+## ClickHouse release 18.4 {#clickhouse-release-18-4}
+
+### ClickHouse release 18.4.0, 2018-07-28 {#clickhouse-release-18-4-0-2018-07-28}
+
+#### New features: {#new-features-8}
+
+- Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations`. [\#2721](https://github.com/ClickHouse/ClickHouse/pull/2721)
+- Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function. [\#2708](https://github.com/ClickHouse/ClickHouse/pull/2708)
+- Support for `HTTP Basic` authentication in the replication protocol. [\#2727](https://github.com/ClickHouse/ClickHouse/pull/2727)
+- The `has` function now allows searching for a numeric value in an array of `Enum` values (see the sketch after this list). [Maxim Khrisanfov](https://github.com/ClickHouse/ClickHouse/pull/2699)
+- Support for adding arbitrary message separators when reading from `Kafka`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2701)
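+
+A minimal sketch of searching an `Enum` array by numeric value, per the entry above (the table name and enum values are illustrative):
+
+``` sql
+CREATE TABLE enum_demo (e Array(Enum8('a' = 1, 'b' = 2))) ENGINE = Memory;
+INSERT INTO enum_demo VALUES (['a', 'b']);
+
+SELECT has(e, 2) FROM enum_demo;   -- matches 'b' by its numeric value
+```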
+
+#### Improvements: {#improvements-8}
+
+- The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition. [\#2694](https://github.com/ClickHouse/ClickHouse/pull/2694)
+- The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed.
+- Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2689)
+
+#### Bug fixes: {#bug-fixes-16}
+
+- Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/ClickHouse/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2))
+- Fixed a bug in the `windowFunnel` aggregate function. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2735)
+- Fixed a bug in the `anyHeavy` aggregate function. ([a2101df2](https://github.com/ClickHouse/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee))
+- Fixed a server crash when using the `countArray()` aggregate function.
+
+#### Backward incompatible changes: {#backward-incompatible-changes-5}
+
+- Parameters for the `Kafka` engine were changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use the `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add the `kafka_row_delimiter` parameter with the `''` value.
+
+## ClickHouse release 18.1 {#clickhouse-release-18-1}
+
+### ClickHouse release 18.1.0, 2018-07-23 {#clickhouse-release-18-1-0-2018-07-23}
+
+#### New features: {#new-features-9}
+
+- Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([\#2634](https://github.com/ClickHouse/ClickHouse/pull/2634)).
+- Support for arbitrary types for the `uniq*` family of aggregate functions ([\#2010](https://github.com/ClickHouse/ClickHouse/issues/2010)).
+- Support for arbitrary types in comparison operators ([\#2026](https://github.com/ClickHouse/ClickHouse/issues/2026)).
+- The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([\#2637](https://github.com/ClickHouse/ClickHouse/pull/2637)).
+- Added the `arrayDistinct` function ([\#2670](https://github.com/ClickHouse/ClickHouse/pull/2670)).
+- The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/ClickHouse/ClickHouse/pull/2566)).
+
+#### Improvements: {#improvements-9}
+
+- Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow time zone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backward compatible, unless otherwise stated in the changelog.
+- Faster conversion of floating-point numbers to a string ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2664)).
+- If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log, as sketched below ([Leonardo Cecchi](https://github.com/ClickHouse/ClickHouse/pull/2669)).
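+
+A brief sketch of tolerating a few malformed input rows, per the last entry above (the thresholds are illustrative):
+
+``` sql
+SET input_allow_errors_num = 10;      -- allow up to 10 bad rows
+SET input_allow_errors_ratio = 0.1;   -- or up to 10% of the input
+
+-- INSERT INTO t FORMAT CSV ...
+-- Skipped-row counts are then reported in the server log.
+```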
+
+#### Bug fixes: {#bug-fixes-17}
+
+- Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2624)).
+- Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/ClickHouse/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)).
+- Fixed an error during a CAST to Nullable types ([\#1322](https://github.com/ClickHouse/ClickHouse/issues/1322)).
+- Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2657)).
+- Fixed incorrect transformation of the OR expression chain in a function argument ([chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2663)).
+- Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery ([\#2571](https://github.com/ClickHouse/ClickHouse/issues/2571)).
+- Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn't in uppercase letters ([fe8c4d6](https://github.com/ClickHouse/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)).
+- Added missing quoting of identifiers for queries to an external DBMS ([\#2635](https://github.com/ClickHouse/ClickHouse/issues/2635)).
+
+#### Backward incompatible changes: {#backward-incompatible-changes-6}
+
+- Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason why `DateTime DEFAULT '0'` does not work in tables, as well as `<null_value>0</null_value>` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`.
+
+## ClickHouse release 1.1 {#clickhouse-release-1-1}
+
+### ClickHouse release 1.1.54394, 2018-07-12 {#clickhouse-release-1-1-54394-2018-07-12}
+
+#### New features: {#new-features-10}
+
+- Added the `histogram` aggregate function (see the sketch below) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2521)).
+- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2600)).
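+
+A one-line sketch of the `histogram` function added above (5 is the requested number of bins; the data is synthetic):
+
+``` sql
+SELECT histogram(5)(number) FROM numbers(100);
+-- returns an array of (lower, upper, height) tuples describing adaptive bins
+```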
+
+#### Bug fixes: {#bug-fixes-18}
+
+- Fixed an issue with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388.
+- Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table.
+- The `has` function now works correctly for an array with Nullable elements ([\#2115](https://github.com/ClickHouse/ClickHouse/issues/2115)).
+- The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were queried from the table.
+- Fixed how an empty `TinyLog` table works after inserting an empty data block ([\#2563](https://github.com/ClickHouse/ClickHouse/issues/2563)).
+- The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL.
+
+### ClickHouse release 1.1.54390, 2018-07-06 {#clickhouse-release-1-1-54390-2018-07-06}
+
+#### New features: {#new-features-11}
+
+- Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Khvostikova](https://github.com/ClickHouse/ClickHouse/pull/2490)).
+- Added the ability to enable or disable processing single or double quotation marks when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2574)).
+- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2599)).
+
+#### Improvements: {#improvements-10}
+
+- Improved performance, reduced memory consumption, and correct memory consumption tracking with use of the IN operator when a table index could be used ([\#2584](https://github.com/ClickHouse/ClickHouse/pull/2584)).
+- Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2.
+- Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function (see the sketch after this list) ([\#2573](https://github.com/ClickHouse/ClickHouse/pull/2573)).
+- Added `Nullable` support for the `runningDifference` function ([\#2594](https://github.com/ClickHouse/ClickHouse/pull/2594)).
+- Improved query analysis performance when there is a very large number of expressions ([\#2572](https://github.com/ClickHouse/ClickHouse/pull/2572)).
+- Faster selection of data parts for merging in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session ([\#2597](https://github.com/ClickHouse/ClickHouse/pull/2597)).
+- The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/ClickHouse/ClickHouse/pull/2593)).
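+
+A one-liner sketch of `arrayEnumerateUniq` over an array of tuples, per the entry above (the expected result is shown as a comment):
+
+``` sql
+SELECT arrayEnumerateUniq([(1, 'a'), (1, 'a'), (2, 'b')]);   -- [1, 2, 1]
+```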
+
+#### Bug fixes: {#bug-fixes-19}
+
+- Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and read-only states of tables before restarting the server.
+- Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted.
+- Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2553)).
+- Fixed a bug in the index for MergeTree tables if the primary key column is located inside a function for converting types between signed and unsigned integers of the same size ([\#2603](https://github.com/ClickHouse/ClickHouse/pull/2603)).
+- Fixed a segfault if `macros` are used but they aren't in the config file ([\#2570](https://github.com/ClickHouse/ClickHouse/pull/2570)).
+- Fixed switching to the default database when reconnecting the client ([\#2583](https://github.com/ClickHouse/ClickHouse/pull/2583)).
+- Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled.
+
+#### Security fix: {#security-fix-1}
+
+- Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`).
+
+### ClickHouse release 1.1.54388, 2018-06-28 {#clickhouse-release-1-1-54388-2018-06-28}
+
+#### New features: {#new-features-12}
+
+- Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track the progress of this type of query.
+- Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables.
+- Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2260)).
+- Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`).
+- Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2294)).
+- Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2501)).
+- Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2352)).
+- New `startsWith` and `endsWith` functions for strings (see the sketch after this list) ([Vadim Plakhtinsky](https://github.com/ClickHouse/ClickHouse/pull/2429)).
+- The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2535)).
+- The password to `clickhouse-client` can be entered interactively.
+- Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2459)).
+- Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2472)).
+- Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/ClickHouse/ClickHouse/pull/2263)).
+- Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats.
+- Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests.
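+
+A short sketch of two of the additions above, `startsWith`/`endsWith` and the `numbers()` offset (results are shown as comments):
+
+``` sql
+SELECT startsWith('ClickHouse', 'Click'), endsWith('ClickHouse', 'House');  -- 1, 1
+SELECT * FROM numbers(10, 5);   -- offset 10, five rows: 10..14
+```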
+
+#### Experimental Features: {#experimental-features-2}
+
+- Added the ability to calculate `and` arguments only where they are needed ([Anastasiya Tsarkova](https://github.com/ClickHouse/ClickHouse/pull/2272))
+- JIT compilation to native code is now available for some expressions ([pyos](https://github.com/ClickHouse/ClickHouse/pull/2277)).
+
+#### Bug Fixes: {#bug-fixes-20}
+
+- Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`.
+- Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result.
+- Fixed an error when reading an array column from a Nested structure ([\#2066](https://github.com/ClickHouse/ClickHouse/issues/2066)).
+- Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`.
+- Fixed an error when analyzing queries with recursive aliases.
+- Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([\#2525](https://github.com/ClickHouse/ClickHouse/issues/2525)).
+- User profile settings were not applied when using sessions in the HTTP interface.
+- Fixed how settings are applied from the command line parameters in clickhouse-local.
+- The ZooKeeper client library now uses the session timeout received from the server.
+- Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout.
+- Fixed pruning of parts for queries with conditions on partition key columns ([\#2342](https://github.com/ClickHouse/ClickHouse/issues/2342)).
+- Merges are now possible after `CLEAR COLUMN IN PARTITION` ([\#2315](https://github.com/ClickHouse/ClickHouse/issues/2315)).
+- Fixed type mapping in the ODBC table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2268)).
+- Fixed type comparisons for `DateTime` with and without a time zone ([Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2400)).
+- Fixed parsing and formatting of the `CAST` operator.
+- Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2411)).
+- Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/ClickHouse/ClickHouse/pull/2448)).
+- Fixed SSRF in the remote() table function.
+- Fixed exit behavior of `clickhouse-client` in multiline mode ([\#2510](https://github.com/ClickHouse/ClickHouse/issues/2510)).
+
+#### Improvements: {#improvements-11}
+
+- Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/ClickHouse/ClickHouse/pull/1722)).
+- Improved LZ4 compression performance.
+- Faster analysis of queries with a large number of JOINs and subqueries.
+- The DNS cache is now updated automatically when there are too many network errors.
+- Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts.
+- Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`.
+- Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match.
+- A server with replicated tables can start even if you haven't configured ZooKeeper.
+- When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/ClickHouse/ClickHouse/pull/2325)).
+- Added chown for config directories in the systemd config file ([Mikhail Shiryaev](https://github.com/ClickHouse/ClickHouse/pull/2421)).
+
+#### Build Changes: {#build-changes-4}
+
+- The gcc8 compiler can be used for builds.
+- Added the ability to build llvm from a submodule.
+- The librdkafka library has been updated to v0.11.4.
+- Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0.
+- Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2274)).
+- CMake now generates files for ninja by default (like when using `-G Ninja`).
+- Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/ClickHouse/ClickHouse/pull/2519)).
+- Fixed a header file conflict in Fedora Rawhide ([\#2520](https://github.com/ClickHouse/ClickHouse/issues/2520)).
+
+#### Backward Incompatible Changes: {#backward-incompatible-changes-7}
+
+- Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format.
+- If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query, and the query has a `cast(x, 'Type')` expression without the `AS` keyword and doesn't have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: update the server on the entire cluster.
+
+### ClickHouse Release 1.1.54385, 2018-06-01 {#clickhouse-release-1-1-54385-2018-06-01}
+
+#### Bug Fixes: {#bug-fixes-21}
+
+- Fixed an error that in some cases caused ZooKeeper operations to block.
+
+### ClickHouse Release 1.1.54383, 2018-05-22 {#clickhouse-release-1-1-54383-2018-05-22}
+
+#### Bug Fixes: {#bug-fixes-22}
+
+- Fixed a slowdown of the replication queue if a table has many replicas.
+
+### ClickHouse Release 1.1.54381, 2018-05-14 {#clickhouse-release-1-1-54381-2018-05-14}
+
+#### Bug Fixes: {#bug-fixes-23}
+
+- Fixed a node leak in ZooKeeper when ClickHouse loses its connection to the ZooKeeper server.
+
+### ClickHouse Release 1.1.54380, 2018-04-21 {#clickhouse-release-1-1-54380-2018-04-21}
+
+#### New Features: {#new-features-13}
+
+- Added the table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`: ``` ln -s /dev/urandom /var/lib/clickhouse/user_files/random ``` then ``` clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10" ```.
+
+#### Improvements: {#improvements-12}
+
+- Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`.
+- Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit.
+
+#### Bug Fixes: {#bug-fixes-24}
+
+- Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`.
+- Fixed incorrect filtering by the partition index in expressions like `partition_key_column IN (...)`.
+- Fixed the inability to execute an `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table.
+- Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica.
+- Fixed freezing of `KILL QUERY`.
+- Fixed an error in the ZooKeeper client library that led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration.
+
+#### Backward Incompatible Changes: {#backward-incompatible-changes-8}
+
+- Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors.
+
+### ClickHouse Release 1.1.54378, 2018-04-16 {#clickhouse-release-1-1-54378-2018-04-16}
+
+#### New Features: {#new-features-14}
+
+- The logging level can be changed without restarting the server.
+- Added the `SHOW CREATE DATABASE` query.
+- A `query_id` can be passed to `clickhouse-client` (elBroom).
+- New setting: `max_network_bandwidth_for_all_users`.
+- Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW`.
+- Added information about the size of data parts in uncompressed form in the system table.
+- Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`).
+- Configuration of the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in ZooKeeper: `use_minimalistic_checksums_in_zookeeper = 1`
+- Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed; it is also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov).
+- Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson); see the sketch after this list.
+- When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was canceled` exception instead of an incomplete result.
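+
+A sketch of a `Kafka` table with several comma-separated topics (broker address, table, and topic names are made up; the engine arguments here are broker list, topic list, consumer group, and format, following the engine syntax of that era):
+
+``` sql
+CREATE TABLE queue (
+    ts UInt64,
+    message String
+) ENGINE = Kafka('localhost:9092', 'events,clicks', 'group1', 'JSONEachRow')
+```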
+
+#### Improvements: {#improvements-13}
+
+- `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue.
+- `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part.
+- A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov).
+- The `lengthUTF8` function runs faster (zhang2014).
+- Improved performance of synchronous inserts into `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards.
+- The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa).
+- More robust crash recovery for asynchronous inserts into `Distributed` tables.
+- The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊).
+
+#### Bug Fixes: {#bug-fixes-25}
+
+- Fixed an error with `IN` when the left side of the expression is `Nullable`.
+- Correct results are now returned when tuples are used with `IN` and some of the tuple components are in the table index.
+- The `max_execution_time` limit now works correctly with distributed queries.
+- Fixed errors when calculating the size of composite columns in the `system.columns` table.
+- Fixed an error when creating a temporary table `CREATE TEMPORARY TABLE IF NOT EXISTS.`
+- Fixed errors in `StorageKafka` (\#\#2075)
+- Fixed server crashes from invalid arguments of certain aggregate functions.
+- Fixed the error that prevented a `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables.
+- The `Too many parts` state is less likely to happen when inserting into aggregating materialized views (\#\#2084).
+- Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level.
+- Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`.
+- `SummingMergeTree` now works correctly for summation of nested data structures with a composite key.
+- Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables.
+
+#### Build Changes: {#build-changes-5}
+
+- The build supports `ninja` instead of `make` and uses `ninja` by default for building releases.
+- Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still be loaded into the repositories for backward compatibility.
+
+#### Backward Incompatible Changes: {#backward-incompatible-changes-9}
+
+- Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`.
+- Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `<listen_reuse_port>1</listen_reuse_port>` in the config.
+
+### ClickHouse Release 1.1.54370, 2018-03-16 {#clickhouse-release-1-1-54370-2018-03-16}
+
+#### New Features: {#new-features-15}
+
+- Added the `system.macros` table and automatic updating of macros when the config file is changed.
+- Added the `SYSTEM RELOAD CONFIG` query.
+- Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the "maximum" interval (a usage sketch follows the improvements list below). ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2012)).
+
+#### Improvements: {#improvements-14}
+
+- When inserting data into a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log).
+- Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`.
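+
+A usage sketch for `maxIntersections` (a hypothetical `sessions` table with interval columns):
+
+``` sql
+-- Peak number of sessions that were open at the same time:
+SELECT maxIntersections(start_time, end_time) FROM sessions
+```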
+
+#### Bug Fixes: {#bug-fixes-26}
+
+- Fixed the `Illegal PREWHERE` error when reading from Merge tables over `Distributed` tables.
+- Added fixes that allow you to run clickhouse-server in IPv4-only Docker containers.
+- Fixed a race condition when reading from the `system.parts_columns` table.
+- Removed double buffering during a synchronous insert into a `Distributed` table, which could have caused the connection to time out.
+- Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query.
+- Fixed incorrect dates in the `system.parts` table.
+- Fixed a bug that made it impossible to insert data into a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster.
+- Fixed the vertical merging algorithm for an empty `ORDER BY` table.
+- Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requesting server. This functionality was lost in release 1.1.54362.
+- Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of `IN` should use a remote `default.table` and not a local one. This behavior was broken in version 1.1.54358.
+- Removed extraneous error-level logging of `Not found column ... in block`.
+
+### ClickHouse Release 1.1.54362, 2018-03-11 {#clickhouse-release-1-1-54362-2018-03-11}
+
+#### New Features: {#new-features-16}
+
+- Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard. To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1.
+- Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard.
+- Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`.
+- An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova).
+- Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta).
+- Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings.
+- Added the functions: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`.
+- Added the `arrayCumSum` function (Javi Santana).
+- Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read a DateTime from a string containing text in a wide variety of possible formats (a usage sketch follows this list).
+- Data can be partially reloaded from external dictionaries during updating (load only the records in which the value of the specified field is greater than in the previous download) (Arsen Hakobyan).
+- Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier.
+- The `remote` and `cluster` table functions can be used in `INSERT` queries.
+- Added the `create_table_query` and `engine_full` virtual columns to the `system.tables` table. The `metadata_modification_time` column is virtual.
+- Added the `data_path` and `metadata_path` columns to the `system.tables` and `system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables.
+- Added additional information about merges to the `system.part_log` table.
+- An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov).
+- The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014).
+- Added the `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014).
+- Support for `SHOW CREATE TABLE` for temporary tables (zhang2014).
+- Added the `system_profile` configuration parameter for the settings used by internal processes.
+- Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko).
+- Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko).
+- Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes.
+- Failover is supported in the `remote` table functions for cases when some of the replicas are missing the requested table.
+- Configuration settings can be overridden on the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`.
+- Implemented the `empty` function for a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014).
+- Added the `listen_try` configuration parameter for listening on at least one of the listen addresses without quitting, if some of the addresses can't be listened on (useful for systems with disabled support for IPv4 or IPv6).
+- Added the `VersionedCollapsingMergeTree` table engine.
+- Support for rows and arbitrary numeric types for the `library` dictionary source.
+- `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`).
+- A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`.
+- `RENAME TABLE` can be performed for `VIEW`.
+- Added the `throwIf` function.
+- Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024).
+- The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns.
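+
+A small sketch of the best-effort DateTime parsing functions mentioned in the list above (the set of accepted formats is wider than shown):
+
+``` sql
+SELECT
+    parseDateTimeBestEffort('11/03/2018 10:00:00'),
+    parseDateTimeBestEffortOrNull('not a date')  -- NULL instead of an exception
+```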
+
+#### Improvements: {#improvements-15}
+
+- Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries.
+- Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts.
+- Added the `allow_distributed_ddl` option.
+- Nondeterministic functions are not allowed in expressions for `MergeTree` table keys.
+- Files with substitutions from the `config.d` directories are loaded in alphabetical order.
+- Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`.
+- The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks).
+- When running a query, table-valued functions run once. Previously, the `remote` and `mysql` table-valued functions performed the same query twice to retrieve the table structure from the remote server.
+- The `MkDocs` documentation generator is used.
+- When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014).
+- Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342.
+- `Enum` values can be used in the `min`, `max`, `sum` and some other functions. In these cases, the corresponding numeric values are used. This feature was previously available but was lost in release 1.1.54337.
+- Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases.
+
+#### Bug Fixes: {#bug-fixes-27}
+
+- Fixed cases when unnecessary columns were removed from subqueries in error, or not removed from subqueries containing `UNION ALL`.
+- Fixed a bug in merges for `ReplacingMergeTree` tables.
+- Fixed synchronous insertions into `Distributed` tables (`insert_distributed_sync = 1`).
+- Fixed a segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries.
+- Fixed a segfault for certain uses of `replace_running_query` and `KILL QUERY`.
+- Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table.
+- Fixed a bug when the `DROP DATABASE` query did not delete the file with metadata.
+- Fixed the `DROP DATABASE` query for a `Dictionary` database.
+- Fixed the low precision of the `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alexey Bocharov).
+- Fixed the calculation of implicit default values when necessary to simultaneously calculate explicit default expressions in `INSERT` queries (zhang2014).
+- Fixed a rare case when a query to a `MergeTree` table couldn't finish (chenxing-xc).
+- Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc).
+- Fixed a slight performance regression with functions that use regular expressions.
+- Fixed a performance regression when creating multidimensional arrays from complex expressions.
+- Fixed a bug that could cause an extra `FORMAT` section to appear in the `.sql` file with metadata.
+- Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table.
+- Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand).
+- Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but are empty for the old partitions, when the conditions for these columns moved to `PREWHERE`.
+- Fixed a bug when filtering tables by the virtual `_table` column in queries to `Merge` tables.
+- Fixed a bug when using `ALIAS` columns in `Distributed` tables.
+- Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family.
+- Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries.
+- Fixed a crash when passing arrays of different sizes to the `arrayReduce` function when using aggregate functions from multiple arguments.
+- Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`.
+- Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled).
+
+#### Backward Incompatible Changes: {#backward-incompatible-changes-10}
+
+- Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default.
+- Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`.
+- Removed the `UnsortedMergeTree` engine.
+
+### ClickHouse Release 1.1.54343, 2018-02-05 {#clickhouse-release-1-1-54343-2018-02-05}
+
+- Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`.
+- Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index.
+- Improved processing of duplicates when inserting into Replicated tables, so they no longer slow down execution of the replication queue.
+
+### ClickHouse Release 1.1.54342, 2018-01-22 {#clickhouse-release-1-1-54342-2018-01-22}
+
+This release contains bug fixes for the previous release 1.1.54337:
+
+- Fixed a regression in 1.1.54337: if the default user has read-only access, the server refuses to start up with the message `Cannot create database in readonly mode`.
+- Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d.
+- Fixed a regression in 1.1.54337: wrong default configuration in the Docker image.
+- Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`).
+- Fixed a bug that could lead to inconsistent merges after an OPTIMIZE query to Replicated tables (you may see it in log messages `Part ... intersects the previous part`).
+- Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014).
+- Fixed a bug in the implementation of NULL.
+
+### ClickHouse Release 1.1.54337, 2018-01-18 {#clickhouse-release-1-1-54337-2018-01-18}
+
+#### New Features: {#new-features-17}
+
+- Added support for storing multidimensional arrays and tuples (the `Tuple` data type) in tables.
+- Support for table functions for `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`.
+- Improved support for time zones. The `DateTime` data type can be annotated with the time zone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When time zones are specified in functions for `DateTime` arguments, the return type will track the time zone, and the value will be displayed as expected (a usage sketch follows this list).
+- Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive.
+- Added the `toStartOfFifteenMinutes` function (Kirill Shvakov).
+- Added the `clickhouse format` tool for formatting queries.
+- Added the `format_schema_path` configuration parameter (Marek Vavruša). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory.
+- Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin).
+- Added a column with documentation for the `system.settings` table (Kirill Shvakov).
+- Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables.
+- Added the `system.models` table with information about loaded `CatBoost` machine learning models.
+- Added the `mysql` and `odbc` table functions and the corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage.
+- Added the ability to pass an argument of type `AggregateFunction` to the `groupArray` aggregate function (so you can create an array of states of some aggregate function).
+- Removed restrictions on various combinations of aggregate function combinators. For example, you can use both the `avgForEachIf` and `avgIfForEach` aggregate functions, which have different behaviors.
+- The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments.
+- Added support for aggregate functions of `Nullable` arguments even for the cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Examples: `groupArray`, `groupUniqArray`, `topK`.
+- Added the `max_client_network_bandwidth` option for `clickhouse-client` (Kirill Shvakov).
+- Users with the `readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT…) (Kirill Shvakov).
+- Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša).
+- Added the `intExp3` and `intExp4` functions.
+- Added the `sumKahan` aggregate function.
+- Added the to\*Number\*OrNull functions, where \*Number\* is a numeric type.
+- Added support for the `WITH` clause for an `INSERT SELECT` query (author: zhang2014).
+- Added the settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded.
+- Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova).
+- The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory.
+- Added the `--silent` option for the `clickhouse-local` tool. It suppresses printing query execution info in stderr.
+- Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird).
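+
+A small sketch of the time zone annotation described in the list above (hypothetical table name):
+
+``` sql
+CREATE TABLE tz_demo (t DateTime('Europe/Moscow')) ENGINE = Memory;
+-- Values are parsed and formatted in the Moscow time zone:
+SELECT toTimeZone(now(), 'Europe/Moscow')
+```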
+
+#### Performance Optimizations: {#performance-optimizations}
+
+- Improved performance of the aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, `argMax` for string arguments.
+- Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`.
+- Improved performance of parsing and formatting `Date` and `DateTime` type values in text format.
+- Improved performance and precision of parsing floating point numbers.
+- Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING`.
+- Improved performance of the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr` by reducing computational stability. The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable`.
+
+#### Bug Fixes: {#bug-fixes-28}
+
+- Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again did not work because the inserted blocks were considered duplicates.
+- Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for `CREATE MATERIALIZED VIEW` queries with `POPULATE`.
+- Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration.
+- Fixed unexpected results of passing a `Date` argument to `toStartOfDay`.
+- Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for `INTERVAL n MONTH` in cases when the result has the previous year.
+- Added missing support for the `UUID` data type for `DISTINCT`, `JOIN`, and `uniq` aggregate functions and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete.
+- Fixed `SummingMergeTree` behavior in cases when the rows summed to zero.
+- Various fixes for the `Kafka` engine (Marek Vavruša).
+- Fixed incorrect behavior of the `Join` table engine (Amos Bird).
+- Fixed incorrect allocator behavior under FreeBSD and OS X.
+- The `extractAll` function now supports empty matches.
+- Fixed an error that blocked the use of `libressl` instead of `openssl`.
+- Fixed the `CREATE TABLE AS SELECT` query from temporary tables.
+- Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts.
+- Fixed possible overflow in `gcd`, `lcm` and `modulo` (the `%` operator) (Maks Skorokhod).
+- `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config).
+- Fixed a bug in the background check of parts (`MergeTreePartChecker`) when using a custom partition key.
+- Fixed parsing of tuples (values of the `Tuple` data type) in text formats.
+- Improved error messages about incompatible types passed to `multiIf`, `array` and some other functions.
+- Redesigned support for `Nullable` types. Fixed bugs that could lead to a server crash. Fixed almost all other bugs related to `NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, the `join_use_nulls` mode, Nullable types as arguments of the `OR` operator, etc.
+- Fixed various bugs related to the internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree`; alignment of `Enum` types in `Pretty` formats, etc.
+- Stricter checks for allowed combinations of composite columns.
+- Fixed an overflow when specifying a very large parameter for the `FixedString` data type.
+- Fixed a bug in the `topK` aggregate function in a generic case.
+- Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with the `-Array` combinator.
+- Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322).
+- Fixed the precision of the `exp10` function.
+- Fixed the behavior of the `visitParamExtract` function for better compliance with the documentation.
+- Fixed a crash when incorrect data types are specified.
+- Fixed the behavior of `DISTINCT` in the case when all columns are constants.
+- Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index.
+- Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries.
+- Fixed a bug that led to excessive rows in the result of `FULL` and `RIGHT JOIN` (Amos Bird).
+- Fixed a server crash when creating and removing temporary files in the `config.d` directories during a config reload.
+- Fixed the `SYSTEM DROP DNS CACHE` query: the cache was flushed but the addresses of cluster nodes were not updated.
+- Fixed the behavior of `MATERIALIZED VIEW` after executing `DETACH TABLE` for the table under the view (Marek Vavruša).
+
+#### Build Improvements: {#build-improvements-4}
+
+- The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment.
+- A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems.
+- Added the `clickhouse-test` package. It can be used to run functional tests.
+- The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub.
+- Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run.
+- Added support for `Cap'n'Proto` in the default build.
+- Changed the format of documentation sources from `Restricted Text` to `Markdown`.
+- Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually.
+- For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as `clickhouse clang` and `clickhouse lld`.
+- Removed the use of GNU extensions from the code. Enabled the `-Wextra` option. When building with `clang`, the default is `libc++` instead of `libstdc++`.
+- Extracted the `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools.
+
+#### Backward Incompatible Changes: {#backward-incompatible-changes-11}
+
+- The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory. If your table doesn't have `Nullable` columns or if the type of your table is not `Log`, then you don't need to do anything.
+- Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default.
+- The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstValue` to avoid confusion.
+- Removed the `FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird).
+- Removed the `BlockTabSeparated` format that was used solely for demonstration purposes.
+- Changed the state format for the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`.
+  If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with corresponding states), please write to clickhouse-feedback@yandex-team.com.
+- In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you could still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost. Although it was undocumented, we plan to support it again in future releases.
+- Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release.
+
+#### Please Note When Upgrading: {#please-note-when-upgrading}
+
+- When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message `unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated.
+- If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes.
+
+## [Changelog for 2017](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2017.md) {#changelog-for-2017}
diff --git a/docs/ru/whats_new/changelog/2019.md b/docs/ru/whats_new/changelog/2019.md
new file mode 100644
index 00000000000..ea5bffd74c9
--- /dev/null
+++ b/docs/ru/whats_new/changelog/2019.md
@@ -0,0 +1,2072 @@
+---
+machine_translated: true
+machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
+---
+
+## ClickHouse Release v19.17 {#clickhouse-release-v19-17}
+
+### ClickHouse Release v19.17.6.36, 2019-12-27 {#clickhouse-release-v19-17-6-36-2019-12-27}
+
+#### Bug Fix {#bug-fix}
+
+- Fixed a potential buffer overflow in decompression. A malicious user could pass fabricated compressed data that could cause a read after the buffer. This issue was found by Eldar Zaitov from the Yandex information security team. [\#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a possible server crash (`std::terminate`) when the server cannot send or write data in JSON or XML format with values of the String data type (which require UTF-8 validation), or when compressing the result data with the Brotli algorithm, or in some other rare cases. [\#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed dictionaries with a source from a ClickHouse `VIEW`; now reading such dictionaries doesn't cause the error `There is no query`. [\#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed checking whether a client host is allowed by the host\_regexp specified in users.xml.
+  [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241), [\#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar))
+- `RENAME TABLE` for a distributed table now renames the folder containing inserted data before sending it to the shards. This fixes an issue with successive renames `tableA->tableB`, `tableC->tableA`. [\#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix))
+- `range_hashed` external dictionaries created by DDL queries now allow ranges of arbitrary numeric types. [\#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin))
+- Fixed the `INSERT INTO table SELECT ... FROM mysql(...)` table function. [\#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix))
+- Fixed a segfault in `INSERT INTO TABLE FUNCTION file()` while inserting into a file that doesn't exist. Now in this case the file is created and the insert is processed. [\#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fixed a bitmapAnd error when intersecting an aggregated bitmap and a scalar bitmap. [\#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432))
+- Fixed a segfault when an `EXISTS` query was used without a `TABLE` or `DICTIONARY` qualifier, such as `EXISTS t`. [\#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the return type for the `rand` and `randConstant` functions in the case of a Nullable argument. Now the functions always return `UInt32` and never `Nullable(UInt32)`. [\#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed `DROP DICTIONARY IF EXISTS db.dict`; now it doesn't throw an exception if `db` doesn't exist. [\#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar))
+- If a table wasn't completely dropped because of a server crash, the server will try to restore and load it [\#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix))
+- Fixed a trivial count query for a distributed table if there are more than two shard local tables. [\#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu))
+- Fixed a bug that led to a data race in DB::BlockStreamProfileInfo::calculateRowsBeforeLimit() [\#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz))
+- Fixed `ALTER table MOVE part` executed immediately after merging the given part, which could cause moving of a part that the given part was merged into. Now it correctly moves the given part. [\#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Expressions for dictionaries can now be specified as strings. This is useful for calculating attributes while extracting data from non-ClickHouse sources, because it allows using non-ClickHouse syntax for those expressions.
+  [\#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin))
+- Fixed a very rare race in `clickhouse-copier` because of an overflow in ZXid. [\#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
+- Fixed a bug when after a failed query (due to "Too many simultaneous queries", for example) it would not read the external tables info, and the
+  next query would interpret this info as the beginning of the next query, causing an error like `Unknown packet from client`. [\#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat))
+- Avoid null dereference after "Unknown packet X from server" [\#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat))
+- Restored support of all ICU locales, added the ability to apply collations for constant expressions, and added the language name to the system.collations table. [\#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin))
+- The number of streams for reading from `StorageFile` and `StorageHDFS` is now limited, to avoid exceeding the memory limit. [\#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
+- Fixed the `CHECK TABLE` query for `*MergeTree` tables without a key. [\#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin))
+- Removed the mutation number from a part name in case there were no mutations. This removal improved compatibility with older versions. [\#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin))
+- Fixed a bug where mutations were skipped for some attached parts due to their data\_version being larger than the table mutation version. [\#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang))
+- Allow starting the server with redundant copies of parts after moving them to another device. [\#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed the error "Sizes of columns doesn't match" that could appear when using aggregate function columns. [\#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea))
+- Now an exception is thrown when WITH TIES is used alongside LIMIT BY. And now it's possible to use TOP with LIMIT BY. [\#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fixed dictionary reload if the dictionary has an `invalidate_query`, which stopped updates along with some exceptions on previous update tries. [\#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin))
+
+### ClickHouse Release v19.17.4.11, 2019-11-22 {#clickhouse-release-v19-17-4-11-2019-11-22}
+
+#### Backward Incompatible Change {#backward-incompatible-change}
+
+- Using a column instead of AST to store scalar subquery results for better performance. The `enable_scalar_subquery_optimization` setting was added in 19.17 and it was enabled by default. It leads to errors like [this](https://github.com/ClickHouse/ClickHouse/issues/7851) during an upgrade to 19.17.2 or 19.17.3 from previous versions.
+  This setting was disabled by default in 19.17.4 to make it possible to upgrade from 19.16 and older versions without errors. [\#7392](https://github.com/ClickHouse/ClickHouse/pull/7392) ([Amos Bird](https://github.com/amosbird))
+
+#### New Feature {#new-feature}
+
+- Added the ability to create dictionaries with DDL queries. [\#7360](https://github.com/ClickHouse/ClickHouse/pull/7360) ([alesapin](https://github.com/alesapin))
+- Made the `bloom_filter` index type support `LowCardinality` and `Nullable` [\#7363](https://github.com/ClickHouse/ClickHouse/issues/7363) [\#7561](https://github.com/ClickHouse/ClickHouse/pull/7561) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added the function `isValidJSON` to check that the passed string is valid JSON (see the sketch after this list). [\#5910](https://github.com/ClickHouse/ClickHouse/issues/5910) [\#7293](https://github.com/ClickHouse/ClickHouse/pull/7293) ([Vdimir](https://github.com/Vdimir))
+- Implemented the `arrayCompact` function [\#7328](https://github.com/ClickHouse/ClickHouse/pull/7328) ([Memo](https://github.com/Joeywzr))
+- Created the function `hex` for Decimal numbers. It works like `hex(reinterpretAsString())`, but doesn't delete the last zero bytes. [\#7355](https://github.com/ClickHouse/ClickHouse/pull/7355) ([Mikhail Korotov](https://github.com/millb))
+- Added the `arrayFill` and `arrayReverseFill` functions, which replace elements by other elements in front/behind of them in the array. [\#7380](https://github.com/ClickHouse/ClickHouse/pull/7380) ([hcz](https://github.com/hczhcz))
+- Added `CRC32IEEE()`/`CRC64()` support [\#7480](https://github.com/ClickHouse/ClickHouse/pull/7480) ([Azat Khuzhin](https://github.com/azat))
+- Implemented the `char` function similar to the one in [MySQL](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char) [\#7486](https://github.com/ClickHouse/ClickHouse/pull/7486) ([sundy-li](https://github.com/sundy-li))
+- Added the `bitmapTransform` function. It transforms an array of values in a bitmap to another array of values, the result being a new bitmap [\#7598](https://github.com/ClickHouse/ClickHouse/pull/7598) ([Zhichang Yu](https://github.com/yuzhichang))
+- Implemented the `javaHashUTF16LE()` function [\#7651](https://github.com/ClickHouse/ClickHouse/pull/7651) ([achimbab](https://github.com/achimbab))
+- Added the `_shard_num` virtual column for the Distributed engine [\#7624](https://github.com/ClickHouse/ClickHouse/pull/7624) ([Azat Khuzhin](https://github.com/azat))
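+
+A quick illustration of two of the functions above (both calls behave as described in the corresponding entries):
+
+``` sql
+SELECT isValidJSON('{"a": 1}'), isValidJSON('not json');  -- 1, 0
+SELECT arrayCompact([1, 1, 2, 2, 1]);  -- collapses consecutive duplicates: [1, 2, 1]
+```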
+
+#### Experimental Feature {#experimental-feature}
+
+- Support for processors (the new query execution pipeline) in `MergeTree`. [\#7181](https://github.com/ClickHouse/ClickHouse/pull/7181) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Bug Fix {#bug-fix-1}
+
+- Fixed incorrect float parsing in `Values` [\#7817](https://github.com/ClickHouse/ClickHouse/issues/7817) [\#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix))
+- Fixed a rare deadlock that could happen when trace\_log is enabled. [\#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov))
+- Prevent message duplication when producing a Kafka table that has any MVs selecting from it [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
+- Support for `Array(LowCardinality(Nullable(String)))` in `IN`. Resolves [\#7364](https://github.com/ClickHouse/ClickHouse/issues/7364) [\#7366](https://github.com/ClickHouse/ClickHouse/pull/7366) ([achimbab](https://github.com/achimbab))
+- Added handling of `SQL_TINYINT` and `SQL_BIGINT`, and fixed handling of the `SQL_FLOAT` data source types in the ODBC bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
+- Fixed aggregations (`avg` and quantiles) over empty decimal columns [\#7431](https://github.com/ClickHouse/ClickHouse/pull/7431) ([Andrey Konyaev](https://github.com/akonyaev90))
+- Fixed `INSERT` into Distributed with `MATERIALIZED` columns [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
+- Made `MOVE PARTITION` work if some parts of the partition are already on the destination disk or volume [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed a bug where hardlinks failed to be created during mutations in `ReplicatedMergeTree` in multi-disk configurations. [\#7558](https://github.com/ClickHouse/ClickHouse/pull/7558) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed a bug with a mutation on a MergeTree when the whole part remains unchanged and the best space is found on another disk [\#7602](https://github.com/ClickHouse/ClickHouse/pull/7602) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed a bug with `keep_free_space_ratio` not being read from the disks configuration [\#7645](https://github.com/ClickHouse/ClickHouse/pull/7645) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed a bug with tables containing only `Tuple` columns or columns with complex paths. Fixes [7541](https://github.com/ClickHouse/ClickHouse/issues/7541). [\#7545](https://github.com/ClickHouse/ClickHouse/pull/7545) ([alesapin](https://github.com/alesapin))
+- Do not account memory for the Buffer engine in the max\_memory\_usage limit [\#7552](https://github.com/ClickHouse/ClickHouse/pull/7552) ([Azat Khuzhin](https://github.com/azat))
+- Fixed final mark usage in `MergeTree` tables ordered by `tuple()`. In rare cases it could lead to the `Can't adjust last granule` error while selecting. [\#7639](https://github.com/ClickHouse/ClickHouse/pull/7639) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a bug in mutations that have a predicate with actions that require context (for example, functions for JSON), which could lead to crashes or strange exceptions. [\#7664](https://github.com/ClickHouse/ClickHouse/pull/7664) ([alesapin](https://github.com/alesapin))
+- Fixed a mismatch of database and table names escaping in the `data/` and `shadow/` directories [\#7575](https://github.com/ClickHouse/ClickHouse/pull/7575) ([Alexander Burmak](https://github.com/Alex-Burmak))
+- Support duplicated keys in RIGHT\|FULL JOINs, e.g. `ON t.x = u.x AND t.x = u.y`. Fixed a crash in this case. [\#7586](https://github.com/ClickHouse/ClickHouse/pull/7586) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed `Not found column in block` when joining on an expression with a RIGHT or FULL JOIN.
+  [\#7641](https://github.com/ClickHouse/ClickHouse/pull/7641) ([Artem Zuikov](https://github.com/4ertus2))
+- One more attempt to fix an infinite loop in the `PrettySpace` format [\#7591](https://github.com/ClickHouse/ClickHouse/pull/7591) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fixed a bug in the `concat` function when all arguments were `FixedString` of the same size. [\#7635](https://github.com/ClickHouse/ClickHouse/pull/7635) ([alesapin](https://github.com/alesapin))
+- Fixed an exception in case of using 1 argument while defining S3, URL and HDFS storages. [\#7618](https://github.com/ClickHouse/ClickHouse/pull/7618) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed the scope of InterpreterSelectQuery for views with a query [\#7601](https://github.com/ClickHouse/ClickHouse/pull/7601) ([Azat Khuzhin](https://github.com/azat))
+
+#### Improvement {#improvement}
+
+- `Nullable` columns are recognized and NULL values are handled correctly by the ODBC bridge [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
+- Write the current batch for distributed sends atomically [\#7600](https://github.com/ClickHouse/ClickHouse/pull/7600) ([Azat Khuzhin](https://github.com/azat))
+- Throw an exception if we cannot detect the table for a column name in a query. [\#7358](https://github.com/ClickHouse/ClickHouse/pull/7358) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `merge_max_block_size` setting to `MergeTreeSettings` [\#7412](https://github.com/ClickHouse/ClickHouse/pull/7412) ([Artem Zuikov](https://github.com/4ertus2))
+- Queries with `HAVING` and without `GROUP BY` assume group by constant. So, `SELECT 1 HAVING 1` now returns a result (see the sketch after this list). [\#7496](https://github.com/ClickHouse/ClickHouse/pull/7496) ([Amos Bird](https://github.com/amosbird))
+- Support parsing `(X,)` as a tuple, similar to Python. [\#7501](https://github.com/ClickHouse/ClickHouse/pull/7501), [\#7562](https://github.com/ClickHouse/ClickHouse/pull/7562) ([Amos Bird](https://github.com/amosbird))
+- Made the `range` function behave almost like the Python one. [\#7518](https://github.com/ClickHouse/ClickHouse/pull/7518) ([sundy-li](https://github.com/sundy-li))
+- Added the `constraints` columns to the `system.settings` table [\#7553](https://github.com/ClickHouse/ClickHouse/pull/7553) ([Vitaly Baranov](https://github.com/vitlibar))
+- Better Null format for the TCP handler, so that it's possible to use `select ignore(<expression>) from table format Null` for performance measurement via clickhouse-client [\#7606](https://github.com/ClickHouse/ClickHouse/pull/7606) ([Amos Bird](https://github.com/amosbird))
+- Queries like `CREATE TABLE ... AS (SELECT (1, 2))` are parsed correctly [\#7542](https://github.com/ClickHouse/ClickHouse/pull/7542) ([hcz](https://github.com/hczhcz))
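+
+Two small illustrations of the parsing and behavior changes above:
+
+``` sql
+SELECT (1,);        -- parsed as a one-element tuple, Python style
+SELECT 1 HAVING 1;  -- HAVING without GROUP BY now returns a row
+```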
+
+#### Performance Improvement {#performance-improvement}
+
+- Improved performance of aggregation over short string keys. [\#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird))
+- Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded. [\#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([Amos Bird](https://github.com/amosbird))
+- Use storage meta info to evaluate trivial `SELECT count() FROM table;`. [\#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([Amos Bird](https://github.com/amosbird), [alexey-milovidov](https://github.com/alexey-milovidov))
+- Vectorize processing of `arrayReduce` similar to the Aggregator's `addBatch`. [\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird))
+- Minor improvements in performance of `Kafka` consumption. [\#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([Ivan](https://github.com/abyss7))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement}
+
+- Add support for cross-compiling to the AARCH64 CPU architecture. Refactor the packager script. [\#7370](https://github.com/ClickHouse/ClickHouse/pull/7370) [\#7539](https://github.com/ClickHouse/ClickHouse/pull/7539) ([Ivan](https://github.com/abyss7))
+- Unpack the darwin-x86\_64 and linux-aarch64 toolchains into a mounted Docker volume when building packages. [\#7534](https://github.com/ClickHouse/ClickHouse/pull/7534) ([Ivan](https://github.com/abyss7))
+- Update the Docker image for the binary packager. [\#7474](https://github.com/ClickHouse/ClickHouse/pull/7474) ([Ivan](https://github.com/abyss7))
+- Fixed compile errors on MacOS Catalina. [\#7585](https://github.com/ClickHouse/ClickHouse/pull/7585) ([Ernest Poletaev](https://github.com/ernestp))
+- Some refactoring in query analysis logic: split a complex class into several simple ones. [\#7454](https://github.com/ClickHouse/ClickHouse/pull/7454) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix the build without submodules. [\#7295](https://github.com/ClickHouse/ClickHouse/pull/7295) ([proller](https://github.com/proller))
+- Better `add_globs` in CMake files. [\#7418](https://github.com/ClickHouse/ClickHouse/pull/7418) ([Amos Bird](https://github.com/amosbird))
+- Remove hardcoded paths in the `unwind` target. [\#7460](https://github.com/ClickHouse/ClickHouse/pull/7460) ([Konstantin Podshumok](https://github.com/podshumok))
+- Allow using the MySQL format without SSL. [\#7524](https://github.com/ClickHouse/ClickHouse/pull/7524) ([proller](https://github.com/proller))
+
+#### Other {#other}
+
+- Added an ANTLR4 grammar for the ClickHouse SQL dialect. [\#7595](https://github.com/ClickHouse/ClickHouse/issues/7595) [\#7596](https://github.com/ClickHouse/ClickHouse/pull/7596) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+## ClickHouse Release v19.16 {#clickhouse-release-v19-16}
+
+#### ClickHouse Release v19.16.14.65, 2020-03-25 {#clickhouse-release-v19-16-14-65-2020-03-25}
+
+- Fixed a bug in batch calculations of ternary logical operations on multiple arguments (more than 10). [\#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([Alexander Kazakov](https://github.com/Akazz)) This bugfix was backported to version 19.16 by a special request from Altinity.
+
+#### ClickHouse Release v19.16.14.65, 2020-03-05 {#clickhouse-release-v19-16-14-65-2020-03-05}
+
+- Fixed incompatibility of distributed subqueries with older CH versions. Fixes [\#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) [(tabplubix)](https://github.com/tavplubix)
+- When executing a `CREATE` query, fold constant expressions in storage engine arguments. Replace an empty database name with the current database. Fixes [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix the check for a local address in `ClickHouseDictionarySource`. [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tabplubix)](https://github.com/tavplubix)
+- Now background merges in the `*MergeTree` table engine family preserve the storage policy volume order more accurately. [\#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Prevent losing data in `Kafka` in rare cases when an exception happens after reading the suffix but before the commit. Fixes [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [\#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov)
+- Fixed a bug leading to server termination when trying to use/drop a `Kafka` table created with wrong parameters. Fixes [\#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [\#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov)
+- Allow using `MaterializedView` with subqueries above `Kafka` tables. [\#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))
+
+#### New Feature {#new-feature-1}
+
+- Add the `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views. This new feature was added to the bugfix release by a special request from Altinity. [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)
+
+### ClickHouse Release v19.16.2.2, 2019-10-30 {#clickhouse-release-v19-16-2-2-2019-10-30}
+
+#### Backward Incompatible Change {#backward-incompatible-change-1}
+
+- Add missing arity validation for count/countIf. [\#7095](https://github.com/ClickHouse/ClickHouse/issues/7095) [\#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir))
+- Remove the legacy `asterisk_left_columns_only` setting (it was disabled by default). [\#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([Artem Zuikov](https://github.com/4ertus2))
+- Format strings for the Template data format are now specified in files. [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) ([tavplubix](https://github.com/tavplubix))
+
+#### New Feature {#new-feature-2}
+
+- Introduce uniqCombined64() to calculate cardinality greater than UINT\_MAX. [\#7213](https://github.com/ClickHouse/ClickHouse/pull/7213), [\#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat Khuzhin](https://github.com/azat))
+- Support Bloom filter indexes on Array columns. [\#6984](https://github.com/ClickHouse/ClickHouse/pull/6984) ([achimbab](https://github.com/achimbab))
+- Add a function `getMacro(name)` that returns a String with the value of the corresponding `<macros>` entry from the server configuration (see the sketch below). [\#7240](https://github.com/ClickHouse/ClickHouse/pull/7240) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Set two configuration options for a dictionary based on an HTTP source: `credentials` and `http-headers`. [\#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Add a new ProfileEvent `Merge` that counts the number of launched background merges. [\#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([Mikhail Korotov](https://github.com/millb))
+- Add a fullHostName function that returns the fully qualified domain name. [\#7263](https://github.com/ClickHouse/ClickHouse/issues/7263) [\#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundy-li](https://github.com/sundy-li))
+- Add functions `arraySplit` and `arrayReverseSplit` which split an array by "cut off" conditions. They are useful in time-sequence handling. [\#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz))
+- Add new functions that return an array of all matched indices in the multiMatch family of functions. [\#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila Kutenin](https://github.com/danlark1))
+- Add a new database engine `Lazy` that is optimized for storing a large number of small log tables. [\#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([Nikita Vasilev](https://github.com/nikvas0))
+- Add aggregate functions groupBitmapAnd, -Or, -Xor for bitmap columns. [\#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([Zhichang Yu](https://github.com/yuzhichang))
+- Add aggregate function combinators -OrNull and -OrDefault, which return null or default values when there is nothing to aggregate. [\#7331](https://github.com/ClickHouse/ClickHouse/pull/7331) ([hcz](https://github.com/hczhcz))
+- Introduce a CustomSeparated data format that supports custom escaping and delimiter rules. [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) ([tavplubix](https://github.com/tavplubix))
+- Support Redis as a source of external dictionaries. [\#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [\#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [Anton Popov](https://github.com/CurtizJ))
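+
+A minimal sketch of `getMacro`, assuming a hypothetical `<macros>` section in the server configuration (the macro name `shard` and its value are illustrative):
+
+``` sql
+-- With e.g. <macros><shard>01</shard></macros> in config.xml,
+-- this would return the string '01':
+SELECT getMacro('shard');
+```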
+ [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Василий + Немков](https://github.com/Enmk)) +- Исправлена ошибка сегментации в groupBitmapOr (проблема [\#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)). + [\#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Чжичан + Ю](https://github.com/yuzhichang)) +- Для материализованных представлений фиксация для Кафки вызывается после того, как все данные были записаны. + [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Иван](https://github.com/abyss7)) +- Исправлена ошибка `duration_ms` значение в `system.part_log` стол. Это было в десять раз хуже. + [\#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Владимир + Чеботарев](https://github.com/excitoon)) +- Быстрое исправление для устранения сбоя в таблице LIVE VIEW и повторного включения всех тестов LIVE VIEW. + [\#7201](https://github.com/ClickHouse/ClickHouse/pull/7201) + ([взаказников](https://github.com/vzakaznikov)) +- Правильно сериализовать значение NULL значений в мин/макс показатели MergeTree части. + [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Александр + Кузьменков](https://github.com/akuzm)) +- Не ставьте виртуальные столбцы .метаданные sql при создании таблицы в виде `CREATE TABLE AS`. + [\#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Иван](https://github.com/abyss7)) +- Исправлена ошибка сегментации в `ATTACH PART` запрос. + [\#7185](https://github.com/ClickHouse/ClickHouse/pull/7185) + ([алесапин](https://github.com/alesapin)) +- Исправьте неправильный результат для некоторых запросов, задаваемых оптимизацией empty в подзапросах и empty + INNER/RIGHT JOIN. [\#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Николай + Кочетов](https://github.com/KochetovNicolai)) +- Исправление ошибок в системах живой вид getHeader() метод. + [\#7271](https://github.com/ClickHouse/ClickHouse/pull/7271) + ([взаказников](https://github.com/vzakaznikov)) + +#### Улучшение {#improvement-1} + +- Добавьте сообщение в случае ожидания queue\_wait\_max\_ms. + [\#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Азат + Хужин](https://github.com/azat)) +- Выполнена установка `s3_min_upload_part_size` уровень таблицы. + [\#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Владимир + Чеботарев](https://github.com/excitoon)) +- Проверьте TTL в StorageFactory. [\#7304](https://github.com/ClickHouse/ClickHouse/pull/7304) + ([сундили](https://github.com/sundy-li)) +- Сквош левых блоков в частичном объединении слиянием (оптимизация). + [\#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Артем + Зуйков](https://github.com/4ertus2)) +- Не допускайте недетерминированных функций в мутациях реплицируемых движков таблиц, поскольку это + может привести к несогласованности между репликами. + [\#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Александр + Казаков](https://github.com/Akazz)) +- Отключите отслеживание памяти при преобразовании трассировки стека исключений в строку. Это может предотвратить потерю + сообщений об ошибках типа `Memory limit exceeded` на сервере, который вызвал `Attempt to read after eof` исключение для клиента. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) + ([Николай Кочетов](https://github.com/KochetovNicolai)) +- Различные улучшения формата. 
+- ClickHouse ignores values on the right side of the IN operator that are not convertible to the left-side type. Make it work properly for compound types – Array and Tuple. [\#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Support missing inequalities for ASOF JOIN. It's possible to join the less-or-equal variant and the strict greater and less variants for the ASOF column in ON syntax (see the sketch below). [\#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem Zuikov](https://github.com/4ertus2))
+- Optimize partial merge join. [\#7070](https://github.com/ClickHouse/ClickHouse/pull/7070) ([Artem Zuikov](https://github.com/4ertus2))
+- Do not use more than 98K of memory in uniqCombined functions. [\#7236](https://github.com/ClickHouse/ClickHouse/pull/7236), [\#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat Khuzhin](https://github.com/azat))
+- Flush parts of the right-hand joining table on disk in PartialMergeJoin (if there is not enough memory). Load data back when needed. [\#7186](https://github.com/ClickHouse/ClickHouse/pull/7186) ([Artem Zuikov](https://github.com/4ertus2))
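+
+A sketch of the ASOF JOIN inequality variants, using hypothetical `trades` and `quotes` tables (the table and column names are assumptions for illustration):
+
+``` sql
+-- The ASOF condition may now use >=, >, <= or < on the ASOF column:
+SELECT t.symbol, t.trade_time, q.bid
+FROM trades AS t
+ASOF JOIN quotes AS q
+ON t.symbol = q.symbol AND t.trade_time >= q.quote_time;
+```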
+
+#### Performance Improvement {#performance-improvement-1}
+
+- Speed up joinGet with const arguments by avoiding data duplication. [\#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos Bird](https://github.com/amosbird))
+- Return early if the subquery is empty. [\#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu))
+- Optimize parsing of SQL expressions in Values. [\#6781](https://github.com/ClickHouse/ClickHouse/pull/6781) ([tavplubix](https://github.com/tavplubix))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-1}
+
+- Disable some contribs for cross-compilation to Mac OS. [\#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7))
+- Add missing linking with PocoXML for clickhouse\_common\_io. [\#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat Khuzhin](https://github.com/azat))
+- Accept multiple test filter arguments in clickhouse-test. [\#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Enable musl and jemalloc for ARM. [\#7300](https://github.com/ClickHouse/ClickHouse/pull/7300) ([Amos Bird](https://github.com/amosbird))
+- Added the `--client-option` parameter to `clickhouse-test` to pass additional options to the client. [\#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Preserve existing configs on rpm package upgrade. [\#7103](https://github.com/ClickHouse/ClickHouse/pull/7103) ([filimonov](https://github.com/filimonov))
+- Fix errors detected by PVS. [\#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix the build for Darwin. [\#7149](https://github.com/ClickHouse/ClickHouse/pull/7149) ([Ivan](https://github.com/abyss7))
+- glibc 2.29 compatibility. [\#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos Bird](https://github.com/amosbird))
+- Make sure dh\_clean does not touch potential source files. [\#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos Bird](https://github.com/amosbird))
+- Attempt to avoid conflicts when updating from the altinity rpm – it has the config file packaged separately in clickhouse-server-common. [\#7073](https://github.com/ClickHouse/ClickHouse/pull/7073) ([filimonov](https://github.com/filimonov))
+- Optimize some header files for faster rebuilds. [\#7212](https://github.com/ClickHouse/ClickHouse/pull/7212), [\#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Add performance tests for Date and DateTime. [\#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily Nemkov](https://github.com/Enmk))
+- Fix some tests that contained non-deterministic mutations. [\#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander Kazakov](https://github.com/Akazz))
+- Add a build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Avoid the use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
+- Fix some issues in Fields found by MemorySanitizer. [\#7135](https://github.com/ClickHouse/ClickHouse/pull/7135), [\#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander Kuzmenkov](https://github.com/akuzm)), [\#7376](https://github.com/ClickHouse/ClickHouse/pull/7376) ([Amos Bird](https://github.com/amosbird))
+- Fix undefined behavior in murmurhash32. [\#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos Bird](https://github.com/amosbird))
+- Fix undefined behavior in StoragesInfoStream. [\#7384](https://github.com/ClickHouse/ClickHouse/pull/7384) ([tavplubix](https://github.com/tavplubix))
+- Fixed constant expression folding for external database engines (MySQL, ODBC, JDBC). In previous versions it wasn't working for multiple constant expressions and was not working at all for Date, DateTime and UUID. This fixes [\#7245](https://github.com/ClickHouse/ClickHouse/issues/7245) [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a ThreadSanitizer data race error in LIVE VIEW when accessing the no\_users\_thread variable. [\#7353](https://github.com/ClickHouse/ClickHouse/pull/7353) ([vzakaznikov](https://github.com/vzakaznikov))
+- Get rid of malloc symbols in libcommon. [\#7134](https://github.com/ClickHouse/ClickHouse/pull/7134), [\#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos Bird](https://github.com/amosbird))
+- Add a global flag ENABLE\_LIBRARIES for disabling all libraries. [\#7063](https://github.com/ClickHouse/ClickHouse/pull/7063) ([proller](https://github.com/proller))
+
+#### Code Cleanup {#code-cleanup}
+
+- Generalize the configuration repository to prepare for DDL for Dictionaries. [\#7155](https://github.com/ClickHouse/ClickHouse/pull/7155) ([alesapin](https://github.com/alesapin))
+- Parser for dictionaries DDL without any semantics. [\#7209](https://github.com/ClickHouse/ClickHouse/pull/7209) ([alesapin](https://github.com/alesapin))
+- Split ParserCreateQuery into different smaller parsers. [\#7253](https://github.com/ClickHouse/ClickHouse/pull/7253) ([alesapin](https://github.com/alesapin))
+- Small refactoring and renaming near external dictionaries. [\#7111](https://github.com/ClickHouse/ClickHouse/pull/7111) ([alesapin](https://github.com/alesapin))
+- Refactor some code to prepare for role-based access control. [\#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly Baranov](https://github.com/vitlibar))
+- Some improvements in DatabaseOrdinary code. [\#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita Vasilev](https://github.com/nikvas0))
+- Do not use iterators in the find() and emplace() methods of hash tables. [\#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Fix getMultipleValuesFromConfig in case the parameter root is not empty. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
+- Remove some copy-paste (TemporaryFile and TemporaryFileStream). [\#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem Zuikov](https://github.com/4ertus2))
+- Improved code readability a little bit (`MergeTreeData::getActiveContainingPart`). [\#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Wait for all scheduled jobs that are using local objects if `ThreadPool::schedule(...)` throws an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and fix the comments to make it obvious that it may throw. [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
+
+## ClickHouse Release 19.15 {#clickhouse-release-19-15}
+
+### ClickHouse Release 19.15.4.10, 2019-10-31 {#clickhouse-release-19-15-4-10-2019-10-31}
+
+#### Bug Fix {#bug-fix-3}
+
+- Added handling of SQL\_TINYINT and SQL\_BIGINT, and fixed handling of SQL\_FLOAT data source types in the ODBC Bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
+- Allowed having some parts on the destination disk or volume in MOVE PARTITION. [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed NULL values in nullable columns through the ODBC bridge. [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed INSERT into a Distributed non-local node with MATERIALIZED columns. [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
+- Fixed the getMultipleValuesFromConfig function. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
+ [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Василий Немков](https://github.com/Enmk)) +- Дождитесь завершения всех заданий по исключению (исправлены редкие сегменты). + [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([тавплубикс](https://github.com/tavplubix)) +- Не нажимайте на MVs при вставке в таблицу Кафки. + [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Иван](https://github.com/abyss7)) +- Отключите отслеживание памяти для стека исключений. + [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Николай Кочетов](https://github.com/KochetovNicolai)) +- Исправлен неверный код при преобразовании запроса для внешней базы данных. + [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) +- Избегайте использования неинициализированных значений в MetricsTransmitter. + [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Азат Хужин](https://github.com/azat)) +- Добавлен пример конфигурации с макросами для тестов ([Алексей-Миловидов](https://github.com/alexey-milovidov)) + +### ClickHouse релиз 19.15.3.6, 2019-10-09 {#clickhouse-release-19-15-3-6-2019-10-09} + +#### Исправление ошибок {#bug-fix-4} + +- Исправлена ошибка bad\_variant в хэшированном словаре. + ([алесапин](https://github.com/alesapin)) +- Исправлена ошибка с ошибкой сегментации в запросе ATTACH PART. + ([алесапин](https://github.com/alesapin)) +- Расчет фиксированного времени в `MergeTreeData`. + ([Владимир Чеботарев](https://github.com/excitoon)) +- Посвятите себя Кафке явно после того, как написание будет завершено. + [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Иван](https://github.com/abyss7)) +- Правильно сериализовать значение NULL значений в мин/макс показатели MergeTree части. + [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Александр Кузьменков](https://github.com/akuzm)) + +### ClickHouse релиз 19.15.2.2, 2019-10-01 {#clickhouse-release-19-15-2-2-2019-10-01} + +#### Новая функция {#new-feature-3} + +- Многоуровневое хранение: поддержка использования нескольких томов хранения для таблиц с движком MergeTree. Можно хранить свежие данные на SSD и автоматически перемещать старые данные на жесткий диск. ([пример](https://clickhouse.github.io/clickhouse-presentations/meetup30/new_features/#12)). [\#4918](https://github.com/ClickHouse/ClickHouse/pull/4918) ([Игр](https://github.com/ObjatieGroba)) [\#6489](https://github.com/ClickHouse/ClickHouse/pull/6489) ([алесапин](https://github.com/alesapin)) +- Добавить функцию таблицы `input` для считывания входящих данных в `INSERT SELECT` запрос. [\#5450](https://github.com/ClickHouse/ClickHouse/pull/5450) ([паласоник1](https://github.com/palasonic1)) [\#6832](https://github.com/ClickHouse/ClickHouse/pull/6832) ([Антон Попов](https://github.com/CurtizJ)) +- Добавить а `sparse_hashed` компоновка словаря, которая функционально эквивалентна `hashed` макет, но более эффективен для работы с памятью. Он использует примерно в два раза меньше памяти за счет более медленного извлечения значений. [\#6894](https://github.com/ClickHouse/ClickHouse/pull/6894) ([Азат Хужин](https://github.com/azat)) +- Реализована возможность определения списка пользователей для доступа к словарям. Используется только текущая подключенная база данных. [\#6907](https://github.com/ClickHouse/ClickHouse/pull/6907) ([Гийом Тассери](https://github.com/YiuRULE)) +- Добавь `LIMIT` опцион на `SHOW` запрос. 
+
+#### Experimental Feature {#experimental-feature-1}
+
+- Implement an (in-memory) merge join variant that does not change the current pipeline. The result is partially sorted by the merge key. Set `partial_merge_join = 1` to use this feature. The merge join is still in development. [\#6940](https://github.com/ClickHouse/ClickHouse/pull/6940) ([Artem Zuikov](https://github.com/4ertus2))
+- Add the `S3` engine and table function. It is still in development (no authentication support yet). [\#5596](https://github.com/ClickHouse/ClickHouse/pull/5596) ([Vladimir Chebotarev](https://github.com/excitoon))
+
+#### Improvement {#improvement-2}
+
+- Every message read from Kafka is inserted atomically. This resolves almost all known issues with the Kafka engine. [\#6950](https://github.com/ClickHouse/ClickHouse/pull/6950) ([Ivan](https://github.com/abyss7))
+- Improvements for failover of distributed queries. Shortened recovery time; it is also now configurable and can be seen in `system.clusters`. [\#6399](https://github.com/ClickHouse/ClickHouse/pull/6399) ([Vasily Nemkov](https://github.com/Enmk))
+- Support numeric values for Enums directly in the `IN` section. \#6766 [\#6941](https://github.com/ClickHouse/ClickHouse/pull/6941) ([dimarub2000](https://github.com/dimarub2000))
+- Support (optional, disabled by default) redirects on URL storage. [\#6914](https://github.com/ClickHouse/ClickHouse/pull/6914) ([maqroll](https://github.com/maqroll))
+- Add an information message when a client with an older version connects to a server. [\#6893](https://github.com/ClickHouse/ClickHouse/pull/6893) ([Philipp Malkovsky](https://github.com/malkfilipp))
+- Remove the maximum backoff sleep time limit for sending data in Distributed tables. [\#6895](https://github.com/ClickHouse/ClickHouse/pull/6895) ([Azat Khuzhin](https://github.com/azat))
+- Add the ability to send profile events (counters) with cumulative values to graphite. It can be enabled in the corresponding graphite section of the server `config.xml`. [\#6969](https://github.com/ClickHouse/ClickHouse/pull/6969) ([Azat Khuzhin](https://github.com/azat))
+- Automatically cast type `T` to `LowCardinality(T)` while inserting data into a column of type `LowCardinality(T)` in Native format via HTTP. [\#6891](https://github.com/ClickHouse/ClickHouse/pull/6891) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Add the ability to use the function `hex` without `reinterpretAsString` for `Float32`, `Float64` (see the sketch below). [\#7024](https://github.com/ClickHouse/ClickHouse/pull/7024) ([Mikhail Korotov](https://github.com/millb))
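+
+A minimal sketch; previously one had to wrap floats with `reinterpretAsString` to pass them to `hex` (the exact output bytes follow from the IEEE-754 representation and are omitted here):
+
+``` sql
+SELECT hex(toFloat32(1.5)), hex(toFloat64(1.5));
+```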
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-2}
+
+- Add a gdb-index to the clickhouse binary with debug info. It will speed up the startup time of `gdb`. [\#6947](https://github.com/ClickHouse/ClickHouse/pull/6947) ([alesapin](https://github.com/alesapin))
+- Speed up deb packaging with a patched dpkg-deb which uses `pigz`. [\#6960](https://github.com/ClickHouse/ClickHouse/pull/6960) ([alesapin](https://github.com/alesapin))
+- Set `enable_fuzzing = 1` to enable libfuzzer instrumentation of all the project code. [\#7042](https://github.com/ClickHouse/ClickHouse/pull/7042) ([kyprizel](https://github.com/kyprizel))
+- Add a split build smoke test in CI. [\#7061](https://github.com/ClickHouse/ClickHouse/pull/7061) ([alesapin](https://github.com/alesapin))
+- Add a build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Replace `libsparsehash` with `sparsehash-c11`. [\#6965](https://github.com/ClickHouse/ClickHouse/pull/6965) ([Azat Khuzhin](https://github.com/azat))
+
+#### Bug Fix {#bug-fix-5}
+
+- Fixed performance degradation of index analysis on complex keys on big tables. This fixes \#6924. [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a logical error causing segfaults when selecting from a Kafka empty topic. [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
+- Fix too early MySQL connection close in `MySQLBlockInputStream.cpp`. [\#6882](https://github.com/ClickHouse/ClickHouse/pull/6882) ([Clément Rodriguez](https://github.com/clemrodriguez))
+- Returned support for very old Linux kernels (fixes [\#6841](https://github.com/ClickHouse/ClickHouse/issues/6841)). [\#6853](https://github.com/ClickHouse/ClickHouse/pull/6853) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix possible data loss in an `insert select` query in case of an empty block in the input stream. \#6834 \#6862 [\#6911](https://github.com/ClickHouse/ClickHouse/pull/6911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix for the function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
+- Fix complex queries with array joins and global subqueries. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
+- Fix the `Unknown identifier` error in ORDER BY and GROUP BY with multiple JOINs. [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed an `MSan` warning while executing functions with a `LowCardinality` argument. [\#7062](https://github.com/ClickHouse/ClickHouse/pull/7062) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Backward Incompatible Change {#backward-incompatible-change-2}
+
+- Changed the serialization format of bitmap\* aggregate function states to improve performance. Serialized states of bitmap\* from previous versions cannot be read. [\#6908](https://github.com/ClickHouse/ClickHouse/pull/6908) ([Zhichang Yu](https://github.com/yuzhichang))
+
+## ClickHouse Release 19.14 {#clickhouse-release-19-14}
+
+### ClickHouse Release 19.14.7.15, 2019-10-02 {#clickhouse-release-19-14-7-15-2019-10-02}
+
+#### Bug Fix {#bug-fix-6}
+
+- This release also contains all bug fixes from 19.11.12.69.
+- Fixed compatibility for distributed queries between 19.14 and earlier versions. This fixes [\#7068](https://github.com/ClickHouse/ClickHouse/issues/7068). [\#7069](https://github.com/ClickHouse/ClickHouse/pull/7069) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.14.6.12, 2019-09-19 {#clickhouse-release-19-14-6-12-2019-09-19}
+
+#### Bug Fix {#bug-fix-7}
+
+- Fix for the function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
+- Fixed the subquery name in queries with `ARRAY JOIN` and `GLOBAL IN subquery` with an alias. Use the subquery alias for the external table name if it is specified. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-3}
+
+- Fix the [flapping](https://clickhouse-test-reports.s3.yandex.net/6944/aab95fd5175a513413c7395a73a82044bdafb906/functional_stateless_tests_(debug).html) test `00715_fetch_merged_or_mutated_part_zookeeper` by rewriting it as a shell script, because it needs to wait for mutations to apply. [\#6977](https://github.com/ClickHouse/ClickHouse/pull/6977) ([Alexander Kazakov](https://github.com/Akazz))
+- Fixed UBSan and MemSan failures in the function `groupUniqArray` with an empty array argument. It was caused by placing an empty `PaddedPODArray` into a hash table zero cell, because the constructor for the zero cell value was not called. [\#6937](https://github.com/ClickHouse/ClickHouse/pull/6937) ([Amos Bird](https://github.com/amosbird))
+
+### ClickHouse Release 19.14.3.3, 2019-09-10 {#clickhouse-release-19-14-3-3-2019-09-10}
+
+#### New Feature {#new-feature-4}
+
+- `WITH FILL` modifier for `ORDER BY` (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069); see the sketch below). [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
+- `WITH TIES` modifier for `LIMIT` (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)). [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
+- Parse an unquoted `NULL` literal as NULL (if the setting `format_csv_unquoted_null_literal_as_null=1`). Initialize null fields with default values if the data type of the field is not nullable (if the setting `input_format_null_as_default=1`). [\#5990](https://github.com/ClickHouse/ClickHouse/issues/5990) [\#6055](https://github.com/ClickHouse/ClickHouse/pull/6055) ([tavplubix](https://github.com/tavplubix))
+- Support for wildcards in paths of the table functions `file` and `hdfs`. If the path contains wildcards, the table will be read-only. Example of usage: `select * from hdfs('hdfs://hdfs1:9000/some_dir/another_dir/*/file{0..9}{0..9}')` and `select * from file('some_dir/{some_file,another_file,yet_another}.tsv', 'TSV', 'value UInt32')`. [\#6092](https://github.com/ClickHouse/ClickHouse/pull/6092) ([Olga Khvostikova](https://github.com/stavrolia))
+- New `system.metric_log` table which stores values of `system.events` and `system.metrics` with a specified time interval. [\#6363](https://github.com/ClickHouse/ClickHouse/issues/6363) [\#6467](https://github.com/ClickHouse/ClickHouse/pull/6467) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6530](https://github.com/ClickHouse/ClickHouse/pull/6530) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow writing ClickHouse text logs to the `system.text_log` table. [\#6037](https://github.com/ClickHouse/ClickHouse/issues/6037) [\#6103](https://github.com/ClickHouse/ClickHouse/pull/6103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6164](https://github.com/ClickHouse/ClickHouse/pull/6164) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Show private symbols in stack traces (this is done via parsing the symbol tables of ELF files). Added information about file and line number in stack traces if debug info is present. Sped up symbol name lookup with indexing of symbols present in the program. Added new SQL functions for introspection: `demangle` and `addressToLine`. Renamed the function `symbolizeAddress` to `addressToSymbol` for consistency. The function `addressToSymbol` returns the mangled name for performance reasons, and you have to apply `demangle`. Added the setting `allow_introspection_functions`, which is turned off by default. [\#6201](https://github.com/ClickHouse/ClickHouse/pull/6201) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Table function `values` (the name is case-insensitive). It allows reading from the `VALUES` list proposed in [\#5984](https://github.com/ClickHouse/ClickHouse/issues/5984). Example: `SELECT * FROM VALUES('a UInt64, s String', (1, 'one'), (2, 'two'), (3, 'three'))`. [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))
+- Added the ability to alter storage settings. Syntax: `ALTER TABLE <table> MODIFY SETTING <setting> = <value>`. [\#6366](https://github.com/ClickHouse/ClickHouse/pull/6366) [\#6669](https://github.com/ClickHouse/ClickHouse/pull/6669) [\#6685](https://github.com/ClickHouse/ClickHouse/pull/6685) ([alesapin](https://github.com/alesapin))
+- Support for removing detached parts. Syntax: `ALTER TABLE <table_name> DROP DETACHED PART '<part_id>'`. [\#6158](https://github.com/ClickHouse/ClickHouse/pull/6158) ([tavplubix](https://github.com/tavplubix))
+- Table constraints. Allows adding a constraint to a table definition which will be checked at insert. [\#5273](https://github.com/ClickHouse/ClickHouse/pull/5273) ([Gleb Novikov](https://github.com/NanoBjorn)) [\#6652](https://github.com/ClickHouse/ClickHouse/pull/6652) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support for cascaded materialized views. [\#6324](https://github.com/ClickHouse/ClickHouse/pull/6324) ([Amos Bird](https://github.com/amosbird))
+- Turn on the query profiler by default to sample every query execution thread once a second. [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Input format `ORC`. [\#6454](https://github.com/ClickHouse/ClickHouse/pull/6454) [\#6703](https://github.com/ClickHouse/ClickHouse/pull/6703) ([akonyaev90](https://github.com/akonyaev90))
+- Added two new functions: `sigmoid` and `tanh` (which are useful for machine learning applications). [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Functions `hasToken(haystack, token)`, `hasTokenCaseInsensitive(haystack, token)` to check if a given token is in a haystack. A token is a maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack). The token must be a constant string. Supported by the tokenbf\_v1 index specialization (see the sketch below). [\#6596](https://github.com/ClickHouse/ClickHouse/pull/6596), [\#6662](https://github.com/ClickHouse/ClickHouse/pull/6662) ([Vasily Nemkov](https://github.com/Enmk))
+- New function `neighbor(value, offset[, default_value])`. Allows reaching the previous/next value within a column in a block of data (see the sketch below). [\#5925](https://github.com/ClickHouse/ClickHouse/pull/5925) ([Alex Krash](https://github.com/alex-krash)) [6685365ab8c5b74f9650492c88a012596eb1b0c6](https://github.com/ClickHouse/ClickHouse/commit/6685365ab8c5b74f9650492c88a012596eb1b0c6) [341e2e4587a18065c2da1ca888c73389f48ce36c](https://github.com/ClickHouse/ClickHouse/commit/341e2e4587a18065c2da1ca888c73389f48ce36c) [Alexey Milovidov](https://github.com/alexey-milovidov)
+- Created a function `currentUser()` returning the login of the authorized user. Added the alias `user()` for compatibility with MySQL. [\#6470](https://github.com/ClickHouse/ClickHouse/pull/6470) ([Alex Krash](https://github.com/alex-krash))
+- New aggregate functions `quantilesExactInclusive` and `quantilesExactExclusive`, which were proposed in [\#5885](https://github.com/ClickHouse/ClickHouse/issues/5885). [\#6477](https://github.com/ClickHouse/ClickHouse/pull/6477) ([dimarub2000](https://github.com/dimarub2000))
+- Function `bitmapRange(bitmap, range_begin, range_end)` which returns a new set with the specified range (not including `range_end`). [\#6314](https://github.com/ClickHouse/ClickHouse/pull/6314) ([Zhichang Yu](https://github.com/yuzhichang))
+- Function `geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)` which creates an array of precision-long strings of geohash boxes covering the provided area. [\#6127](https://github.com/ClickHouse/ClickHouse/pull/6127) ([Vasily Nemkov](https://github.com/Enmk))
+- Implement support for INSERT queries with `Kafka` tables. [\#6012](https://github.com/ClickHouse/ClickHouse/pull/6012) ([Ivan](https://github.com/abyss7))
+- Added support for the `_partition` and `_timestamp` virtual columns to the Kafka engine. [\#6400](https://github.com/ClickHouse/ClickHouse/pull/6400) ([Ivan](https://github.com/abyss7))
+- Possibility to remove sensitive data from `query_log`, server logs, and the process list with regexp-based rules. [\#5710](https://github.com/ClickHouse/ClickHouse/pull/5710) ([filimonov](https://github.com/filimonov))
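+
+A few minimal sketches of the new functions and modifiers above (the expected results in the comments follow from the descriptions):
+
+``` sql
+-- WITH FILL: fill gaps in an ordered result (default step is 1), yielding n = 1..5
+SELECT arrayJoin([1, 5]) AS n ORDER BY n WITH FILL;
+
+-- neighbor: reach the previous value within the current block, with a default
+SELECT number, neighbor(number, -1, -42) AS prev FROM numbers(3);
+
+-- hasToken and currentUser
+SELECT hasToken('Hello, world!', 'world');  -- 1
+SELECT currentUser();
+```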
+
+#### Experimental Feature {#experimental-feature-2}
+
+- Input and output data format `Template`. It allows specifying a custom format string for input and output. [\#4354](https://github.com/ClickHouse/ClickHouse/issues/4354) [\#6727](https://github.com/ClickHouse/ClickHouse/pull/6727) ([tavplubix](https://github.com/tavplubix))
+- Implementation of `LIVE VIEW` tables that were originally proposed in [\#2898](https://github.com/ClickHouse/ClickHouse/pull/2898), prepared in [\#3925](https://github.com/ClickHouse/ClickHouse/issues/3925), and later updated in [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541). See [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) for a detailed description (see also the sketch below). [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) ([vzakaznikov](https://github.com/vzakaznikov)) [\#6425](https://github.com/ClickHouse/ClickHouse/pull/6425) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#6656](https://github.com/ClickHouse/ClickHouse/pull/6656) ([vzakaznikov](https://github.com/vzakaznikov)) Note that the `LIVE VIEW` feature may be removed in future versions.
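+
+A minimal sketch of the experimental `LIVE VIEW` usage (the table and view names are illustrative; the feature is gated behind the `allow_experimental_live_view` setting mentioned later in this changelog):
+
+``` sql
+SET allow_experimental_live_view = 1;
+CREATE TABLE hits (ts DateTime, n UInt32) ENGINE = MergeTree ORDER BY ts;
+CREATE LIVE VIEW hits_count AS SELECT count() FROM hits;
+WATCH hits_count;  -- streams an updated count() as new rows are inserted into `hits`
+```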
+
+#### Bug Fix {#bug-fix-8}
+
+- This release also contains all bug fixes from 19.13 and 19.11.
+- Fix a segmentation fault when the table has skip indices and a vertical merge happens. [\#6723](https://github.com/ClickHouse/ClickHouse/pull/6723) ([alesapin](https://github.com/alesapin))
+- Fix per-column TTL with non-trivial column defaults. Previously, in case of a forced TTL merge with an `OPTIMIZE ... FINAL` query, expired values were replaced by type defaults instead of user-specified column defaults. [\#6796](https://github.com/ClickHouse/ClickHouse/pull/6796) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed the Kafka message duplication problem on normal server restarts. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
+- Fixed an infinite loop when reading Kafka messages. Do not pause/resume the consumer on subscription at all – otherwise it may get paused indefinitely in some scenarios. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([Ivan](https://github.com/abyss7))
+- Fix the `Key expression contains comparison between inconvertible types` exception in the `bitmapContains` function. [\#6136](https://github.com/ClickHouse/ClickHouse/issues/6136) [\#6146](https://github.com/ClickHouse/ClickHouse/issues/6146) [\#6156](https://github.com/ClickHouse/ClickHouse/pull/6156) ([dimarub2000](https://github.com/dimarub2000))
+- Fixed a segfault with `optimize_skip_unused_shards` enabled and a missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault on reads of the address `0x14c0` that could happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed extra verbose logging in the MySQL interface. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Return the ability to parse boolean settings from ‘true’ and ‘false’ in the configuration file. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
+- Fix a crash in the `quantile` and `median` functions over `Nullable(Decimal128)`. [\#6378](https://github.com/ClickHouse/ClickHouse/pull/6378) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a possible incomplete result returned by a `SELECT` query with a `WHERE` condition on the primary key that contained a conversion to a Float type. It was caused by incorrect checking of monotonicity in the `toFloat` function. [\#6248](https://github.com/ClickHouse/ClickHouse/issues/6248) [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
+- Check the `max_expanded_ast_elements` setting for mutations. Clear mutations after `TRUNCATE TABLE`. [\#6205](https://github.com/ClickHouse/ClickHouse/pull/6205) ([Winter Zhang](https://github.com/zhang2014))
+- Fix JOIN results for key columns when used with `join_use_nulls`. Attach Nulls instead of column defaults. [\#6249](https://github.com/ClickHouse/ClickHouse/pull/6249) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix for skip indices with vertical merge and alter. Fix for the `Bad size of marks file` exception. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594) [\#6713](https://github.com/ClickHouse/ClickHouse/pull/6713) ([alesapin](https://github.com/alesapin))
+- Fixed a rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows). [\#6746](https://github.com/ClickHouse/ClickHouse/issues/6746) [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
+- Fixed a bug in the conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix wrong behavior and possible segfaults in the `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed unsafe code around the `getIdentifier` function. [\#6401](https://github.com/ClickHouse/ClickHouse/issues/6401) [\#6409](https://github.com/ClickHouse/ClickHouse/pull/6409) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug in the MySQL wire protocol (used while connecting to ClickHouse from the MySQL client). Caused by a heap buffer overflow in `PacketPayloadWriteBuffer`. [\#6212](https://github.com/ClickHouse/ClickHouse/pull/6212) ([Yuri Baranov](https://github.com/yurriy))
+- Fixed a memory leak in the `bitmapSubsetInRange` function. [\#6819](https://github.com/ClickHouse/ClickHouse/pull/6819) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fixed a rare bug when a mutation executed after a granularity change. [\#6816](https://github.com/ClickHouse/ClickHouse/pull/6816) ([alesapin](https://github.com/alesapin))
+- Allow protobuf messages with all fields set to default values. [\#6132](https://github.com/ClickHouse/ClickHouse/pull/6132) ([Vitaly Baranov](https://github.com/vitlibar))
+- Resolve a bug with the `nullIf` function when we send `NULL` as the second argument. [\#6446](https://github.com/ClickHouse/ClickHouse/pull/6446) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Fixed a rare bug with wrong memory allocation/deallocation in complex key cache dictionaries with string fields, which led to infinite memory consumption (looking like a memory leak). The bug reproduces when the string size was a power of two starting from eight (8, 16, 32, etc.). [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
+- Fixed Gorilla encoding on small sequences which caused the exception `Cannot write after end of buffer`. [\#6398](https://github.com/ClickHouse/ClickHouse/issues/6398) [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Vasily Nemkov](https://github.com/Enmk))
+- Allow using non-nullable types in JOINs with `join_use_nulls` enabled. [\#6705](https://github.com/ClickHouse/ClickHouse/pull/6705) ([Artem Zuikov](https://github.com/4ertus2))
+- Disable `Poco::AbstractConfiguration` substitutions in queries in `clickhouse-client`. [\#6706](https://github.com/ClickHouse/ClickHouse/pull/6706) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid deadlock in `REPLACE PARTITION`. [\#6677](https://github.com/ClickHouse/ClickHouse/pull/6677) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Using `arrayReduce` for constant arguments could lead to a segfault. [\#6242](https://github.com/ClickHouse/ClickHouse/issues/6242) [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix inconsistent parts which can appear if a replica was restored after `DROP PARTITION`. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
+- Fixed a hang in the `JSONExtractRaw` function. [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug with incorrect skip index serialization and aggregation with adaptive granularity. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594). [\#6748](https://github.com/ClickHouse/ClickHouse/pull/6748) ([alesapin](https://github.com/alesapin))
+- Fix the `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a bug with writing secondary index marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
+- Fix the initialization order during server startup. Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))
+- Clear the data buffer from the previous read operation that was completed with an error. [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolai](https://github.com/bopohaa))
+- Fixed a bug with enabling adaptive granularity when creating a new replica for a Replicated\*MergeTree table. [\#6394](https://github.com/ClickHouse/ClickHouse/issues/6394) [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
+- Fixed a possible crash during server startup in case an exception happened in `libunwind` during an exception at access to an uninitialized `ThreadStatus` structure. [\#6456](https://github.com/ClickHouse/ClickHouse/pull/6456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix a crash in the `yandexConsistentHash` function. Found by fuzz test. [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304) [\#6305](https://github.com/ClickHouse/ClickHouse/pull/6305) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the possibility of hanging queries when the server is overloaded and the global thread pool becomes nearly full. This has a higher chance of happening on clusters with a large number of shards (hundreds), because distributed queries allocate a thread per connection to each shard. For example, this issue may reproduce if a cluster of 330 shards is processing 30 concurrent distributed queries. This issue affects all versions starting from 19.2. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the logic of the `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a segfault when decoding the symbol table. [\#6603](https://github.com/ClickHouse/ClickHouse/pull/6603) ([Amos Bird](https://github.com/amosbird))
+- Fixed an irrelevant exception in the cast of `LowCardinality(Nullable)` to a not-Nullable column in case it doesn't contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed an irrelevant exception in the cast of `LowCardinality(Nullable)` to a non-Nullable column in case it doesn't contain NULLs (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Removed extra quoting of descriptions in the `system.settings` table. [\#6696](https://github.com/ClickHouse/ClickHouse/issues/6696) [\#6699](https://github.com/ClickHouse/ClickHouse/pull/6699) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid a possible deadlock in `TRUNCATE` of a replicated table. [\#6695](https://github.com/ClickHouse/ClickHouse/pull/6695) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed reading in order of the sorting key. [\#6189](https://github.com/ClickHouse/ClickHouse/pull/6189) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed the `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
+- Fixed a bug introduced by [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) (since 19.4.0). Reproduced in queries to Distributed tables over MergeTree tables when no columns are queried (`SELECT 1`). [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
+- Fixed overflow in integer division of a signed type by an unsigned one. The behaviour was exactly as in the C or C++ language (integer promotion rules), which may be surprising; see the sketch after this list. Note that overflow is still possible when dividing a large signed number by a large unsigned number or vice versa (but that case is less usual). The issue existed in all server versions. [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214) [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Limit the maximum sleep time for throttling when `max_execution_speed` or `max_execution_speed_bytes` is set. Fixed false errors like `Estimated query execution time (inf seconds) is too long`. [\#5547](https://github.com/ClickHouse/ClickHouse/issues/5547) [\#6232](https://github.com/ClickHouse/ClickHouse/pull/6232) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed issues with using `MATERIALIZED` columns and aliases in `MaterializedView`. [\#448](https://github.com/ClickHouse/ClickHouse/issues/448) [\#3484](https://github.com/ClickHouse/ClickHouse/issues/3484) [\#3450](https://github.com/ClickHouse/ClickHouse/issues/3450) [\#2878](https://github.com/ClickHouse/ClickHouse/issues/2878) [\#2285](https://github.com/ClickHouse/ClickHouse/issues/2285) [\#3796](https://github.com/ClickHouse/ClickHouse/pull/3796) ([Amos Bird](https://github.com/amosbird)) [\#6316](https://github.com/ClickHouse/ClickHouse/pull/6316) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed `FormatFactory` behaviour for input streams that are not implemented as a processor. [\#6495](https://github.com/ClickHouse/ClickHouse/pull/6495) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed a typo. [\#6631](https://github.com/ClickHouse/ClickHouse/pull/6631) ([Alex Ryndin](https://github.com/alexryndin))
+- Fixed a typo in an error message (is -\> are). [\#6839](https://github.com/ClickHouse/ClickHouse/pull/6839) ([Denis Zhuravlev](https://github.com/den-crane))
+- Fixed an error while parsing a column list from a string if the type contained a comma (this issue was relevant for the `File`, `URL`, `HDFS` storages) [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))
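+
+A minimal sketch of the signed/unsigned division issue above. The values are hypothetical, chosen only to show the type mix; under C-style integer promotion the signed operand was first converted to unsigned, which could turn a small negative result into a huge positive one:
+
+```sql
+-- Illustrative only: signed dividend, unsigned divisor. Before the fix,
+-- -199 could be promoted to an unsigned value before dividing, producing
+-- a surprisingly large result instead of a small negative quotient.
+SELECT intDiv(toInt32(-199), toUInt32(200));
+```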
+
+#### Security Fix {#security-fix}
+
+- This release also contains all bug security fixes from 19.13 and 19.11.
+- Fixed the possibility of a fabricated query causing a server crash due to stack overflow in the SQL parser. Fixed the possibility of stack overflow in Merge and Distributed tables, materialized views and conditions for row-level security that involve subqueries. [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvement {#improvement-3}
+
+- Correct implementation of ternary logic for `AND/OR`. [\#6048](https://github.com/ClickHouse/ClickHouse/pull/6048) ([Alexander Kazakov](https://github.com/Akazz))
+- Now values and rows with expired TTL will be removed after an `OPTIMIZE ... FINAL` query from old parts without TTL info or with outdated TTL info, e.g. after an `ALTER ... MODIFY TTL` query. Added the queries `SYSTEM STOP/START TTL MERGES` to disallow/allow assigning merges with TTL and filtering expired values in all merges (see the sketch after this list). [\#6274](https://github.com/ClickHouse/ClickHouse/pull/6274) ([Anton Popov](https://github.com/CurtizJ))
+- Possibility to change the location of the ClickHouse history file for the client using the `CLICKHOUSE_HISTORY_FILE` environment variable. [\#6840](https://github.com/ClickHouse/ClickHouse/pull/6840) ([filimonov](https://github.com/filimonov))
+- Removed the `dry_run` flag from `InterpreterSelectQuery`. … [\#6375](https://github.com/ClickHouse/ClickHouse/pull/6375) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Support `ASOF JOIN` with the `ON` section. [\#6211](https://github.com/ClickHouse/ClickHouse/pull/6211) ([Artem Zuikov](https://github.com/4ertus2))
+- Better support of skip indexes for mutations and replication. Support for the `MATERIALIZE/CLEAR INDEX ... IN PARTITION` query. `UPDATE x = x` recalculates all indexes that use column `x`. [\#5053](https://github.com/ClickHouse/ClickHouse/pull/5053) ([Nikita Vasilev](https://github.com/nikvas0))
+- Allow to `ATTACH` live views (for example, at server startup) regardless of the `allow_experimental_live_view` setting. [\#6754](https://github.com/ClickHouse/ClickHouse/pull/6754) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- For stack traces gathered by the query profiler, do not include stack frames generated by the query profiler itself. [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now the table functions `values`, `file`, `url`, `hdfs` support ALIAS columns. [\#6255](https://github.com/ClickHouse/ClickHouse/pull/6255) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Throw an exception if a `config.d` file doesn't have the corresponding root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))
+- Print extra info in the exception message for `no space left on device`. [\#6182](https://github.com/ClickHouse/ClickHouse/issues/6182), [\#6252](https://github.com/ClickHouse/ClickHouse/issues/6252) [\#6352](https://github.com/ClickHouse/ClickHouse/pull/6352) ([tavplubix](https://github.com/tavplubix))
+- When determining the shards of a `Distributed` table to be covered by a read query (for `optimize_skip_unused_shards` = 1), ClickHouse now checks conditions from both the `prewhere` and `where` clauses of the select statement. [\#6521](https://github.com/ClickHouse/ClickHouse/pull/6521) ([Alexander Kazakov](https://github.com/Akazz))
+- Enabled `SIMDJSON` for machines without AVX2 but with the SSE 4.2 and PCLMUL instruction sets. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285) [\#6320](https://github.com/ClickHouse/ClickHouse/pull/6320) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- ClickHouse can work on filesystems without `O_DIRECT` support (such as ZFS and BtrFS) without additional tuning. [\#4449](https://github.com/ClickHouse/ClickHouse/issues/4449) [\#6730](https://github.com/ClickHouse/ClickHouse/pull/6730) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support predicate push-down for the final subquery. [\#6120](https://github.com/ClickHouse/ClickHouse/pull/6120) ([TCeason](https://github.com/TCeason)) [\#6162](https://github.com/ClickHouse/ClickHouse/pull/6162) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better `JOIN ON` key extraction. [\#6131](https://github.com/ClickHouse/ClickHouse/pull/6131) ([Artem Zuikov](https://github.com/4ertus2))
+- Updated `SIMDJSON`. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285). [\#6306](https://github.com/ClickHouse/ClickHouse/pull/6306) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Optimized the selection of the smallest column for `SELECT count()` queries. [\#6344](https://github.com/ClickHouse/ClickHouse/pull/6344) ([Amos Bird](https://github.com/amosbird))
+- Added a `strict` parameter to `windowFunnel()`. When `strict` is set, `windowFunnel()` applies conditions only to unique values (see the sketch after this list). [\#6548](https://github.com/ClickHouse/ClickHouse/pull/6548) ([achimbab](https://github.com/achimbab))
+- Safer interface of `mysqlxx::Pool`. [\#6150](https://github.com/ClickHouse/ClickHouse/pull/6150) ([avasiliev](https://github.com/avasiliev))
+- The line size of the options printed by the `--help` option now corresponds to the terminal size. [\#6590](https://github.com/ClickHouse/ClickHouse/pull/6590) ([dimarub2000](https://github.com/dimarub2000))
+- Disable the "read in order" optimization for aggregation without keys. [\#6599](https://github.com/ClickHouse/ClickHouse/pull/6599) ([Anton Popov](https://github.com/CurtizJ))
+- The HTTP status code for the `INCORRECT_DATA` and `TYPE_MISMATCH` error codes was changed from the default `500 Internal Server Error` to `400 Bad Request`. [\#6271](https://github.com/ClickHouse/ClickHouse/pull/6271) ([Alexander Rodin](https://github.com/a-rodin))
+- Moved the Join object from `ExpressionAction` into `AnalyzedJoin`. `ExpressionAnalyzer` and `ExpressionAction` do not know about the `Join` class anymore. Its logic is hidden behind the `AnalyzedJoin` interface. [\#6801](https://github.com/ClickHouse/ClickHouse/pull/6801) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a possible deadlock of distributed queries when one of the shards is localhost but the query is sent via a network connection. [\#6759](https://github.com/ClickHouse/ClickHouse/pull/6759) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the semantics of multiple-table `RENAME` to avoid possible deadlocks. [\#6757](https://github.com/ClickHouse/ClickHouse/issues/6757). [\#6756](https://github.com/ClickHouse/ClickHouse/pull/6756) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Rewrote the MySQL compatibility server to prevent loading the full packet payload into memory. Decreased memory consumption for each connection to approximately `2 * DBMS_DEFAULT_BUFFER_SIZE` (read/write buffers). [\#5811](https://github.com/ClickHouse/ClickHouse/pull/5811) ([Yuriy Baranov](https://github.com/yurriy))
+- Moved the AST alias interpreting logic out of the parser, which doesn't have to know anything about query semantics. [\#6108](https://github.com/ClickHouse/ClickHouse/pull/6108) ([Artem Zuikov](https://github.com/4ertus2))
+- Slightly safer parsing of `NamesAndTypesList`. [\#6408](https://github.com/ClickHouse/ClickHouse/issues/6408). [\#6410](https://github.com/ClickHouse/ClickHouse/pull/6410) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `clickhouse-copier`: allow using `where_condition` from the config with a `partition_key` alias in the query for checking partition existence (earlier it was used only in queries reading data). [\#6577](https://github.com/ClickHouse/ClickHouse/pull/6577) ([proller](https://github.com/proller))
+- Added an optional message argument to `throwIf` (see the sketch after this list). ([\#5772](https://github.com/ClickHouse/ClickHouse/issues/5772)) [\#6329](https://github.com/ClickHouse/ClickHouse/pull/6329) ([Vdimir](https://github.com/Vdimir))
+- A server exception received while sending insertion data is now processed in the client as well. [\#5891](https://github.com/ClickHouse/ClickHouse/issues/5891) [\#6711](https://github.com/ClickHouse/ClickHouse/pull/6711) ([dimarub2000](https://github.com/dimarub2000))
+- Added the `DistributedFilesToInsert` metric that shows the total number of files in the filesystem that are selected to be sent to remote servers by Distributed tables. The number is summed across all shards. [\#6600](https://github.com/ClickHouse/ClickHouse/pull/6600) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Moved most of the JOIN preparation logic from `ExpressionAction/ExpressionAnalyzer` to `AnalyzedJoin`. [\#6785](https://github.com/ClickHouse/ClickHouse/pull/6785) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a TSan [warning](https://clickhouse-test-reports.s3.yandex.net/6399/c1c1d1daa98e199e620766f1bd06a5921050a00d/functional_stateful_tests_(thread).html) 'lock-order-inversion'. [\#6740](https://github.com/ClickHouse/ClickHouse/pull/6740) ([Vasily Nemkov](https://github.com/Enmk))
+- Better information messages about the lack of Linux capabilities. Fatal errors are logged with the "fatal" level, which makes them easier to find in `system.text_log`. [\#6441](https://github.com/ClickHouse/ClickHouse/pull/6441) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- When dumping temporary data to disk is enabled to restrict memory usage during `GROUP BY` or `ORDER BY`, the free disk space was not checked. The fix adds a new setting, `min_free_disk_space`: when the free disk space is smaller than this threshold, the query will stop and throw `ErrorCodes::NOT_ENOUGH_SPACE`. [\#6678](https://github.com/ClickHouse/ClickHouse/pull/6678) ([Weiqing Xu](https://github.com/weiqxu)) [\#6691](https://github.com/ClickHouse/ClickHouse/pull/6691) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed the recursive rwlock by thread. It made no sense, because threads are reused between queries. A `SELECT` query may acquire a lock in one thread, hold a lock from another thread and exit from the first thread. At the same time, the first thread can be reused by a `DROP` query. This led to false "Attempt to acquire exclusive lock recursively" messages. [\#6771](https://github.com/ClickHouse/ClickHouse/pull/6771) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Split `ExpressionAnalyzer.appendJoin()`. Prepared a place in `ExpressionAnalyzer` for `MergeJoin`. [\#6524](https://github.com/ClickHouse/ClickHouse/pull/6524) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `mysql_native_password` authentication plugin to the MySQL compatibility server. [\#6194](https://github.com/ClickHouse/ClickHouse/pull/6194) ([Yuriy Baranov](https://github.com/yurriy))
+- Fewer `clock_gettime` calls; fixed ABI compatibility between debug/release in `Allocator` (an insignificant issue). [\#6197](https://github.com/ClickHouse/ClickHouse/pull/6197) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Moved `collectUsedColumns` from `ExpressionAnalyzer` to `SyntaxAnalyzer`. `SyntaxAnalyzer` now builds `required_source_columns` itself. [\#6416](https://github.com/ClickHouse/ClickHouse/pull/6416) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `joined_subquery_requires_alias` setting to require aliases for subselects and table functions in `FROM` when more than one table is present (i.e. queries with JOINs); see the sketch after this list. [\#6733](https://github.com/ClickHouse/ClickHouse/pull/6733) ([Artem Zuikov](https://github.com/4ertus2))
+- Extracted the `GetAggregatesVisitor` class from `ExpressionAnalyzer`. [\#6458](https://github.com/ClickHouse/ClickHouse/pull/6458) ([Artem Zuikov](https://github.com/4ertus2))
+- `system.query_log`: changed the data type of the `type` column to `Enum`. [\#6265](https://github.com/ClickHouse/ClickHouse/pull/6265) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Static linking of the `sha256_password` authentication plugin. [\#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy))
+- Avoid an extra dependency for the `compile` setting to work. In previous versions, the user could get errors like `cannot open crti.o`, `unable to find library -lc`, etc. [\#6309](https://github.com/ClickHouse/ClickHouse/pull/6309) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- More validation of input that may come from a malicious replica. [\#6303](https://github.com/ClickHouse/ClickHouse/pull/6303) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now the `clickhouse-obfuscator` file is available in the `clickhouse-client` package. In previous versions it was available as `clickhouse obfuscator` (with whitespace). [\#5816](https://github.com/ClickHouse/ClickHouse/issues/5816) [\#6609](https://github.com/ClickHouse/ClickHouse/pull/6609) ([dimarub2000](https://github.com/dimarub2000))
+- Fixed a deadlock when we have at least two queries that read at least two tables in different order and another query that performs a DDL operation on one of the tables. Fixed another very rare deadlock. [\#6764](https://github.com/ClickHouse/ClickHouse/pull/6764) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the `os_thread_ids` column to `system.processes` and `system.query_log` for better debugging possibilities. [\#6763](https://github.com/ClickHouse/ClickHouse/pull/6763) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- A workaround for PHP mysqlnd extension bugs which occur when `sha256_password` is used as the default authentication plugin (described in [\#6031](https://github.com/ClickHouse/ClickHouse/issues/6031)). [\#6113](https://github.com/ClickHouse/ClickHouse/pull/6113) ([Yuriy Baranov](https://github.com/yurriy))
+- Removed an unneeded place with changed nullability columns. [\#6693](https://github.com/ClickHouse/ClickHouse/pull/6693) ([Artem Zuikov](https://github.com/4ertus2))
+- Set the default value of `queue_max_wait_ms` to zero, because the current value (five seconds) made no sense. There are rare circumstances when this setting has any use. Added the settings `replace_running_query_max_wait_ms`, `kafka_max_wait_ms` and `connection_pool_max_wait_ms` for disambiguation. [\#6692](https://github.com/ClickHouse/ClickHouse/pull/6692) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Extracted `SelectQueryExpressionAnalyzer` from `ExpressionAnalyzer`. Keep the latter for non-select queries. [\#6499](https://github.com/ClickHouse/ClickHouse/pull/6499) ([Artem Zuikov](https://github.com/4ertus2))
+- Removed duplicated input and output formats. [\#6239](https://github.com/ClickHouse/ClickHouse/pull/6239) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Allow the user to override the `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `MergeTree` now has an additional option `ttl_only_drop_parts` (disabled by default) to avoid partial pruning of parts, so that they are dropped completely when all rows in a part are expired (also shown in the sketch after this list). [\#6191](https://github.com/ClickHouse/ClickHouse/pull/6191) ([Sergi Vladykin](https://github.com/svladykin))
+- Type checks for set index functions. Throw an exception if a function got a wrong type. This fixes the fuzz test with UBSan. [\#6511](https://github.com/ClickHouse/ClickHouse/pull/6511) ([Nikita Vasilev](https://github.com/nikvas0))
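+
+A minimal SQL sketch illustrating several of the items above. The `events` table, its columns and the values are hypothetical, chosen only for illustration; the statements follow the syntax these entries describe:
+
+```sql
+-- A MergeTree table with a TTL; ttl_only_drop_parts = 1 means a part is
+-- removed only when all of its rows are expired, instead of being rewritten.
+CREATE TABLE events
+(
+    ts DateTime,
+    event String
+)
+ENGINE = MergeTree
+ORDER BY ts
+TTL ts + INTERVAL 1 MONTH
+SETTINGS ttl_only_drop_parts = 1;
+
+-- TTL merges can be paused and resumed; OPTIMIZE ... FINAL also drops
+-- expired values from old parts, e.g. after ALTER ... MODIFY TTL.
+SYSTEM STOP TTL MERGES;
+SYSTEM START TTL MERGES;
+OPTIMIZE TABLE events FINAL;
+
+-- windowFunnel with the new 'strict' mode: conditions are applied only
+-- to unique values.
+SELECT windowFunnel(3600, 'strict')(ts, event = 'view', event = 'click') AS level
+FROM events;
+
+-- throwIf with the new optional message argument.
+SELECT throwIf(count() = 0, 'no events ingested yet') FROM events;
+
+-- joined_subquery_requires_alias = 1 rejects unaliased subqueries when more
+-- than one table is present in FROM.
+SET joined_subquery_requires_alias = 1;
+SELECT * FROM (SELECT 1 AS x) AS t1 JOIN (SELECT 1 AS x) AS t2 USING (x);
+```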
+
+#### Performance Improvement {#performance-improvement-2}
+
+- Optimize queries with an `ORDER BY expressions` clause, where `expressions` have a coinciding prefix with the sorting key in `MergeTree` tables. This optimization is controlled by the `optimize_read_in_order` setting (see the sketch after this list). [\#6054](https://github.com/ClickHouse/ClickHouse/pull/6054) [\#6629](https://github.com/ClickHouse/ClickHouse/pull/6629) ([Anton Popov](https://github.com/CurtizJ))
+- Allow using multiple threads during parts loading and removal. [\#6372](https://github.com/ClickHouse/ClickHouse/issues/6372) [\#6074](https://github.com/ClickHouse/ClickHouse/issues/6074) [\#6438](https://github.com/ClickHouse/ClickHouse/pull/6438) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Implemented a batch variant of updating aggregate function states. It may lead to performance benefits. [\#6435](https://github.com/ClickHouse/ClickHouse/pull/6435) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Use the `FastOps` library for the functions `exp`, `log`, `sigmoid`, `tanh`. FastOps is a fast vector math library from Michael Parakhin (Yandex CTO). Improved performance of the `exp` and `log` functions more than 6 times. The functions `exp` and `log` with a `Float32` argument now return `Float32` (in previous versions they always returned `Float64`). Now `exp(nan)` may return `inf`. The result of the `exp` and `log` functions may not be the nearest machine-representable number to the true answer. [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) Using Danila Kutenin's variant to make fastops work [\#6317](https://github.com/ClickHouse/ClickHouse/pull/6317) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Disable consecutive key optimization for `UInt8/16`. [\#6298](https://github.com/ClickHouse/ClickHouse/pull/6298) [\#6701](https://github.com/ClickHouse/ClickHouse/pull/6701) ([akuzm](https://github.com/akuzm))
+- Improved performance of the `simdjson` library by getting rid of dynamic allocation in `ParsedJson::Iterator`. [\#6479](https://github.com/ClickHouse/ClickHouse/pull/6479) ([Vitaly Baranov](https://github.com/vitlibar))
+- Pre-fault pages when allocating memory with `mmap()`. [\#6667](https://github.com/ClickHouse/ClickHouse/pull/6667) ([akuzm](https://github.com/akuzm))
+- Fixed a performance bug in `Decimal` comparison. [\#6380](https://github.com/ClickHouse/ClickHouse/pull/6380) ([Artem Zuikov](https://github.com/4ertus2))
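+
+A minimal sketch of the read-in-order optimization above, assuming a hypothetical table whose sorting key is `(CounterID, EventDate)`; when the query's `ORDER BY` matches a prefix of that key, data can be read in order instead of being fully sorted:
+
+```sql
+CREATE TABLE hits
+(
+    CounterID UInt32,
+    EventDate Date
+)
+ENGINE = MergeTree
+ORDER BY (CounterID, EventDate);
+
+SET optimize_read_in_order = 1;
+
+-- ORDER BY is a prefix of the table's sorting key, so no full sort is needed.
+SELECT CounterID, EventDate
+FROM hits
+ORDER BY CounterID, EventDate
+LIMIT 10;
+```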
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-4}
+
+- Removed Compiler (runtime template instantiation) because we've won over its performance. [\#6646](https://github.com/ClickHouse/ClickHouse/pull/6646) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a performance test to show the performance degradation in gcc-9 in a more isolated way. [\#6302](https://github.com/ClickHouse/ClickHouse/pull/6302) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the table function `numbers_mt`, a multithreaded version of `numbers` (see the sketch at the end of this list). Updated performance tests with hash functions. [\#6554](https://github.com/ClickHouse/ClickHouse/pull/6554) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Comparison mode in `clickhouse-benchmark`. [\#6220](https://github.com/ClickHouse/ClickHouse/issues/6220) [\#6343](https://github.com/ClickHouse/ClickHouse/pull/6343) ([dimarub2000](https://github.com/dimarub2000))
+- Best effort for printing stack traces. Also added `SIGPROF` as a debugging signal to print the stack trace of a running thread. [\#6529](https://github.com/ClickHouse/ClickHouse/pull/6529) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Every function in its own file, part 10. [\#6321](https://github.com/ClickHouse/ClickHouse/pull/6321) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed the doubled constant `TABLE_IS_READ_ONLY`. [\#6566](https://github.com/ClickHouse/ClickHouse/pull/6566) ([filimonov](https://github.com/filimonov))
+- Formatting changes for the `StringHashMap` PR [\#5417](https://github.com/ClickHouse/ClickHouse/issues/5417). [\#6700](https://github.com/ClickHouse/ClickHouse/pull/6700) ([akuzm](https://github.com/akuzm))
+- Better subquery for join creation in `ExpressionAnalyzer`. [\#6824](https://github.com/ClickHouse/ClickHouse/pull/6824) ([Artem Zuikov](https://github.com/4ertus2))
+- Removed a redundant condition (found by PVS-Studio). [\#6775](https://github.com/ClickHouse/ClickHouse/pull/6775) ([akuzm](https://github.com/akuzm))
+- Separated the hash table interface for `ReverseIndex`. [\#6672](https://github.com/ClickHouse/ClickHouse/pull/6672) ([akuzm](https://github.com/akuzm))
+- Refactoring of settings. [\#6689](https://github.com/ClickHouse/ClickHouse/pull/6689) ([alesapin](https://github.com/alesapin))
+- Added comments for the `set` index functions. [\#6319](https://github.com/ClickHouse/ClickHouse/pull/6319) ([Nikita Vasilev](https://github.com/nikvas0))
+- Increased the OOM score in the debug version on Linux. [\#6152](https://github.com/ClickHouse/ClickHouse/pull/6152) ([akuzm](https://github.com/akuzm))
+- HDFS HA now works in debug build. [\#6650](https://github.com/ClickHouse/ClickHouse/pull/6650) ([Weiqing Xu](https://github.com/weiqxu))
+- Added a test for `transform_query_for_external_database`. [\#6388](https://github.com/ClickHouse/ClickHouse/pull/6388) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a test for multiple materialized views for a Kafka table. [\#6509](https://github.com/ClickHouse/ClickHouse/pull/6509) ([Ivan](https://github.com/abyss7))
+- Made a better build scheme. [\#6500](https://github.com/ClickHouse/ClickHouse/pull/6500) ([Ivan](https://github.com/abyss7))
+- Fixed the `test_external_dictionaries` integration test in case it was executed under a non-root user. [\#6507](https://github.com/ClickHouse/ClickHouse/pull/6507) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- The bug reproduces when the total size of written packets exceeds `DBMS_DEFAULT_BUFFER_SIZE`. [\#6204](https://github.com/ClickHouse/ClickHouse/pull/6204) ([Yuriy Baranov](https://github.com/yurriy))
+- Added a test for a `RENAME` table race condition. [\#6752](https://github.com/ClickHouse/ClickHouse/pull/6752) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid a data race on Settings in `KILL QUERY`. [\#6753](https://github.com/ClickHouse/ClickHouse/pull/6753) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added an integration test for handling errors by a cache dictionary. [\#6755](https://github.com/ClickHouse/ClickHouse/pull/6755) ([Vitaly Baranov](https://github.com/vitlibar))
+- Disabled parsing of ELF object files on Mac OS, because it makes no sense. [\#6578](https://github.com/ClickHouse/ClickHouse/pull/6578) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Attempt to make the changelog generator better. [\#6327](https://github.com/ClickHouse/ClickHouse/pull/6327) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the `-Wshadow` switch to GCC. [\#6325](https://github.com/ClickHouse/ClickHouse/pull/6325) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+- Removed obsolete code for `mimalloc` support. [\#6715](https://github.com/ClickHouse/ClickHouse/pull/6715) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `zlib-ng` determines x86 capabilities and saves this info to global variables. This is done in the deflateInit call, which may be made by different threads simultaneously. To avoid multithreaded writes, do it at library startup. [\#6141](https://github.com/ClickHouse/ClickHouse/pull/6141) ([akuzm](https://github.com/akuzm))
+- Regression test for a bug in join which was fixed in [\#5192](https://github.com/ClickHouse/ClickHouse/issues/5192). [\#6147](https://github.com/ClickHouse/ClickHouse/pull/6147) ([Bakhtiyor Ruziev](https://github.com/theruziev))
+- Fixed an MSan report. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the flapping TTL test. [\#6782](https://github.com/ClickHouse/ClickHouse/pull/6782) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a false data race in the `MergeTreeDataPart::is_frozen` field. [\#6583](https://github.com/ClickHouse/ClickHouse/pull/6583) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed timeouts in the fuzz test. In the previous version, it managed to find a false hangup in the query `SELECT * FROM numbers_mt(gccMurmurHash(''))`. [\#6582](https://github.com/ClickHouse/ClickHouse/pull/6582) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added debug checks to `static_cast` of columns. [\#6581](https://github.com/ClickHouse/ClickHouse/pull/6581) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support for Oracle Linux in the official RPM packages. [\#6356](https://github.com/ClickHouse/ClickHouse/issues/6356) [\#6585](https://github.com/ClickHouse/ClickHouse/pull/6585) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed JSON perftests from the `once` to the `loop` type. [\#6536](https://github.com/ClickHouse/ClickHouse/pull/6536) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- `odbc-bridge.cpp` defines `main()`, so it should not be included in `clickhouse-lib`. [\#6538](https://github.com/ClickHouse/ClickHouse/pull/6538) ([Orivej Desh](https://github.com/orivej))
+- Test for a crash in `FULL|RIGHT JOIN` with nulls in the right table's keys. [\#6362](https://github.com/ClickHouse/ClickHouse/pull/6362) ([Artem Zuikov](https://github.com/4ertus2))
+- Added a test for the limit on expansion of aliases, just in case. [\#6442](https://github.com/ClickHouse/ClickHouse/pull/6442) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Switched from `boost::filesystem` to `std::filesystem` where appropriate. [\#6253](https://github.com/ClickHouse/ClickHouse/pull/6253) [\#6385](https://github.com/ClickHouse/ClickHouse/pull/6385) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added RPM packages to the website. [\#6251](https://github.com/ClickHouse/ClickHouse/pull/6251) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a test for the fixed `Unknown identifier` exception in the `IN` section. [\#6708](https://github.com/ClickHouse/ClickHouse/pull/6708) ([Artem Zuikov](https://github.com/4ertus2))
+- Simplified `shared_ptr_helper` because people faced difficulties understanding it. [\#6675](https://github.com/ClickHouse/ClickHouse/pull/6675) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added performance tests for the fixed Gorilla and DoubleDelta codecs. [\#6179](https://github.com/ClickHouse/ClickHouse/pull/6179) ([Vasily Nemkov](https://github.com/Enmk))
+- Split the integration test `test_dictionaries` into 4 separate tests. [\#6776](https://github.com/ClickHouse/ClickHouse/pull/6776) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a PVS-Studio warning in `PipelineExecutor`. [\#6777](https://github.com/ClickHouse/ClickHouse/pull/6777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Allow using the `library` dictionary source with ASan. [\#6482](https://github.com/ClickHouse/ClickHouse/pull/6482) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added an option to generate a changelog from a list of PRs. [\#6350](https://github.com/ClickHouse/ClickHouse/pull/6350) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Lock the `TinyLog` storage when reading. [\#6226](https://github.com/ClickHouse/ClickHouse/pull/6226) ([akuzm](https://github.com/akuzm))
+- Check for broken links in CI. [\#6634](https://github.com/ClickHouse/ClickHouse/pull/6634) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Increased the timeout for the "stack overflow" test because it may take a long time in debug build. [\#6637](https://github.com/ClickHouse/ClickHouse/pull/6637) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a check for double whitespaces. [\#6643](https://github.com/ClickHouse/ClickHouse/pull/6643) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed `new/delete` memory tracking when built with sanitizers. The tracking is not clear. It only prevents memory limit exceptions in tests. [\#6450](https://github.com/ClickHouse/ClickHouse/pull/6450) ([Artem Zuikov](https://github.com/4ertus2))
+- Enabled back the check of undefined symbols while linking. [\#6453](https://github.com/ClickHouse/ClickHouse/pull/6453) ([Ivan](https://github.com/abyss7))
+- Avoid rebuilding `hyperscan` every day. [\#6307](https://github.com/ClickHouse/ClickHouse/pull/6307) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a UBSan report in `ProtobufWriter`. [\#6163](https://github.com/ClickHouse/ClickHouse/pull/6163) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Don't allow using the query profiler with sanitizers because it is not compatible. [\#6769](https://github.com/ClickHouse/ClickHouse/pull/6769) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a test for reloading a dictionary after failure by timer. [\#6114](https://github.com/ClickHouse/ClickHouse/pull/6114) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed an inconsistency in the `PipelineExecutor::prepareProcessor` argument type. [\#6494](https://github.com/ClickHouse/ClickHouse/pull/6494) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added a test for bad URIs. [\#6493](https://github.com/ClickHouse/ClickHouse/pull/6493) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added more checks to the `CAST` function. This should get more information about the segmentation fault in the fuzzy test. [\#6346](https://github.com/ClickHouse/ClickHouse/pull/6346) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added `gcc-9` support to the `docker/builder` container that builds the image locally. [\#6333](https://github.com/ClickHouse/ClickHouse/pull/6333) ([Gleb Novikov](https://github.com/NanoBjorn))
+- Test for primary key with `LowCardinality(String)`. [\#5044](https://github.com/ClickHouse/ClickHouse/issues/5044) [\#6219](https://github.com/ClickHouse/ClickHouse/pull/6219) ([dimarub2000](https://github.com/dimarub2000))
+- Fixed tests affected by slow stack trace printing. [\#6315](https://github.com/ClickHouse/ClickHouse/pull/6315) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a test case for a crash in `groupUniqArray` fixed in [\#6029](https://github.com/ClickHouse/ClickHouse/pull/6029). [\#4402](https://github.com/ClickHouse/ClickHouse/issues/4402) [\#6129](https://github.com/ClickHouse/ClickHouse/pull/6129) ([akuzm](https://github.com/akuzm))
+- Fixed the index mutation tests. [\#6645](https://github.com/ClickHouse/ClickHouse/pull/6645) ([Nikita Vasilev](https://github.com/nikvas0))
+- In the performance test, do not read the query log for queries we didn't run. [\#6427](https://github.com/ClickHouse/ClickHouse/pull/6427) ([akuzm](https://github.com/akuzm))
+- A materialized view now can be created with any low-cardinality types regardless of the setting about suspicious low-cardinality types. [\#6428](https://github.com/ClickHouse/ClickHouse/pull/6428) ([Olga Khvostikova](https://github.com/stavrolia))
+- Updated tests for the `send_logs_level` setting. [\#6207](https://github.com/ClickHouse/ClickHouse/pull/6207) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the build under gcc-8.2. [\#6196](https://github.com/ClickHouse/ClickHouse/pull/6196) ([Max Akhmedov](https://github.com/zlobober))
+- Fixed the build with the internal libc++. [\#6724](https://github.com/ClickHouse/ClickHouse/pull/6724) ([Ivan](https://github.com/abyss7))
+- Fixed the shared build with the `rdkafka` library. [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
+- Fixes for the Mac OS build (incomplete). [\#6390](https://github.com/ClickHouse/ClickHouse/pull/6390) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6429](https://github.com/ClickHouse/ClickHouse/pull/6429) ([Alex Zaitsev](https://github.com/alex-zaitsev))
+- Fixed the "splitted" build. [\#6618](https://github.com/ClickHouse/ClickHouse/pull/6618) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Other build fixes: [\#6186](https://github.com/ClickHouse/ClickHouse/pull/6186) ([Amos Bird](https://github.com/amosbird)) [\#6486](https://github.com/ClickHouse/ClickHouse/pull/6486) [\#6348](https://github.com/ClickHouse/ClickHouse/pull/6348) ([vxider](https://github.com/Vxider)) [\#6744](https://github.com/ClickHouse/ClickHouse/pull/6744) ([Ivan](https://github.com/abyss7)) [\#6016](https://github.com/ClickHouse/ClickHouse/pull/6016) [\#6421](https://github.com/ClickHouse/ClickHouse/pull/6421) [\#6491](https://github.com/ClickHouse/ClickHouse/pull/6491) ([proller](https://github.com/proller))
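+
+A small usage sketch for the `numbers_mt` table function added above; it returns the same sequence as `numbers`, but generates blocks in multiple threads:
+
+```sql
+-- Multithreaded generation of a sequence of integers.
+SELECT count()
+FROM numbers_mt(1000000000);
+
+-- Single-threaded equivalent, for comparison.
+SELECT count()
+FROM numbers(1000000000);
+```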
+
+#### Backward Incompatible Change {#backward-incompatible-change-3}
+
+- Removed the rarely used table function `catBoostPool` and the `CatBoostPool` storage. If you have used this table function, please email `clickhouse-feedback@yandex-team.com`. Note that CatBoost integration remains and will be supported. [\#6279](https://github.com/ClickHouse/ClickHouse/pull/6279) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Disabled `ANY RIGHT JOIN` and `ANY FULL JOIN` by default. Set the `any_join_distinct_right_table_keys` setting to enable them. [\#5126](https://github.com/ClickHouse/ClickHouse/issues/5126) [\#6351](https://github.com/ClickHouse/ClickHouse/pull/6351) ([Artem Zuikov](https://github.com/4ertus2))
+
+## ClickHouse release 19.13 {#clickhouse-release-19-13}
+
+### ClickHouse release 19.13.6.51, 2019-10-02 {#clickhouse-release-19-13-6-51-2019-10-02}
+
+#### Bug Fix {#bug-fix-9}
+
+- This release also contains all bug fixes from 19.11.12.69.
+
+### ClickHouse release 19.13.5.44, 2019-09-20 {#clickhouse-release-19-13-5-44-2019-09-20}
+
+#### Bug Fix {#bug-fix-10}
+
+- This release also contains all bug fixes from 19.14.6.12.
+- Fixed a possible inconsistent state of the table while executing a `DROP` query for a replicated table while ZooKeeper is not accessible. [\#6045](https://github.com/ClickHouse/ClickHouse/issues/6045) [\#6413](https://github.com/ClickHouse/ClickHouse/pull/6413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix for a data race in StorageMerge. [\#6717](https://github.com/ClickHouse/ClickHouse/pull/6717) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug introduced in the query profiler which led to endless recv from a socket. [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) ([alesapin](https://github.com/alesapin))
+- Fixed excessive CPU usage while executing the `JSONExtractRaw` function over a boolean value. [\#6208](https://github.com/ClickHouse/ClickHouse/pull/6208) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a regression while pushing to a materialized view. [\#6415](https://github.com/ClickHouse/ClickHouse/pull/6415) ([Ivan](https://github.com/abyss7))
+- The `url` table function had a vulnerability that allowed an attacker to inject arbitrary HTTP headers into the request. This issue was found by [Nikita Tikhomirov](https://github.com/NSTikhomirov). [\#6466](https://github.com/ClickHouse/ClickHouse/pull/6466) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a useless `AST` check in the Set index. [\#6510](https://github.com/ClickHouse/ClickHouse/issues/6510) [\#6651](https://github.com/ClickHouse/ClickHouse/pull/6651) ([Nikita Vasilev](https://github.com/nikvas0))
+- Fixed parsing of `AggregateFunction` values embedded in a query. [\#6575](https://github.com/ClickHouse/ClickHouse/issues/6575) [\#6773](https://github.com/ClickHouse/ClickHouse/pull/6773) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fixed wrong behaviour of the `trim` family of functions. [\#6647](https://github.com/ClickHouse/ClickHouse/pull/6647) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse release 19.13.4.32, 2019-09-10 {#clickhouse-release-19-13-4-32-2019-09-10}
+
+#### Bug Fix {#bug-fix-11}
+
+- This release also contains all bug security fixes from 19.11.9.52 and 19.11.10.54.
+- Fixed a data race in the `system.parts` table and the `ALTER` query. [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245) [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a mismatched header in streams that happened when reading from an empty distributed table with sample and prewhere. [\#6167](https://github.com/ClickHouse/ClickHouse/issues/6167) ([Lixiang Qian](https://github.com/fancyqlx)) [\#6823](https://github.com/ClickHouse/ClickHouse/pull/6823) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed a crash when using an `IN` clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
+- Fixed the case of same column names in the `GLOBAL JOIN ON` section. [\#6181](https://github.com/ClickHouse/ClickHouse/pull/6181) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a crash when casting types to `Decimal` that do not support it. Throw an exception instead. [\#6297](https://github.com/ClickHouse/ClickHouse/pull/6297) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a crash in the `extractAll()` function. [\#6644](https://github.com/ClickHouse/ClickHouse/pull/6644) ([Artem Zuikov](https://github.com/4ertus2))
+- Query transformation for the `MySQL`, `ODBC`, `JDBC` table functions now works properly for `SELECT WHERE` queries with multiple `AND` expressions. [\#6381](https://github.com/ClickHouse/ClickHouse/issues/6381) [\#6676](https://github.com/ClickHouse/ClickHouse/pull/6676) ([dimarub2000](https://github.com/dimarub2000))
+- Added previous declaration checks for MySQL 8 integration. [\#6569](https://github.com/ClickHouse/ClickHouse/pull/6569) ([Rafael David Tinoco](https://github.com/rafaeldtinoco))
+
+#### Security Fix {#security-fix-1}
+
+- Fixed two vulnerabilities in codecs in the decompression phase (a malicious user can fabricate compressed data that leads to a buffer overflow in decompression). [\#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2))
+
+### ClickHouse release 19.13.3.26, 2019-08-22 {#clickhouse-release-19-13-3-26-2019-08-22}
+
+#### Bug Fix {#bug-fix-12}
+
+- Fixed the `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
+- Fixed an NPE when using an IN clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
+- Fixed an issue where, if a stale replica became alive, it might still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
+- Fixed an issue with parsing CSV. [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
+- Fixed a data race in the system.parts table and the ALTER query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed wrong code in mutations that could lead to memory corruption. Fixed a segfault with reads of address `0x14c0` that could happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
+
+#### Security Fix {#security-fix-2}
+
+- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, they can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse release 19.13.2.19, 2019-08-14 {#clickhouse-release-19-13-2-19-2019-08-14}
+
+#### New Feature {#new-feature-5}
+
+- Sampling profiler on query level. [Example](https://gist.github.com/alexey-milovidov/92758583dd41c24c360fdb8d6a4da194). [\#4247](https://github.com/ClickHouse/ClickHouse/issues/4247) ([laplab](https://github.com/laplab)) [\#6124](https://github.com/ClickHouse/ClickHouse/pull/6124) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386)
+- Allow specifying a list of columns with the `COLUMNS('regexp')` expression, which works like a more sophisticated variant of the `*` asterisk (see the sketch after this list). [\#5951](https://github.com/ClickHouse/ClickHouse/pull/5951) ([mfridental](https://github.com/mfridental)), ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `CREATE TABLE AS table_function()` is now possible. [\#6057](https://github.com/ClickHouse/ClickHouse/pull/6057) ([dimarub2000](https://github.com/dimarub2000))
+- The Adam optimizer for stochastic gradient descent is used by default in the `stochasticLinearRegression()` and `stochasticLogisticRegression()` aggregate functions, because it shows good quality with almost no tuning. [\#6000](https://github.com/ClickHouse/ClickHouse/pull/6000) ([Quid37](https://github.com/Quid37))
+- Added functions for working with the custom week number. [\#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) ([Andy Yang](https://github.com/andyyzh))
+- `RENAME` queries now work with all storages. [\#5953](https://github.com/ClickHouse/ClickHouse/pull/5953) ([Ivan](https://github.com/abyss7))
+- Now the client receives logs from the server with any desired level by setting `send_logs_level`, regardless of the log level specified in the server settings. [\#5964](https://github.com/ClickHouse/ClickHouse/pull/5964) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
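+
+A minimal sketch of the `COLUMNS('regexp')` expression above (the table and column names are hypothetical):
+
+```sql
+CREATE TABLE col_names (aa Int8, ab Int8, bc Int8) ENGINE = Memory;
+INSERT INTO col_names VALUES (1, 2, 3);
+
+-- Select only the columns whose names match the regular expression.
+SELECT COLUMNS('^a') FROM col_names; -- returns columns aa and ab
+```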
+
+#### Backward Incompatible Change {#backward-incompatible-change-4}
+
+- The `input_format_defaults_for_omitted_fields` setting is enabled by default. Inserts into Distributed tables need this setting to be the same across the cluster (you need to set it before rolling out the update). It enables the calculation of complex default expressions for omitted fields in the `JSONEachRow` and `CSV*` formats (see the sketch below). This should be the expected behaviour, but may lead to a negligible performance difference. [\#6043](https://github.com/ClickHouse/ClickHouse/pull/6043) ([Artem Zuikov](https://github.com/4ertus2)), [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
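+
+A small sketch of the effect of this setting, with a hypothetical table: when enabled, a field omitted from the input is computed from its `DEFAULT` expression rather than filled with the type's zero value:
+
+```sql
+CREATE TABLE defaults_demo
+(
+    a UInt32,
+    b UInt32 DEFAULT a * 2
+)
+ENGINE = Memory;
+
+SET input_format_defaults_for_omitted_fields = 1;
+
+-- "b" is omitted in the input row and is computed as a * 2 = 10.
+INSERT INTO defaults_demo FORMAT JSONEachRow {"a": 5}
+
+SELECT * FROM defaults_demo;
+```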
+
+#### Experimental Feature {#experimental-features}
+
+- New query processing pipeline. Use the `experimental_use_processors=1` option to enable it. Use at your own risk. [\#4914](https://github.com/ClickHouse/ClickHouse/pull/4914) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Bug Fix {#bug-fix-13}
+
+- Kafka integration has been fixed in this version.
+- Fixed `DoubleDelta` encoding of `Int64` for large `DoubleDelta` values; improved `DoubleDelta` encoding for random data for `Int32`. [\#5998](https://github.com/ClickHouse/ClickHouse/pull/5998) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed overestimation of `max_rows_to_read` if the `merge_tree_uniform_read_distribution` setting is set to 0. [\#6019](https://github.com/ClickHouse/ClickHouse/pull/6019) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvement {#improvement-4}
+
+- Throws an exception if a `config.d` file doesn't have the corresponding root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))
+
+#### Performance Improvement {#performance-improvement-3}
+
+- Optimized `count()`. Now it uses the smallest column (if possible). [\#6028](https://github.com/ClickHouse/ClickHouse/pull/6028) ([Amos Bird](https://github.com/amosbird))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-5}
+
+- Report memory usage in performance tests. [\#5899](https://github.com/ClickHouse/ClickHouse/pull/5899) ([akuzm](https://github.com/akuzm))
+- Fixed the build with external `libcxx`. [\#6010](https://github.com/ClickHouse/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7))
+- Fixed the shared build with the `rdkafka` library. [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
+
+## ClickHouse release 19.11 {#clickhouse-release-19-11}
+
+### ClickHouse release 19.11.13.74, 2019-11-01 {#clickhouse-release-19-11-13-74-2019-11-01}
+
+#### Bug Fix {#bug-fix-14}
+
+- Fixed a rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows). [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
+- Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus JSON diagnostic messages. [\#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz))
+- Fixed a bug with the `mrk` file extension for mutations ([alesapin](https://github.com/alesapin))
+
+### ClickHouse release 19.11.12.69, 2019-10-02 {#clickhouse-release-19-11-12-69-2019-10-02}
+
+#### Bug Fix {#bug-fix-15}
+
+- Fixed performance degradation of index analysis on complex keys on large tables. This fixes [\#6924](https://github.com/ClickHouse/ClickHouse/issues/6924). [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid rare SIGSEGV while sending data in tables with the Distributed engine (`Failed to send batch: file with index XXXXX is absent`). [\#7032](https://github.com/ClickHouse/ClickHouse/pull/7032) ([Azat Khuzhin](https://github.com/azat))
+- Fixed `Unknown identifier` with multiple joins. This fixes [\#5254](https://github.com/ClickHouse/ClickHouse/issues/5254). [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
+
+### ClickHouse release 19.11.11.57, 2019-09-13 {#clickhouse-release-19-11-11-57-2019-09-13}
+
+- Fixed a logical error causing segfaults when selecting from an empty Kafka topic. [\#6902](https://github.com/ClickHouse/ClickHouse/issues/6902) [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
+- Fix for the `arrayEnumerateUniqRanked` function with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
+
+### ClickHouse release 19.11.10.54, 2019-09-10 {#clickhouse-release-19-11-10-54-2019-09-10}
+
+#### Bug Fix {#bug-fix-16}
+
+- Store offsets for Kafka messages manually to be able to commit them all at once for all partitions. Fixes potential duplication in the "one consumer - many partitions" scenario. [\#6872](https://github.com/ClickHouse/ClickHouse/pull/6872) ([Ivan](https://github.com/abyss7))
+
+### ClickHouse release 19.11.9.52, 2019-09-6 {#clickhouse-release-19-11-9-52-2019-09-6}
+
+- Improved error handling in cache dictionaries. [\#6737](https://github.com/ClickHouse/ClickHouse/pull/6737) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a bug in the `arrayEnumerateUniqRanked` function. [\#6779](https://github.com/ClickHouse/ClickHouse/pull/6779) ([proller](https://github.com/proller))
+- Fixed the `JSONExtract` function while extracting a `Tuple` from JSON. [\#6718](https://github.com/ClickHouse/ClickHouse/pull/6718) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
+- Fixed the performance test. [\#6392](https://github.com/ClickHouse/ClickHouse/pull/6392) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Parquet: fixed reading boolean columns. [\#6579](https://github.com/ClickHouse/ClickHouse/pull/6579) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed wrong behaviour of the `nullIf` function for constant arguments. [\#6518](https://github.com/ClickHouse/ClickHouse/pull/6518) ([Guillaume Tassery](https://github.com/YiuRULE)) [\#6580](https://github.com/ClickHouse/ClickHouse/pull/6580) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the Kafka message duplication problem on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
+- Fixed an issue when a long `ALTER UPDATE` or `ALTER DELETE` could prevent regular merges from running. Prevent mutations from executing if there are not enough free threads available. [\#6502](https://github.com/ClickHouse/ClickHouse/issues/6502) [\#6617](https://github.com/ClickHouse/ClickHouse/pull/6617) ([tavplubix](https://github.com/tavplubix))
+- Fixed an error with processing "timezone" in the server configuration file. [\#6709](https://github.com/ClickHouse/ClickHouse/pull/6709) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the Kafka tests. [\#6805](https://github.com/ClickHouse/ClickHouse/pull/6805) ([Ivan](https://github.com/abyss7))
+
+#### Security Fix {#security-fix-3}
+
+- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, they can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse release 19.11.8.46, 2019-08-22 {#clickhouse-release-19-11-8-46-2019-08-22}
+
+#### Bug Fix {#bug-fix-17}
+
+- Fixed the `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
[\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
- Fixed an issue where, if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed an issue with parsing CSV [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
- Fixed a data race in the system.parts table and the ALTER query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault with reads of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.11.7.40, 2019-08-14 {#clickhouse-release-19-11-7-40-2019-08-14}

#### Bug Fix {#bug-fix-18}

- The Kafka integration has been fixed in this version.
- Fixed a segfault when using `arrayReduce` for constant arguments. [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed `toFloat()` monotonicity. [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
- Fixed a segfault with `optimize_skip_unused_shards` enabled and a missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([CurtizJ](https://github.com/CurtizJ))
- Fixed the logic of the `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed extra verbose logging from the MySQL handler. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix wrong behaviour and possible crashes in the `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([CurtizJ](https://github.com/CurtizJ))
- Do not expose virtual columns in the `system.columns` table. This is required for backward compatibility. [\#6406](https://github.com/ClickHouse/ClickHouse/pull/6406) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug with memory allocation for string fields in a complex key cache dictionary. [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
- Fixed a bug with enabling adaptive granularity when creating a new replica for a `Replicated*MergeTree` table.
[\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
- Fix an infinite loop when reading Kafka messages. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([abyss7](https://github.com/abyss7))
- Fixed the possibility that a crafted query can cause a server crash due to stack overflow in the SQL parser, and the possibility of stack overflow in `Merge` and `Distributed` tables [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a Gorilla encoding error on small sequences. [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Enmk](https://github.com/Enmk))

#### Improvement {#improvement-5}

- Allow the user to override the `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.11.5.28, 2019-08-05 {#clickhouse-release-19-11-5-28-2019-08-05}

#### Bug Fix {#bug-fix-19}

- Fixed the possibility of hanging queries when the server is overloaded. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix FPE in the yandexConsistentHash function. This fixes [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304). [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug in the conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix parsing of `bool` settings from `true` and `false` strings in configuration files. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
- Fixed a rare bug with incompatible stream headers in queries to a `Distributed` table over a `MergeTree` table when part of the `WHERE` moves to `PREWHERE`. [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
- Fixed overflow in the integer division of a signed type by an unsigned type. This fixes [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214). [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Backward Incompatible Change {#backward-incompatible-change-5}

- `Kafka` is still broken.

### ClickHouse release 19.11.4.24, 2019-08-01 {#clickhouse-release-19-11-4-24-2019-08-01}

#### Bug Fix {#bug-fix-20}

- Fixed a bug with writing secondary index marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
- Fix the `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
- Fixed a hang in the `JSONExtractRaw` function. Fixed [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a segfault in ExternalLoader::reloadOutdated().
[\#6082](https://github.com/ClickHouse/ClickHouse/pull/6082) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed the case when the server may close listening sockets but not shut down, and continue serving the remaining queries. You may end up with two running clickhouse-server processes. Sometimes the server may return a `bad_function_call` error for the remaining queries. [\#6231](https://github.com/ClickHouse/ClickHouse/pull/6231) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a useless and incorrect condition on the update field for the initial loading of external dictionaries via ODBC, MySQL, ClickHouse and HTTP. This fixes [\#6069](https://github.com/ClickHouse/ClickHouse/issues/6069) [\#6083](https://github.com/ClickHouse/ClickHouse/pull/6083) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an irrelevant exception in the cast of `LowCardinality(Nullable)` to a not-Nullable column in case it doesn't contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix a non-deterministic result of the «uniq» aggregate function in extremely rare cases. The bug was present in all ClickHouse versions. [\#6058](https://github.com/ClickHouse/ClickHouse/pull/6058) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a segfault when we set a slightly too high CIDR in the function `IPv6CIDRToRange`. [\#6068](https://github.com/ClickHouse/ClickHouse/pull/6068) ([Guillaume Tassery](https://github.com/YiuRULE))
- Fixed a small memory leak when the server throws many exceptions from many different contexts. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix the situation when a consumer got paused before subscription and was not resumed afterwards. [\#6075](https://github.com/ClickHouse/ClickHouse/pull/6075) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
- Clear the Kafka data buffer from a previous read operation that completed with an error [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) Note that Kafka is broken in this version.
- Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-6}

- Added official `rpm` packages. [\#5740](https://github.com/ClickHouse/ClickHouse/pull/5740) ([proller](https://github.com/proller)) ([alesapin](https://github.com/alesapin))
- Add the ability to build `.rpm` and `.tgz` packages with the `packager` script. [\#5769](https://github.com/ClickHouse/ClickHouse/pull/5769) ([alesapin](https://github.com/alesapin))
- Fixes for the «Arcadia» build system. [\#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller))

#### Backward Incompatible Change {#backward-incompatible-change-6}

- `Kafka` is broken in this version.

### ClickHouse release 19.11.3.11, 2019-07-18 {#clickhouse-release-19-11-3-11-2019-07-18}

#### New Feature {#new-feature-6}

- Added support for prepared statements. [\#5331](https://github.com/ClickHouse/ClickHouse/pull/5331/) ([Alexander](https://github.com/sanych73)) [\#5630](https://github.com/ClickHouse/ClickHouse/pull/5630) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `DoubleDelta` and `Gorilla` column codecs [\#5600](https://github.com/ClickHouse/ClickHouse/pull/5600) ([Vasily Nemkov](https://github.com/Enmk))
- Added the `os_thread_priority` setting that allows controlling the «nice» value of query processing threads that is used by the OS to adjust dynamic scheduling priority. It requires the `CAP_SYS_NICE` capability to work. This implements [\#5858](https://github.com/ClickHouse/ClickHouse/issues/5858) [\#5909](https://github.com/ClickHouse/ClickHouse/pull/5909) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Implement the `_topic`, `_offset`, `_key` columns for the Kafka engine [\#5382](https://github.com/ClickHouse/ClickHouse/pull/5382) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
- Add the aggregate function combinator `-Resample` [\#5590](https://github.com/ClickHouse/ClickHouse/pull/5590) ([hcz](https://github.com/hczhcz))
- Aggregate functions `groupArrayMovingSum(win_size)(x)` and `groupArrayMovingAvg(win_size)(x)`, which calculate a moving sum/average with or without a window-size limitation; see the sketch after this list. [\#5595](https://github.com/ClickHouse/ClickHouse/pull/5595) ([inv2004](https://github.com/inv2004))
- Add the synonym `arrayFlatten` \<-\> `flatten` [\#5764](https://github.com/ClickHouse/ClickHouse/pull/5764) ([hcz](https://github.com/hczhcz))
- Integrated the H3 function `geoToH3` from Uber. [\#4724](https://github.com/ClickHouse/ClickHouse/pull/4724) ([Ivan Remen](https://github.com/BHYCHIK)) [\#5805](https://github.com/ClickHouse/ClickHouse/pull/5805) ([alexey-milovidov](https://github.com/alexey-milovidov))
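
A minimal sketch of the new moving-window aggregates; the input below is synthetic, so no table is required:

```sql
-- Moving sum and moving average over a window of 3 rows.
-- Expected: moving_sum = [1, 3, 6, 9, 12].
SELECT
    groupArrayMovingSum(3)(value) AS moving_sum,
    groupArrayMovingAvg(3)(value) AS moving_avg
FROM (SELECT arrayJoin([1, 2, 3, 4, 5]) AS value);
```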

#### Bug Fix {#bug-fix-21}

- Implement a DNS cache with asynchronous updates. A separate thread resolves all hosts and updates the DNS cache with a period (setting `dns_cache_update_period`). It should help when the IPs of hosts change frequently. [\#5857](https://github.com/ClickHouse/ClickHouse/pull/5857) ([Anton Popov](https://github.com/CurtizJ))
- Fix a segfault in the `Delta` codec which affects columns with values less than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
- Fix a segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
- Fix a rare bug in checking a part with a `LowCardinality` column. Previously `checkDataPart` always failed for a part with a `LowCardinality` column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
- Avoid hanging connections when the server thread pool is full. This is important for connections from the `remote` table function or connections to a shard without replicas when there is a long connection timeout. This fixes [\#5878](https://github.com/ClickHouse/ClickHouse/issues/5878) [\#5881](https://github.com/ClickHouse/ClickHouse/pull/5881) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support constant arguments for the `evalMLModel` function. This fixes [\#5817](https://github.com/ClickHouse/ClickHouse/issues/5817) [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an issue where ClickHouse determined the default timezone as `UCT` instead of `UTC`. This fixes [\#5804](https://github.com/ClickHouse/ClickHouse/issues/5804). [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed buffer underflow in `visitParamExtractRaw`. This fixes [\#5901](https://github.com/ClickHouse/ClickHouse/issues/5901) [\#5902](https://github.com/ClickHouse/ClickHouse/pull/5902) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now distributed `DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER` queries will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
- Fix `coalesce` for `ColumnConst` with `ColumnNullable` + related changes. [\#5755](https://github.com/ClickHouse/ClickHouse/pull/5755) ([Artem Zuikov](https://github.com/4ertus2))
- Fix `ReadBufferFromKafkaConsumer` so that it keeps reading new messages after `commit()` even if it was stalled before [\#5852](https://github.com/ClickHouse/ClickHouse/pull/5852) ([Ivan](https://github.com/abyss7))
- Fix `FULL` and `RIGHT` JOIN results when joining on `Nullable` keys in the right table. [\#5859](https://github.com/ClickHouse/ClickHouse/pull/5859) ([Artem Zuikov](https://github.com/4ertus2))
- Possible fix for infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a race condition which causes some queries to possibly not appear in query\_log after a `SYSTEM FLUSH LOGS` query. [\#5456](https://github.com/ClickHouse/ClickHouse/issues/5456) [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
- Fixed a `heap-use-after-free` ASan warning in ClusterCopier caused by a watch that tried to use an already removed copier object. [\#5871](https://github.com/ClickHouse/ClickHouse/pull/5871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed a wrong `StringRef` pointer returned by some implementations of `IColumn::deserializeAndInsertFromArena`. This bug affected only unit tests. [\#5973](https://github.com/ClickHouse/ClickHouse/pull/5973) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Prevent source and intermediate array join columns from masking same-name columns. [\#5941](https://github.com/ClickHouse/ClickHouse/pull/5941) ([Artem Zuikov](https://github.com/4ertus2))
- Fix INSERT and SELECT queries to the MySQL engine with MySQL-style identifier quoting. [\#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014))
- Now the `CHECK TABLE` query can work with the MergeTree engine family. It returns check status and a message, if any, for each part (or file in the case of simpler engines); see the example after this list. Also, fixed a bug in the fetch of a broken part. [\#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin))
- Fixed the SPLIT\_SHARED\_LIBRARIES runtime [\#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1))
- Fixed timezone initialization when `/etc/localtime` is a relative symlink like `../usr/share/zoneinfo/Europe/Moscow` [\#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov))
- clickhouse-copier: fix use-after-free on shutdown [\#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller))
- Updated `simdjson`. Fixed an issue where some invalid JSONs with zero bytes were parsed successfully. [\#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the shutdown of system logs [\#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ))
- Fix hanging when the condition in invalidate\_query depends on a dictionary. [\#6011](https://github.com/ClickHouse/ClickHouse/pull/6011) ([Vitaly Baranov](https://github.com/vitlibar))
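
A quick illustration of the `CHECK TABLE` query mentioned in the list above; the table name is hypothetical:

```sql
-- Now works for MergeTree-family engines; returns a `result` column
-- (1 means the data is intact) plus a message for broken parts.
CHECK TABLE my_mergetree_table;
```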

#### Improvement {#improvement-6}

- Allow unresolvable addresses in the cluster configuration. They will be considered unavailable and resolution will be attempted at every connection attempt. This is especially useful for Kubernetes. This fixes [\#5714](https://github.com/ClickHouse/ClickHouse/issues/5714) [\#5924](https://github.com/ClickHouse/ClickHouse/pull/5924) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Close idle TCP connections (with a one-hour timeout by default). This is especially important for large clusters with multiple distributed tables on every server, because every server can potentially keep a connection pool to every other server, and after peak query concurrency, connections will stall. This fixes [\#5879](https://github.com/ClickHouse/ClickHouse/issues/5879) [\#5880](https://github.com/ClickHouse/ClickHouse/pull/5880) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Better quality of the `topK` function. Changed the SpaceSaving set behaviour to remove the last element if the new element has a bigger weight. [\#5833](https://github.com/ClickHouse/ClickHouse/issues/5833) [\#5850](https://github.com/ClickHouse/ClickHouse/pull/5850) ([Guillaume Tassery](https://github.com/YiuRULE))
- URL functions for working with domains now can work with incomplete URLs without a scheme [\#5725](https://github.com/ClickHouse/ClickHouse/pull/5725) ([alesapin](https://github.com/alesapin))
- Checksums added to the `system.parts_columns` table. [\#5874](https://github.com/ClickHouse/ClickHouse/pull/5874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Added the `Enum` data type as a synonym for `Enum8` or `Enum16`. [\#5886](https://github.com/ClickHouse/ClickHouse/pull/5886) ([dimarub2000](https://github.com/dimarub2000))
- Full bit-transpose variant for the `T64` codec. Could lead to better compression with `zstd`. [\#5742](https://github.com/ClickHouse/ClickHouse/pull/5742) ([Artem Zuikov](https://github.com/4ertus2))
- Conditions on the `startsWith` function now can use the primary key. This fixes [\#5310](https://github.com/ClickHouse/ClickHouse/issues/5310) and [\#5882](https://github.com/ClickHouse/ClickHouse/issues/5882); see the sketch after this list. [\#5919](https://github.com/ClickHouse/ClickHouse/pull/5919) ([dimarub2000](https://github.com/dimarub2000))
- Allow using `clickhouse-copier` with a cross-replication cluster topology by permitting an empty database name. [\#5745](https://github.com/ClickHouse/ClickHouse/pull/5745) ([nvartolomei](https://github.com/nvartolomei))
- Use `UTC` as the default timezone on a system without `tzdata` (e.g. bare Docker container). Before this patch, the error message `Could not determine local time zone` was printed and the server or client refused to start. [\#5827](https://github.com/ClickHouse/ClickHouse/pull/5827) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Returned back support for floating point arguments in the function `quantileTiming` for backward compatibility. [\#5911](https://github.com/ClickHouse/ClickHouse/pull/5911) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Show which table is missing a column in error messages. [\#5768](https://github.com/ClickHouse/ClickHouse/pull/5768) ([Ivan](https://github.com/abyss7))
- Disallow running a query with the same query\_id by different users [\#5430](https://github.com/ClickHouse/ClickHouse/pull/5430) ([proller](https://github.com/proller))
- More robust code for sending metrics to Graphite. It will work even during long multiple `RENAME TABLE` operations. [\#5875](https://github.com/ClickHouse/ClickHouse/pull/5875) ([alexey-milovidov](https://github.com/alexey-milovidov))
- More informative error messages will be displayed when ThreadPool cannot schedule a task for execution. This fixes [\#5305](https://github.com/ClickHouse/ClickHouse/issues/5305) [\#5801](https://github.com/ClickHouse/ClickHouse/pull/5801) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Inverted ngramSearch to be more intuitive [\#5807](https://github.com/ClickHouse/ClickHouse/pull/5807) ([Danila Kutenin](https://github.com/danlark1))
- Add user parsing in the HDFS engine builder [\#5946](https://github.com/ClickHouse/ClickHouse/pull/5946) ([akonyaev90](https://github.com/akonyaev90))
- Updated the default value of the `max_ast_elements` parameter [\#5933](https://github.com/ClickHouse/ClickHouse/pull/5933) ([Artem Konovalov](https://github.com/izebit))
- Added the notion of obsolete settings. The obsolete setting `allow_experimental_low_cardinality_type` can be used with no effect. [0f15c01c6802f7ce1a1494c12c846be8c98944cd](https://github.com/ClickHouse/ClickHouse/commit/0f15c01c6802f7ce1a1494c12c846be8c98944cd) [Alexey Milovidov](https://github.com/alexey-milovidov)
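
A sketch of a query that can now be served by the primary index thanks to the `startsWith` change above; the table and data are illustrative:

```sql
CREATE TABLE urls (url String, hits UInt64) ENGINE = MergeTree ORDER BY url;

-- The prefix condition on the primary-key column `url` can now use
-- the primary index instead of a full scan.
SELECT count() FROM urls WHERE startsWith(url, 'https://');
```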

#### Performance Improvement {#performance-improvement-4}

- Increase the number of streams to SELECT from a Merge table for a more uniform distribution of threads. Added the setting `max_streams_multiplier_for_merge_tables`. This fixes [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5915](https://github.com/ClickHouse/ClickHouse/pull/5915) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-7}

- Add a backward compatibility test for client-server interaction with different versions of clickhouse. [\#5868](https://github.com/ClickHouse/ClickHouse/pull/5868) ([alesapin](https://github.com/alesapin))
- Test coverage information in every commit and pull request. [\#5896](https://github.com/ClickHouse/ClickHouse/pull/5896) ([alesapin](https://github.com/alesapin))
- Cooperate with the address sanitizer to support our custom allocators (`Arena` and `ArenaWithFreeLists`) for better debugging of «use-after-free» errors. [\#5728](https://github.com/ClickHouse/ClickHouse/pull/5728) ([akuzm](https://github.com/akuzm))
- Switch to the [LLVM libunwind implementation](https://github.com/llvm-mirror/libunwind) for C++ exception handling and for stack trace printing [\#4828](https://github.com/ClickHouse/ClickHouse/pull/4828) ([Nikita Lapkov](https://github.com/laplab))
- Add two more warnings from -Weverything [\#5923](https://github.com/ClickHouse/ClickHouse/pull/5923) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow building ClickHouse with Memory Sanitizer. [\#3949](https://github.com/ClickHouse/ClickHouse/pull/3949) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a ubsan report about the `bitTest` function in the fuzz test. [\#5943](https://github.com/ClickHouse/ClickHouse/pull/5943) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Docker: added the ability to init a ClickHouse instance that requires authentication. [\#5727](https://github.com/ClickHouse/ClickHouse/pull/5727) ([Korviakov Andrey](https://github.com/shurshun))
- Update librdkafka to version 1.1.0 [\#5872](https://github.com/ClickHouse/ClickHouse/pull/5872) ([Ivan](https://github.com/abyss7))
- Add a global timeout for integration tests and disable some of them in the tests code. [\#5741](https://github.com/ClickHouse/ClickHouse/pull/5741) ([alesapin](https://github.com/alesapin))
- Fix some ThreadSanitizer failures. [\#5854](https://github.com/ClickHouse/ClickHouse/pull/5854) ([akuzm](https://github.com/akuzm))
- The `--no-undefined` option forces the linker to check all external names for existence while linking. It's very useful to track real dependencies between libraries in split build mode. [\#5855](https://github.com/ClickHouse/ClickHouse/pull/5855) ([Ivan](https://github.com/abyss7))
- Added a performance test for [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5914](https://github.com/ClickHouse/ClickHouse/pull/5914) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed compatibility with gcc-7. [\#5840](https://github.com/ClickHouse/ClickHouse/pull/5840) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added support for gcc-9. This fixes [\#5717](https://github.com/ClickHouse/ClickHouse/issues/5717) [\#5774](https://github.com/ClickHouse/ClickHouse/pull/5774) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an issue where libunwind could be linked incorrectly.
[\#5948](https://github.com/ClickHouse/ClickHouse/pull/5948) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a few warnings found by PVS-Studio. [\#5921](https://github.com/ClickHouse/ClickHouse/pull/5921) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added initial support for the `clang-tidy` static analyzer. [\#5806](https://github.com/ClickHouse/ClickHouse/pull/5806) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Convert BSD/Linux endian macros (‘be64toh’ and ‘htobe64’) to the Mac OS X equivalents [\#5785](https://github.com/ClickHouse/ClickHouse/pull/5785) ([Fu Chen](https://github.com/fredchenbj))
- Improved integration tests guide. [\#5796](https://github.com/ClickHouse/ClickHouse/pull/5796) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fix the build on macosx + gcc9 [\#5822](https://github.com/ClickHouse/ClickHouse/pull/5822) ([filimonov](https://github.com/filimonov))
- Fix a hard-to-spot typo: aggreAGte -\> aggregate. [\#5753](https://github.com/ClickHouse/ClickHouse/pull/5753) ([akuzm](https://github.com/akuzm))
- Fix the freebsd build [\#5760](https://github.com/ClickHouse/ClickHouse/pull/5760) ([proller](https://github.com/proller))
- Add a link to the experimental YouTube channel to the website [\#5845](https://github.com/ClickHouse/ClickHouse/pull/5845) ([Ivan Blinkov](https://github.com/blinkov))
- CMake: add an option for coverage flags: WITH\_COVERAGE [\#5776](https://github.com/ClickHouse/ClickHouse/pull/5776) ([proller](https://github.com/proller))
- Fix the initial size of some inline PODArrays. [\#5787](https://github.com/ClickHouse/ClickHouse/pull/5787) ([akuzm](https://github.com/akuzm))
- clickhouse-server.postinst: fix OS detection for centos 6 [\#5788](https://github.com/ClickHouse/ClickHouse/pull/5788) ([proller](https://github.com/proller))
- Added Arch Linux package generation. [\#5719](https://github.com/ClickHouse/ClickHouse/pull/5719) ([Vladimir Chebotarev](https://github.com/excitoon))
- Split Common/config.h by libs (dbms) [\#5715](https://github.com/ClickHouse/ClickHouse/pull/5715) ([proller](https://github.com/proller))
- Fixes for the «Arcadia» build platform [\#5795](https://github.com/ClickHouse/ClickHouse/pull/5795) ([proller](https://github.com/proller))
- Fixes for an unconventional build (gcc9, no submodules) [\#5792](https://github.com/ClickHouse/ClickHouse/pull/5792) ([proller](https://github.com/proller))
- Require an explicit type in unalignedStore because it was proven to be error-prone [\#5791](https://github.com/ClickHouse/ClickHouse/pull/5791) ([akuzm](https://github.com/akuzm))
- Fixed the macOS build [\#5830](https://github.com/ClickHouse/ClickHouse/pull/5830) ([filimonov](https://github.com/filimonov))
- Performance test concerning the new JIT feature with a bigger dataset, as requested here [\#5263](https://github.com/ClickHouse/ClickHouse/issues/5263) [\#5887](https://github.com/ClickHouse/ClickHouse/pull/5887) ([Guillaume Tassery](https://github.com/YiuRULE))
- Run stateful tests in the stress test [12693e568722f11e19859742f56428455501fd2a](https://github.com/ClickHouse/ClickHouse/commit/12693e568722f11e19859742f56428455501fd2a) ([alesapin](https://github.com/alesapin))

#### Backward Incompatible Change {#backward-incompatible-change-7}

- `Kafka` is broken in this version.
- Enable `adaptive_index_granularity` = 10MB by default for new `MergeTree` tables. If you created new MergeTree tables on version 19.11+, downgrade to versions prior to 19.6 will be impossible. [\#5628](https://github.com/ClickHouse/ClickHouse/pull/5628) ([alesapin](https://github.com/alesapin))
- Removed obsolete undocumented embedded dictionaries that were used by Yandex.Metrica. The functions `OSIn`, `SEIn`, `OSToRoot`, `SEToRoot`, `OSHierarchy`, `SEHierarchy` are no longer available. If you are using these functions, write an email to clickhouse-feedback@yandex-team.com. Note: at the last moment we decided to keep these functions for a while. [\#5780](https://github.com/ClickHouse/ClickHouse/pull/5780) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse release 19.10 {#clickhouse-release-19-10}

### ClickHouse release 19.10.1.5, 2019-07-12 {#clickhouse-release-19-10-1-5-2019-07-12}

#### New Feature {#new-feature-7}

- Add a new column codec: `T64`. Made for (U)IntX/EnumX/Data(Time)/DecimalX columns. It should be good for columns with constant or small range values. The codec itself allows enlarging or shrinking the data type without re-compression; see the sketch after this list. [\#5557](https://github.com/ClickHouse/ClickHouse/pull/5557) ([Artem Zuikov](https://github.com/4ertus2))
- Add the database engine `MySQL` that allows viewing all the tables on a remote MySQL server [\#5599](https://github.com/ClickHouse/ClickHouse/pull/5599) ([Winter Zhang](https://github.com/zhang2014))
- `bitmapContains` implementation. It's 2x faster than `bitmapHasAny` if the second bitmap contains one element. [\#5535](https://github.com/ClickHouse/ClickHouse/pull/5535) ([Zhichang Yu](https://github.com/yuzhichang))
- Support for the `crc32` function (with behaviour exactly the same as in MySQL or PHP). Do not use it if you need a hash function. [\#5661](https://github.com/ClickHouse/ClickHouse/pull/5661) ([Ivan Remen](https://github.com/BHYCHIK))
- Implemented `SYSTEM START/STOP DISTRIBUTED SENDS` queries to control asynchronous inserts into `Distributed` tables. [\#4935](https://github.com/ClickHouse/ClickHouse/pull/4935) ([Winter Zhang](https://github.com/zhang2014))
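
A minimal sketch of declaring the new `T64` codec; the table layout is illustrative:

```sql
CREATE TABLE metrics
(
    ts DateTime,
    -- T64 bit-transposes blocks of values; chaining a general-purpose
    -- codec such as LZ4 after it is optional.
    value UInt32 CODEC(T64, LZ4)
)
ENGINE = MergeTree
ORDER BY ts;
```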

#### Bug Fix {#bug-fix-22}

- Ignore query execution limits and the max parts size for merge limits while executing mutations. [\#5659](https://github.com/ClickHouse/ClickHouse/pull/5659) ([Anton Popov](https://github.com/CurtizJ))
- Fix a bug which may lead to deduplication of normal blocks (extremely rare) and insertion of duplicate blocks (more often). [\#5549](https://github.com/ClickHouse/ClickHouse/pull/5549) ([alesapin](https://github.com/alesapin))
- Fix the function `arrayEnumerateUniqRanked` for arguments with empty arrays [\#5559](https://github.com/ClickHouse/ClickHouse/pull/5559) ([proller](https://github.com/proller))
- Don't subscribe to Kafka topics without the intent to poll any messages. [\#5698](https://github.com/ClickHouse/ClickHouse/pull/5698) ([Ivan](https://github.com/abyss7))
- Make the `join_use_nulls` setting have no effect on types that cannot be inside Nullable [\#5700](https://github.com/ClickHouse/ClickHouse/pull/5700) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed `Incorrect size of index granularity` errors [\#5720](https://github.com/ClickHouse/ClickHouse/pull/5720) ([coraxster](https://github.com/coraxster))
- Fix Float-to-Decimal conversion overflow [\#5607](https://github.com/ClickHouse/ClickHouse/pull/5607) ([coraxster](https://github.com/coraxster))
- Flush the buffer when the `WriteBufferFromHDFS` destructor is called. This fixes writing into `HDFS`. [\#5684](https://github.com/ClickHouse/ClickHouse/pull/5684) ([Xindong Peng](https://github.com/eejoin))

#### Improvement {#improvement-7}

- Treat empty cells in `CSV` as default values when the setting `input_format_defaults_for_omitted_fields` is enabled. [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
- Non-blocking loading of external dictionaries. [\#5567](https://github.com/ClickHouse/ClickHouse/pull/5567) ([Vitaly Baranov](https://github.com/vitlibar))
- Network timeouts can be dynamically changed for already established connections according to the settings. [\#4558](https://github.com/ClickHouse/ClickHouse/pull/4558) ([Konstantin Podshumok](https://github.com/podshumok))
- Using «public\_suffix\_list» for the functions `firstSignificantSubdomain`, `cutToFirstSignificantSubdomain`. It uses a perfect hash table generated by `gperf` with a list generated from the file https://publicsuffix.org/list/public\_suffix\_list.dat (for example, now we recognize the domain `ac.uk` as non-significant); see the example after this list. [\#5030](https://github.com/ClickHouse/ClickHouse/pull/5030) ([Guillaume Tassery](https://github.com/YiuRULE))
- Adopted the `IPv6` data type in system tables; unified client info columns in `system.processes` and `system.query_log` [\#5640](https://github.com/ClickHouse/ClickHouse/pull/5640) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Using sessions for connections with the MySQL compatibility protocol. \#5476 [\#5646](https://github.com/ClickHouse/ClickHouse/pull/5646) ([Yuriy Baranov](https://github.com/yurriy))
- Support more `ALTER` queries `ON CLUSTER`. [\#5593](https://github.com/ClickHouse/ClickHouse/pull/5593) [\#5613](https://github.com/ClickHouse/ClickHouse/pull/5613) ([sundy-li](https://github.com/sundy-li))
- Support the `<logger>` section in the `clickhouse-local` config file. [\#5540](https://github.com/ClickHouse/ClickHouse/pull/5540) ([proller](https://github.com/proller))
- Allow running a query with the `remote` table function in `clickhouse-local` [\#5627](https://github.com/ClickHouse/ClickHouse/pull/5627) ([proller](https://github.com/proller))
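
A small example of the public-suffix-aware behaviour described in the list above; the expected results in the comments assume `com.tr` is on the public suffix list:

```sql
SELECT
    firstSignificantSubdomain('https://news.clickhouse.com.tr/'),      -- 'clickhouse'
    cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/'); -- 'clickhouse.com.tr'
```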

#### Performance Improvement {#performance-improvement-5}

- Add the possibility to write the final mark at the end of MergeTree columns. It allows avoiding useless reads for keys that are out of the table data range. It is enabled only if adaptive index granularity is in use. [\#5624](https://github.com/ClickHouse/ClickHouse/pull/5624) ([alesapin](https://github.com/alesapin))
- Improved performance of MergeTree tables on very slow filesystems by reducing the number of `stat` syscalls. [\#5648](https://github.com/ClickHouse/ClickHouse/pull/5648) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the performance degradation in reading from MergeTree tables that was introduced in version 19.6. Fixes \#5631. [\#5633](https://github.com/ClickHouse/ClickHouse/pull/5633) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-8}

- Implemented `TestKeeper` as an implementation of the ZooKeeper interface used for testing [\#5643](https://github.com/ClickHouse/ClickHouse/pull/5643) ([alexey-milovidov](https://github.com/alexey-milovidov)) ([Levushkin Alexey](https://github.com/alexey-milovidov))
- From now on, `.sql` tests can be run isolated by the server, in parallel, with a random database. This allows running them faster, adding new tests with custom server configurations, and being sure that different tests do not affect each other. [\#5554](https://github.com/ClickHouse/ClickHouse/pull/5554) ([Ivan](https://github.com/abyss7))
- Remove `` and `` from performance tests [\#5672](https://github.com/ClickHouse/ClickHouse/pull/5672) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed the «select\_format» performance test for `Pretty` formats [\#5642](https://github.com/ClickHouse/ClickHouse/pull/5642) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse release 19.9 {#clickhouse-release-19-9}

### ClickHouse release 19.9.3.31, 2019-07-05 {#clickhouse-release-19-9-3-31-2019-07-05}

#### Bug Fix {#bug-fix-23}

- Fix a segfault in the Delta codec which affects columns with values less than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
- Fix a rare bug in checking a part with a LowCardinality column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
- Fix a segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
- Fix potential infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix how ClickHouse determines the default timezone as UCT instead of UTC. [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a bug with executing distributed DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER queries on a follower replica before the leader replica. Now they will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
- Fix a race condition which causes some queries to possibly not appear in query\_log instantly after a SYSTEM FLUSH LOGS query.
[\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
- Added missing support for constant arguments for the `evalMLModel` function. [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.9.2.4, 2019-06-24 {#clickhouse-release-19-9-2-4-2019-06-24}

#### New Feature {#new-feature-8}

- Print information about frozen parts in the `system.parts` table. [\#5471](https://github.com/ClickHouse/ClickHouse/pull/5471) ([proller](https://github.com/proller))
- Ask for the client password on clickhouse-client start on a tty if it is not set in the arguments [\#5092](https://github.com/ClickHouse/ClickHouse/pull/5092) ([proller](https://github.com/proller))
- Implement the `dictGet` and `dictGetOrDefault` functions for Decimal types; see the sketch after this list. [\#5394](https://github.com/ClickHouse/ClickHouse/pull/5394) ([Artem Zuikov](https://github.com/4ertus2))
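
A sketch of the new Decimal support in dictionary functions; the dictionary name and its `price` attribute are hypothetical:

```sql
-- Assumes a dictionary `prices_dict` with a Decimal64(2) attribute `price`
-- keyed by UInt64; the default value is of the same Decimal type.
SELECT dictGetOrDefault('prices_dict', 'price', toUInt64(42), toDecimal64(0, 2));
```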

#### Improvement {#improvement-8}

- Debian init: add service wait [\#5522](https://github.com/ClickHouse/ClickHouse/pull/5522) ([proller](https://github.com/proller))
- Add a setting, forbidden by default, to create a table with suspicious types for LowCardinality [\#5448](https://github.com/ClickHouse/ClickHouse/pull/5448) ([Olga Khvostikova](https://github.com/stavrolia))
- Regression functions return model weights when not used as a state in the `evalMLMethod` function. [\#5411](https://github.com/ClickHouse/ClickHouse/pull/5411) ([Quid37](https://github.com/Quid37))
- Rename and improve regression methods. [\#5492](https://github.com/ClickHouse/ClickHouse/pull/5492) ([Quid37](https://github.com/Quid37))
- Clearer interfaces of string searchers. [\#5586](https://github.com/ClickHouse/ClickHouse/pull/5586) ([Danila Kutenin](https://github.com/danlark1))

#### Bug Fix {#bug-fix-24}

- Fix potential data loss in Kafka [\#5445](https://github.com/ClickHouse/ClickHouse/pull/5445) ([Ivan](https://github.com/abyss7))
- Fix a potential infinite loop in the `PrettySpace` format when called with zero columns [\#5560](https://github.com/ClickHouse/ClickHouse/pull/5560) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed a UInt32 overflow bug in linear models. Allow the eval ML model for non-constant model arguments. [\#5516](https://github.com/ClickHouse/ClickHouse/pull/5516) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- `ALTER TABLE ... DROP INDEX IF EXISTS ...` should not raise an exception if the provided index does not exist [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
- Fix a segfault with `bitmapHasAny` in a scalar subquery [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed an error when the replication connection pool doesn't retry resolving the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
- Fixed `ALTER ... MODIFY TTL` on ReplicatedMergeTree. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539) ([Anton Popov](https://github.com/CurtizJ))
- Fix INSERT into a Distributed table with a MATERIALIZED column [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))
- Fix bad alloc when truncating Join storage [\#5437](https://github.com/ClickHouse/ClickHouse/pull/5437) ([TCeason](https://github.com/TCeason))
- In recent versions of the tzdata package, some files are symlinks now. The current mechanism for detecting the default timezone gets broken and gives wrong names for some timezones. Now at least we force the timezone name to the contents of TZ if provided. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
- Fix some extremely rare cases with the MultiVolnitsky searcher when the constant needles in sum are at least 16KB long. The algorithm missed or overwrote the previous results, which can lead to an incorrect result of `multiSearchAny`. [\#5588](https://github.com/ClickHouse/ClickHouse/pull/5588) ([Danila Kutenin](https://github.com/danlark1))
- Fixed an issue where settings for ExternalData requests couldn't use ClickHouse settings. Also, for now, the settings `date_time_input_format` and `low_cardinality_allow_in_native_format` cannot be used because of the ambiguity of names (in external data they can be interpreted as a table format and in the query they can be a setting). [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
- Fixed a bug where parts were removed only from FS without dropping them from Zookeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
- Remove debug logging from the MySQL protocol [\#5478](https://github.com/ClickHouse/ClickHouse/pull/5478) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Skip ZNONODE during DDL query processing [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
- Fix mixing of `UNION ALL` result column types. There were cases with inconsistent data and column types of result columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2))
- Throw an exception on wrong integers in `dictGetT` functions instead of crashing. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2))
- Fix wrong element\_count and load\_factor for a hashed dictionary in the `system.dictionaries` table. [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-9}

- Fixed the build without `Brotli` HTTP compression support (`ENABLE_BROTLI=OFF` cmake variable). [\#5521](https://github.com/ClickHouse/ClickHouse/pull/5521) ([Anton Yuzhaninov](https://github.com/citrin))
- Include roaring.h as roaring/roaring.h [\#5523](https://github.com/ClickHouse/ClickHouse/pull/5523) ([Orivej Desh](https://github.com/orivej))
- Fix gcc9 warnings in hyperscan (the \#line directive is evil!) [\#5546](https://github.com/ClickHouse/ClickHouse/pull/5546) ([Danila Kutenin](https://github.com/danlark1))
- Fix all warnings when compiling with gcc-9.
Fixed some contrib issues. Fixed the gcc9 ICE and submitted it to bugzilla. [\#5498](https://github.com/ClickHouse/ClickHouse/pull/5498) ([Danila Kutenin](https://github.com/danlark1))
- Fixed linking with lld [\#5477](https://github.com/ClickHouse/ClickHouse/pull/5477) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Remove unused specializations in dictionaries [\#5452](https://github.com/ClickHouse/ClickHouse/pull/5452) ([Artem Zuikov](https://github.com/4ertus2))
- Improved performance tests for formatting and parsing tables for different types of files [\#5497](https://github.com/ClickHouse/ClickHouse/pull/5497) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixes for parallel test runs [\#5506](https://github.com/ClickHouse/ClickHouse/pull/5506) ([proller](https://github.com/proller))
- Docker: use configs from clickhouse-test [\#5531](https://github.com/ClickHouse/ClickHouse/pull/5531) ([proller](https://github.com/proller))
- Fix compilation for FreeBSD [\#5447](https://github.com/ClickHouse/ClickHouse/pull/5447) ([proller](https://github.com/proller))
- Upgrade boost to 1.70 [\#5570](https://github.com/ClickHouse/ClickHouse/pull/5570) ([proller](https://github.com/proller))
- Fix building clickhouse as a submodule [\#5574](https://github.com/ClickHouse/ClickHouse/pull/5574) ([proller](https://github.com/proller))
- Improve JSONExtract performance tests [\#5444](https://github.com/ClickHouse/ClickHouse/pull/5444) ([Vitaly Baranov](https://github.com/vitlibar))

## ClickHouse release 19.8 {#clickhouse-release-19-8}

### ClickHouse release 19.8.3.8, 2019-06-11 {#clickhouse-release-19-8-3-8-2019-06-11}

#### New Features {#new-features}

- Added functions to work with JSON [\#4686](https://github.com/ClickHouse/ClickHouse/pull/4686) ([hcz](https://github.com/hczhcz)) [\#5124](https://github.com/ClickHouse/ClickHouse/pull/5124). ([Vitaly Baranov](https://github.com/vitlibar))
- Add the basename function with behaviour similar to the basename function that exists in a lot of languages (`os.path.basename` in Python, `basename` in PHP, etc…). Works with both UNIX-like paths and Windows paths. [\#5136](https://github.com/ClickHouse/ClickHouse/pull/5136) ([Guillaume Tassery](https://github.com/YiuRULE))
- Added `LIMIT n, m BY` or `LIMIT m OFFSET n BY` syntax to set an offset of n for the LIMIT BY clause; see the example after this list. [\#5138](https://github.com/ClickHouse/ClickHouse/pull/5138) ([Anton Popov](https://github.com/CurtizJ))
- Added a new data type `SimpleAggregateFunction`, which allows having columns with light aggregation in an `AggregatingMergeTree`. It can only be used with simple functions like `any`, `anyLast`, `sum`, `min`, `max`. [\#4629](https://github.com/ClickHouse/ClickHouse/pull/4629) ([Boris Granveaud](https://github.com/bgranvea))
- Added support for non-constant arguments in the function `ngramDistance` [\#5198](https://github.com/ClickHouse/ClickHouse/pull/5198) ([Danila Kutenin](https://github.com/danlark1))
- Added the functions `skewPop`, `skewSamp`, `kurtPop` and `kurtSamp` to compute sequence skewness, sample skewness, kurtosis and sample kurtosis respectively. [\#5200](https://github.com/ClickHouse/ClickHouse/pull/5200) ([hcz](https://github.com/hczhcz))
- Support the rename operation for `MaterializeView` storage.
[\#5209](https://github.com/ClickHouse/ClickHouse/pull/5209) ([Guillaume Tassery](https://github.com/YiuRULE))
- Added a server which allows connecting to ClickHouse using a MySQL client. [\#4715](https://github.com/ClickHouse/ClickHouse/pull/4715) ([Yuriy Baranov](https://github.com/yurriy))
- Add `toDecimal*OrZero` and `toDecimal*OrNull` functions. [\#5291](https://github.com/ClickHouse/ClickHouse/pull/5291) ([Artem Zuikov](https://github.com/4ertus2))
- Support Decimal types in the functions: `quantile`, `quantiles`, `median`, `quantileExactWeighted`, `quantilesExactWeighted`, medianExactWeighted. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2))
- Added the `toValidUTF8` function, which replaces all invalid UTF-8 characters with the replacement character � (U+FFFD). [\#5322](https://github.com/ClickHouse/ClickHouse/pull/5322) ([Danila Kutenin](https://github.com/danlark1))
- Added the `format` function. Formatting a constant pattern (a simplified Python format pattern) with the strings listed in the arguments. [\#5330](https://github.com/ClickHouse/ClickHouse/pull/5330) ([Danila Kutenin](https://github.com/danlark1))
- Added the `system.detached_parts` table containing information about detached parts of `MergeTree` tables. [\#5353](https://github.com/ClickHouse/ClickHouse/pull/5353) ([akuzm](https://github.com/akuzm))
- Added the `ngramSearch` function to calculate the non-symmetric difference between needle and haystack. [\#5418](https://github.com/ClickHouse/ClickHouse/pull/5418)[\#5422](https://github.com/ClickHouse/ClickHouse/pull/5422) ([Danila Kutenin](https://github.com/danlark1))
- Implementation of basic machine learning methods (stochastic linear regression and logistic regression) using the aggregate functions interface. Has different strategies for updating model weights (simple gradient descent, momentum method, Nesterov method). Also supports mini-batches of custom size. [\#4943](https://github.com/ClickHouse/ClickHouse/pull/4943) ([Quid37](https://github.com/Quid37))
- Implementation of the `geohashEncode` and `geohashDecode` functions. [\#5003](https://github.com/ClickHouse/ClickHouse/pull/5003) ([Vasily Nemkov](https://github.com/Enmk))
- Added the aggregate function `timeSeriesGroupSum`, which can aggregate different time series whose sample timestamps are not aligned. It will use linear interpolation between two sample timestamps and then sum the time series together. Added the aggregate function `timeSeriesGroupRateSum`, which calculates the rate of time series and then sums the rates together. [\#4542](https://github.com/ClickHouse/ClickHouse/pull/4542) ([Yangkuan Liu](https://github.com/LiuYangkuan))
- Added the functions `IPv4CIDRtoIPv4Range` and `IPv6CIDRtoIPv6Range` to calculate the lower and higher bounds for an IP in the subnet using CIDR. [\#5095](https://github.com/ClickHouse/ClickHouse/pull/5095) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add an X-ClickHouse-Summary header when we send a query using HTTP with the setting `send_progress_in_http_headers` enabled. Returns the usual information of X-ClickHouse-Progress, with additional information like how many rows and bytes were inserted in the query. [\#5116](https://github.com/ClickHouse/ClickHouse/pull/5116) ([Guillaume Tassery](https://github.com/YiuRULE))
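
An illustration of the `LIMIT BY` offset syntax added above; the `pages` table is hypothetical:

```sql
-- For every domain: skip the most-viewed row, return the next two.
-- LIMIT 1, 2 BY is equivalent to LIMIT 2 OFFSET 1 BY.
SELECT domain, page, hits
FROM pages
ORDER BY hits DESC
LIMIT 1, 2 BY domain;
```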

#### Improvements {#improvements}

- Added the `max_parts_in_total` setting for the MergeTree family of tables (default: 100 000) which prevents unsafe specification of the partition key \#5166. [\#5171](https://github.com/ClickHouse/ClickHouse/pull/5171) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `clickhouse-obfuscator`: derive the seed for individual columns by combining the initial seed with the column name, not the column position. This is intended to transform datasets with multiple related tables, so that tables remain JOINable after the transformation. [\#5178](https://github.com/ClickHouse/ClickHouse/pull/5178) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added the functions `JSONExtractRaw`, `JSONExtractKeyAndValues`. Renamed the function `jsonExtract` to `JSONExtract`. When something goes wrong, these functions return the corresponding values, not `NULL`. Modified the function `JSONExtract`: now it gets the return type from its last parameter and doesn't inject nullables. Implemented a fallback to RapidJSON in case AVX2 instructions are not available. The simdjson library was updated to a new version. [\#5235](https://github.com/ClickHouse/ClickHouse/pull/5235) ([Vitaly Baranov](https://github.com/vitlibar))
- Now the `if` and `multiIf` functions don't rely on the condition's `Nullable`, but rely on the branches for SQL compatibility. [\#5238](https://github.com/ClickHouse/ClickHouse/pull/5238) ([Jian Wu](https://github.com/janplus))
- The `In` predicate now generates a `Null` result from `Null` input, like the `Equal` function. [\#5152](https://github.com/ClickHouse/ClickHouse/pull/5152) ([Jian Wu](https://github.com/janplus))
- Check the time limit for every (flush\_interval / poll\_timeout) number of rows from Kafka. This allows breaking the reading from the Kafka consumer more frequently and checking the time limits for the top-level streams [\#5249](https://github.com/ClickHouse/ClickHouse/pull/5249) ([Ivan](https://github.com/abyss7))
- Link rdkafka with bundled SASL. It should allow using SASL SCRAM authentication [\#5253](https://github.com/ClickHouse/ClickHouse/pull/5253) ([Ivan](https://github.com/abyss7))
- Batched version of RowRefList for ALL JOINS. [\#5267](https://github.com/ClickHouse/ClickHouse/pull/5267) ([Artem Zuikov](https://github.com/4ertus2))
- clickhouse-server: more informative listen error messages. [\#5268](https://github.com/ClickHouse/ClickHouse/pull/5268) ([proller](https://github.com/proller))
- Support dictionaries in clickhouse-copier for functions in `` [\#5270](https://github.com/ClickHouse/ClickHouse/pull/5270) ([proller](https://github.com/proller))
- Add a new setting `kafka_commit_every_batch` to regulate the Kafka committing policy.
  It allows setting the commit mode: after every batch of messages is handled, or after the whole block is written to the storage. It's a trade-off between losing some messages or reading them twice in some extreme situations. [\#5308](https://github.com/ClickHouse/ClickHouse/pull/5308) ([Ivan](https://github.com/abyss7))
- Make `windowFunnel` support other unsigned integer types; see the sketch after this list. [\#5320](https://github.com/ClickHouse/ClickHouse/pull/5320) ([sundy-li](https://github.com/sundy-li))
+
+#### Performance Improvements {#performance-improvements}
+
+- Parallelize processing of parts in non-replicated MergeTree tables in the ALTER MODIFY query. [\#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush))
+- Optimizations in regular expression extraction. [\#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [\#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1))
+- Do not add the right join key column to the join result if it's used only in the JOIN ON section. [\#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuikov](https://github.com/4ertus2))
+- Freeze the Kafka buffer after the first empty response. It avoids multiple invocations of `ReadBuffer::next()` for an empty result in some row-parsing streams. [\#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7))
+- `concat` function optimization for multiple arguments. [\#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1))
+- Query optimisation. Allow pushing down IN statements while rewriting comma/cross join into an inner one. [\#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuikov](https://github.com/4ertus2))
+- Upgrade our LZ4 implementation with the reference one to have faster decompression. [\#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1))
+- Implemented MSD radix sort (based on kxsort), and partial sorting. [\#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty))
+
+#### Bug Fixes {#bug-fixes}
+
+- Fix pushing required columns with JOIN. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed a bug: when ClickHouse is run by systemd, the command `sudo service clickhouse-server forcerestart` was not working as expected. [\#5204](https://github.com/ClickHouse/ClickHouse/pull/5204) ([proller](https://github.com/proller))
+- Fix http error codes in DataPartsExchange (the interserver http server on port 9009 always returned code 200, even on errors). [\#5216](https://github.com/ClickHouse/ClickHouse/pull/5216) ([proller](https://github.com/proller))
+- Fix SimpleAggregateFunction for Strings longer than MAX\_SMALL\_STRING\_SIZE. [\#5311](https://github.com/ClickHouse/ClickHouse/pull/5311) ([Azat Khuzhin](https://github.com/azat))
+- Fix error for `Decimal` to `Nullable(Decimal)` conversion in IN. Support other Decimal-to-Decimal conversions (including different scales). [\#5350](https://github.com/ClickHouse/ClickHouse/pull/5350) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed FPU clobbering in the simdjson library that led to wrong calculation of the `uniqHLL` and `uniqCombined` aggregate functions and math functions such as `log`. [\#5354](https://github.com/ClickHouse/ClickHouse/pull/5354) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed handling of mixed const/nonconst cases in JSON functions. [\#5435](https://github.com/ClickHouse/ClickHouse/pull/5435) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix `retention` function. Now all conditions that are satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))
+- Fix result type for `quantileExact` with Decimals. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2))
+
+#### Documentation {#documentation}
+
+- Translate documentation for `CollapsingMergeTree` to Chinese. [\#5168](https://github.com/ClickHouse/ClickHouse/pull/5168) ([张风啸](https://github.com/AlexZFX))
+- Translate some documentation about table engines to Chinese.
+  [\#5134](https://github.com/ClickHouse/ClickHouse/pull/5134)
+  [\#5328](https://github.com/ClickHouse/ClickHouse/pull/5328)
+  ([never lee](https://github.com/neverlee))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements}
+
+- Fix some sanitizer reports that show probable use-after-free. [\#5139](https://github.com/ClickHouse/ClickHouse/pull/5139) [\#5143](https://github.com/ClickHouse/ClickHouse/pull/5143) [\#5393](https://github.com/ClickHouse/ClickHouse/pull/5393) ([Ivan](https://github.com/abyss7))
+- Move performance tests out of separate directories for convenience. [\#5158](https://github.com/ClickHouse/ClickHouse/pull/5158) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix incorrect performance tests.
+ [\#5255](https://github.com/ClickHouse/ClickHouse/pull/5255) ([alesapin](https://github.com/alesapin))
+- Added a tool to calculate checksums caused by bit flips to debug hardware issues. [\#5334](https://github.com/ClickHouse/ClickHouse/pull/5334) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Make the runner script more usable. [\#5340](https://github.com/ClickHouse/ClickHouse/pull/5340)[\#5360](https://github.com/ClickHouse/ClickHouse/pull/5360) ([filimonov](https://github.com/filimonov))
+- Add a small instruction on how to write performance tests. [\#5408](https://github.com/ClickHouse/ClickHouse/pull/5408) ([alesapin](https://github.com/alesapin))
+- Add ability to make substitutions in create, fill and drop queries in performance tests. [\#5367](https://github.com/ClickHouse/ClickHouse/pull/5367) ([Olga Khvostikova](https://github.com/stavrolia))
+
+## ClickHouse release 19.7 {#clickhouse-release-19-7}
+
+### ClickHouse release 19.7.5.29, 2019-07-05 {#clickhouse-release-19-7-5-29-2019-07-05}
+
+#### Bug Fix {#bug-fix-25}
+
+- Fix performance regression in some queries with JOIN. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))
+
+### ClickHouse release 19.7.5.27, 2019-06-09 {#clickhouse-release-19-7-5-27-2019-06-09}
+
+#### New Features {#new-features-1}
+
+- Added bitmap-related functions `bitmapHasAny` and `bitmapHasAll`, analogous to the `hasAny` and `hasAll` functions for arrays (a sketch follows after this release's bug-fix list). [\#5279](https://github.com/ClickHouse/ClickHouse/pull/5279) ([Sergi Vladykin](https://github.com/svladykin))
+
+#### Bug Fixes {#bug-fixes-1}
+
+- Fix segfault on `minmax` INDEX with Null value. [\#5246](https://github.com/ClickHouse/ClickHouse/pull/5246) ([Nikita Vasilev](https://github.com/nikvas0))
+- Mark all input columns in LIMIT BY as required output. It fixes the ‘Not found column’ error in some distributed queries. [\#5407](https://github.com/ClickHouse/ClickHouse/pull/5407) ([Constantin S. Pan](https://github.com/kvap))
+- Fix the «Column ‘0’ already exists» error in `SELECT .. PREWHERE` on a column with DEFAULT. [\#5397](https://github.com/ClickHouse/ClickHouse/pull/5397) ([proller](https://github.com/proller))
+- Fix the `ALTER MODIFY TTL` query on `ReplicatedMergeTree`. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539/commits) ([Anton Popov](https://github.com/CurtizJ))
+- Don't crash the server when Kafka consumers have failed to start. [\#5285](https://github.com/ClickHouse/ClickHouse/pull/5285) ([Ivan](https://github.com/abyss7))
+- Fixed bitmap functions producing wrong results. [\#5359](https://github.com/ClickHouse/ClickHouse/pull/5359) ([Andy Yang](https://github.com/andyyzh))
+- Fix element\_count for hashed dictionaries (do not include duplicates). [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))
+- Use the contents of the TZ environment variable as the name for the timezone. It helps to correctly detect the default timezone in some cases. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
+- Do not try to convert integers in `dictGetT` functions, because it doesn't work correctly. Throw an exception instead. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix settings in the ExternalData HTTP request. [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed a bug when parts were removed only from FS without dropping them from Zookeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
+- Fix segmentation fault in the `bitmapHasAny` function. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fixed an error when the replication connection pool doesn't retry to resolve the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
+- Fixed the `DROP INDEX IF EXISTS` query. Now the `ALTER TABLE ... DROP INDEX IF EXISTS ...` query doesn't raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
+- Fix UNION ALL supertype column. There were cases with inconsistent data and column types of resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2))
+- Skip ZNONODE during DDL query processing. Before, if another node removed the znode in the task queue, the one that did not process it yet, but already got the list of children, would terminate the DDLWorker thread. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
+- Fix INSERT into a Distributed() table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))
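+
+A minimal sketch of the new bitmap predicates from the New Features entry above (the literal arrays are illustrative; `bitmapBuild` is assumed as the usual way to construct a bitmap from an array):
+
+```sql
+-- Do the two bitmaps intersect at all?
+SELECT bitmapHasAny(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4, 5]));  -- 1
+
+-- Is the second bitmap fully contained in the first one?
+SELECT bitmapHasAll(bitmapBuild([1, 2, 3]), bitmapBuild([2, 3]));     -- 1
+SELECT bitmapHasAll(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4]));     -- 0
+```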
+
+### ClickHouse release 19.7.3.9, 2019-05-30 {#clickhouse-release-19-7-3-9-2019-05-30}
+
+#### New Features {#new-features-2}
+
+- Allow limiting the range of a setting that can be specified by the user.
+  These constraints can be set up in the user settings profile.
+  [\#4931](https://github.com/ClickHouse/ClickHouse/pull/4931) ([Vitaly
+  Baranov](https://github.com/vitlibar))
+- Added a second version of the function `groupUniqArray` with an optional
+  `max_size` parameter that limits the size of the resulting array. This
+  behavior is similar to the `groupArray(max_size)(x)` function
+  (see the sketch after this list).
+  [\#5026](https://github.com/ClickHouse/ClickHouse/pull/5026) ([Guillaume
+  Tassery](https://github.com/YiuRULE))
+- For TSVWithNames/CSVWithNames input file formats, the column order can now be
+  determined from the file header. This is controlled by the
+  `input_format_with_names_use_header` parameter.
+  [\#5081](https://github.com/ClickHouse/ClickHouse/pull/5081)
+  ([Alexander](https://github.com/Akazz))
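+
+A minimal sketch of the capped `groupUniqArray` variant from the entry above (the data is illustrative; which distinct values end up in the result is not guaranteed):
+
+```sql
+-- Collect at most 3 distinct values of number % 4 across the whole block
+SELECT groupUniqArray(3)(number % 4) AS sample
+FROM numbers(100);
+-- e.g. [0, 1, 2] — any 3 of the 4 distinct values may be returned
+```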
+
+#### Bug Fixes {#bug-fixes-2}
+
+- Fix crash with uncompressed\_cache + JOIN during merge (\#5197).
+  [\#5133](https://github.com/ClickHouse/ClickHouse/pull/5133) ([Danila
+  Kutenin](https://github.com/danlark1))
+- Fix segmentation fault on a clickhouse-client query to system tables. \#5066
+  [\#5127](https://github.com/ClickHouse/ClickHouse/pull/5127)
+  ([Ivan](https://github.com/abyss7))
+- Fix data loss on heavy load via KafkaEngine (\#4736).
+  [\#5080](https://github.com/ClickHouse/ClickHouse/pull/5080)
+  ([Ivan](https://github.com/abyss7))
+- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Performance Improvements {#performance-improvements-1}
+
+- Use radix sort for sorting by a single numeric column in `ORDER BY` without
+  `LIMIT`. [\#5106](https://github.com/ClickHouse/ClickHouse/pull/5106),
+  [\#4439](https://github.com/ClickHouse/ClickHouse/pull/4439)
+  ([Evgenii Pravda](https://github.com/kvinty),
+  [alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Documentation {#documentation-1}
+
+- Translate documentation for some table engines to Chinese.
+  [\#5107](https://github.com/ClickHouse/ClickHouse/pull/5107),
+  [\#5094](https://github.com/ClickHouse/ClickHouse/pull/5094),
+  [\#5087](https://github.com/ClickHouse/ClickHouse/pull/5087)
+  ([张风啸](https://github.com/AlexZFX)),
+  [\#5068](https://github.com/ClickHouse/ClickHouse/pull/5068) ([never
+  lee](https://github.com/neverlee))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-1}
+
+- Print UTF-8 characters properly in `clickhouse-test`.
+  [\#5084](https://github.com/ClickHouse/ClickHouse/pull/5084)
+  ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add a command line parameter for clickhouse-client to always load suggestion
+  data. [\#5102](https://github.com/ClickHouse/ClickHouse/pull/5102)
+  ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Resolve some of the PVS-Studio warnings.
+  [\#5082](https://github.com/ClickHouse/ClickHouse/pull/5082)
+  ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Update LZ4. [\#5040](https://github.com/ClickHouse/ClickHouse/pull/5040) ([Danila
+  Kutenin](https://github.com/danlark1))
+- Add gperf to the build requirements for the upcoming pull request \#5030.
+  [\#5110](https://github.com/ClickHouse/ClickHouse/pull/5110)
+  ([proller](https://github.com/proller))
+
+## ClickHouse release 19.6 {#clickhouse-release-19-6}
+
+### ClickHouse release 19.6.3.18, 2019-06-13 {#clickhouse-release-19-6-3-18-2019-06-13}
+
+#### Bug Fixes {#bug-fixes-3}
+
+- Fixed IN condition pushdown for queries from the table functions `mysql` and `odbc` and the corresponding table engines. This fixes \#3540 and \#2384. [\#5313](https://github.com/ClickHouse/ClickHouse/pull/5313) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix deadlock in Zookeeper. [\#5297](https://github.com/ClickHouse/ClickHouse/pull/5297) ([github1youlc](https://github.com/github1youlc))
+- Allow quoted decimals in CSV. [\#5284](https://github.com/ClickHouse/ClickHouse/pull/5284) ([Artem Zuikov](https://github.com/4ertus2))
+- Disallow conversion from float Inf/NaN into Decimals (throw an exception).
+ [\#5282](https://github.com/ClickHouse/ClickHouse/pull/5282) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix data race in the RENAME query. [\#5247](https://github.com/ClickHouse/ClickHouse/pull/5247) ([Winter Zhang](https://github.com/zhang2014))
+- Temporarily disable LFAlloc. Usage of LFAlloc might lead to a lot of MAP\_FAILED errors when allocating the UncompressedCache, and as a result to crashes of queries on highly loaded servers. [cfdba93](https://github.com/ClickHouse/ClickHouse/commit/cfdba938ce22f16efeec504f7f90206a515b1280) ([Danila Kutenin](https://github.com/danlark1))
+
+### ClickHouse release 19.6.2.11, 2019-05-13 {#clickhouse-release-19-6-2-11-2019-05-13}
+
+#### New Features {#new-features-3}
+
+- TTL expressions for columns and tables (a sketch follows after the Improvements list below). [\#4212](https://github.com/ClickHouse/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ))
+- Added support for `brotli` compression of HTTP responses (Accept-Encoding: br). [\#4388](https://github.com/ClickHouse/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin))
+- Added the new function `isValidUTF8` for checking whether a set of bytes is correctly utf-8 encoded. [\#4934](https://github.com/ClickHouse/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1))
+- Added the new load balancing policy `first_or_random` which sends queries to the first specified host and, if it's inaccessible, sends queries to random hosts of the shard. Useful for cross-replication topology setups. [\#5012](https://github.com/ClickHouse/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei))
+
+#### Experimental Features {#experimental-features-1}
+
+- Added the setting `index_granularity_bytes` (adaptive index granularity) for the MergeTree\* family of tables. [\#4826](https://github.com/ClickHouse/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin))
+
+#### Improvements {#improvements-1}
+
+- Added support of non-constant and negative size and length arguments for the function `substringUTF8`. [\#4989](https://github.com/ClickHouse/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Disable push-down to the right table in left join, to the left table in right join, and to both tables in full join. This fixes wrong JOIN results in some cases. [\#4846](https://github.com/ClickHouse/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7))
+- `clickhouse-copier`: auto-upload the task configuration from the `--task-file` option. [\#4876](https://github.com/ClickHouse/ClickHouse/pull/4876) ([proller](https://github.com/proller))
+- Added a typos handler for the storage factory and table functions factory. [\#4891](https://github.com/ClickHouse/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1))
+- Support asterisks and qualified asterisks for multiple joins without subqueries. [\#4898](https://github.com/ClickHouse/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2))
+- Make the missing-column error message more user friendly. [\#4915](https://github.com/ClickHouse/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2))
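+
+A minimal sketch of the new TTL expressions from the New Features list above (table name, columns and intervals are illustrative assumptions):
+
+```sql
+-- Column-level TTL: values of `details` are cleared a month after `d`;
+-- table-level TTL: whole rows are deleted a year after `d`.
+CREATE TABLE events
+(
+    d Date,
+    user_id UInt64,
+    details String TTL d + INTERVAL 1 MONTH
+)
+ENGINE = MergeTree
+ORDER BY (user_id, d)
+TTL d + INTERVAL 1 YEAR;
+```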
+
+#### Performance Improvements {#performance-improvements-2}
+
+- Significant JOIN speed-up. [\#4924](https://github.com/ClickHouse/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy))
+
+#### Backward Incompatible Changes {#backward-incompatible-changes}
+
+- The HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [\#4972](https://github.com/ClickHouse/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin))
+
+#### Bug Fixes {#bug-fixes-4}
+
+- Fixed potential null pointer dereference in `clickhouse-copier`. [\#4900](https://github.com/ClickHouse/ClickHouse/pull/4900) ([proller](https://github.com/proller))
+- Fixed an error in a query with JOIN + ARRAY JOIN. [\#4938](https://github.com/ClickHouse/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed hanging on start of the server when a dictionary depends on another dictionary via a database with engine=Dictionary. [\#4962](https://github.com/ClickHouse/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar))
+- Partially fix distributed\_product\_mode = local. It's possible to allow columns of local tables in where/having/order by/… via table aliases. Throw an exception if the table does not have an alias. It's not yet possible to access the columns without table aliases. [\#4986](https://github.com/ClickHouse/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix potentially wrong result for `SELECT DISTINCT` with `JOIN`. [\#5001](https://github.com/ClickHouse/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-2}
+
+- Fixed test failures when running clickhouse-server on a different host. [\#4713](https://github.com/ClickHouse/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk))
+- clickhouse-test: disable color control sequences in non-tty environments. [\#4937](https://github.com/ClickHouse/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin))
+- clickhouse-test: allow use of any test database (remove the `test.` qualification where possible). [\#5008](https://github.com/ClickHouse/ClickHouse/pull/5008) ([proller](https://github.com/proller))
+- Fix ubsan errors. [\#5037](https://github.com/ClickHouse/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar))
+- Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliably. [\#4995](https://github.com/ClickHouse/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1))
+- Python util to help with backports and changelogs. [\#4949](https://github.com/ClickHouse/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7))
+
+## ClickHouse release 19.5 {#clickhouse-release-19-5}
+
+### ClickHouse release 19.5.4.22, 2019-05-13 {#clickhouse-release-19-5-4-22-2019-05-13}
+
+#### Bug Fixes {#bug-fixes-5}
+
+- Fixed a possible crash in bitmap\* functions. [\#5220](https://github.com/ClickHouse/ClickHouse/pull/5220) [\#5228](https://github.com/ClickHouse/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh))
+- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if a LowCardinality column was part of the primary key. \#5031 [\#5154](https://github.com/ClickHouse/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Modification of the retention function: if a row satisfied both the first and the N-th condition, only the first satisfied condition was added to the data state. Now all conditions that are satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))
+
+### ClickHouse release 19.5.3.8, 2019-04-18 {#clickhouse-release-19-5-3-8-2019-04-18}
+
+#### Bug Fixes {#bug-fixes-6}
+
+- Fixed the type of the setting `max_partitions_per_insert_block` from boolean to UInt64. [\#5028](https://github.com/ClickHouse/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat))
+
+### ClickHouse release 19.5.2.6, 2019-04-15 {#clickhouse-release-19-5-2-6-2019-04-15}
+
+#### New Features {#new-features-4}
+
+- [Hyperscan](https://github.com/intel/hyperscan) multiple regular expression matching was added (functions `multiMatchAny`, `multiMatchAnyIndex`, `multiFuzzyMatchAny`, `multiFuzzyMatchAnyIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780), [\#4841](https://github.com/ClickHouse/ClickHouse/pull/4841) ([Danila Kutenin](https://github.com/danlark1))
+- The `multiSearchFirstPosition` function was added. [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
+- Implement a predefined per-row expression filter for tables. [\#4792](https://github.com/ClickHouse/ClickHouse/pull/4792) ([Ivan](https://github.com/abyss7))
+- A new type of data skipping indices based on bloom filters (can be used for the `equal`, `in` and `like` functions). [\#4499](https://github.com/ClickHouse/ClickHouse/pull/4499) ([Nikita Vasilev](https://github.com/nikvas0))
+- Added `ASOF JOIN` which allows running queries that join to the most recent value known (see the sketch after this list). [\#4774](https://github.com/ClickHouse/ClickHouse/pull/4774) [\#4867](https://github.com/ClickHouse/ClickHouse/pull/4867) [\#4863](https://github.com/ClickHouse/ClickHouse/pull/4863) [\#4875](https://github.com/ClickHouse/ClickHouse/pull/4875) ([Martijn Bakker](https://github.com/Gladdy), [Artem Zuikov](https://github.com/4ertus2))
+- Rewrite multiple `COMMA JOIN` to `CROSS JOIN`. Then rewrite them to `INNER JOIN` if possible. [\#4661](https://github.com/ClickHouse/ClickHouse/pull/4661) ([Artem Zuikov](https://github.com/4ertus2))
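+
+A minimal sketch of `ASOF JOIN` from the entry above, using the ON form of the join condition (table and column names are illustrative assumptions; for each left-side row, the inequality picks the closest earlier right-side row):
+
+```sql
+-- For every trade, attach the most recent quote known at trade time.
+SELECT t.symbol, t.ts, t.price, q.bid
+FROM trades AS t
+ASOF JOIN quotes AS q
+    ON t.symbol = q.symbol AND t.ts >= q.ts;
+```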
+
+#### Improvement {#improvement-9}
+
+- `topK` and `topKWeighted` now support a custom `loadFactor` (fixes issue [\#4252](https://github.com/ClickHouse/ClickHouse/issues/4252)). [\#4634](https://github.com/ClickHouse/ClickHouse/pull/4634) ([Kirill Danshin](https://github.com/kirillDanshin))
+- Allow using `parallel_replicas_count > 1` even for tables without sampling (the setting is simply ignored for them). In previous versions it led to an exception. [\#4637](https://github.com/ClickHouse/ClickHouse/pull/4637) ([Alexey Elymanov](https://github.com/digitalist))
+- Support `CREATE OR REPLACE VIEW`. Allows creating a view or setting a new definition in a single statement (see the sketch after this list). [\#4654](https://github.com/ClickHouse/ClickHouse/pull/4654) ([Boris Granveaud](https://github.com/bgranvea))
+- The `Buffer` table engine now supports `PREWHERE`. [\#4671](https://github.com/ClickHouse/ClickHouse/pull/4671) ([Yangkuan Liu](https://github.com/LiuYangkuan))
+- Add the ability to start a replicated table without metadata in zookeeper in `readonly` mode. [\#4691](https://github.com/ClickHouse/ClickHouse/pull/4691) ([alesapin](https://github.com/alesapin))
+- Fixed flicker of the progress bar in clickhouse-client. The issue was most noticeable when using `FORMAT Null` with streaming queries. [\#4811](https://github.com/ClickHouse/ClickHouse/pull/4811) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow disabling functions with the `hyperscan` library on a per-user basis to limit potentially excessive and uncontrolled resource usage. [\#4816](https://github.com/ClickHouse/ClickHouse/pull/4816) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add version number logging in all errors. [\#4824](https://github.com/ClickHouse/ClickHouse/pull/4824) ([proller](https://github.com/proller))
+- Added a restriction to the `multiMatch` functions which requires the string size to fit into `unsigned int`. Also added a number-of-arguments limit to the `multiSearch` functions. [\#4834](https://github.com/ClickHouse/ClickHouse/pull/4834) ([Danila Kutenin](https://github.com/danlark1))
+- Improved usage of scratch space and error handling in Hyperscan. [\#4866](https://github.com/ClickHouse/ClickHouse/pull/4866) ([Danila Kutenin](https://github.com/danlark1))
+- Fill `system.graphite_retentions` from a table config of `*GraphiteMergeTree` engine tables. [\#4584](https://github.com/ClickHouse/ClickHouse/pull/4584) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Rename the `trigramDistance` function to `ngramDistance` and add more functions with `CaseInsensitive` and `UTF`. [\#4602](https://github.com/ClickHouse/ClickHouse/pull/4602) ([Danila Kutenin](https://github.com/danlark1))
+- Improved data skipping indices calculation. [\#4640](https://github.com/ClickHouse/ClickHouse/pull/4640) ([Nikita Vasilev](https://github.com/nikvas0))
+- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
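+
+A minimal sketch of `CREATE OR REPLACE VIEW` from the list above (view and table names are illustrative assumptions):
+
+```sql
+-- The first call creates the view; subsequent calls replace its
+-- definition without an explicit DROP.
+CREATE OR REPLACE VIEW daily_totals AS
+SELECT toDate(ts) AS day, count() AS hits
+FROM events
+GROUP BY day;
+```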
+
+#### Bug Fix {#bug-fix-26}
+
+- Avoid `std::terminate` in case of memory allocation failure. Now a `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed capnproto reading from buffer. Sometimes files weren't loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
+- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
+- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
+- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
+- Fixed ignorance of the `UTC` setting (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
+- Fix `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
+- Fixed tsan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix rechecking of parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Always backquote column names in metadata. Otherwise it's impossible to create a table with a column named `index` (the server won't restart due to a malformed `ATTACH` query in metadata).
+ [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
+- Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix crash of `JOIN` on a not-nullable vs nullable column. Fix `NULLs` in right keys in `ANY JOIN` + `join_use_nulls`. [\#4815](https://github.com/ClickHouse/ClickHouse/pull/4815) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a rare data race that can happen during `RENAME` of a MergeTree family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed segmentation fault in the function `arrayIntersect`. The segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
+- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
+- Fixed the `arrayIntersect` function giving a wrong result in the case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
+- Fix incorrect result in `FULL/RIGHT JOIN` with a const column. [\#4723](https://github.com/ClickHouse/ClickHouse/pull/4723) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix duplicates in `GLOBAL JOIN` with asterisk.
+ [\#4705](https://github.com/ClickHouse/ClickHouse/pull/4705) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix parameter deduction in `ALTER MODIFY` of a column `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
+- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when the `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a rare bug when the setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
+- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix wrong name qualification in `GLOBAL JOIN`. [\#4969](https://github.com/ClickHouse/ClickHouse/pull/4969) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed the `toISOWeek` function result for year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed with `ON CLUSTER` for the `ReplicatedMergeTree*` family of tables. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
+
+#### Backward Incompatible Change {#backward-incompatible-change-8}
+
+- Renamed the setting `insert_sample_with_metadata` to `input_format_defaults_for_omitted_fields`. [\#4771](https://github.com/ClickHouse/ClickHouse/pull/4771) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the setting `max_partitions_per_insert_block` (with value 100 by default). If an inserted block contains a larger number of partitions, an exception is thrown. Set it to 0 if you want to remove the limit (not recommended). [\#4845](https://github.com/ClickHouse/ClickHouse/pull/4845) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Multi-search functions were renamed (`multiPosition` to `multiSearchAllPositions`, `multiSearch` to `multiSearchAny`, `firstMatch` to `multiSearchFirstIndex`); see the sketch after the next list. [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
+
+#### Performance Improvement {#performance-improvement-6}
+
+- Optimize the Volnitsky searcher by inlining, giving about a 5-10% search improvement for queries with many needles or many similar bigrams. [\#4862](https://github.com/ClickHouse/ClickHouse/pull/4862) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed a performance issue when the setting `use_uncompressed_cache` is greater than zero, which appeared when all read data was contained in the cache. [\#4913](https://github.com/ClickHouse/ClickHouse/pull/4913) ([alesapin](https://github.com/alesapin))
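+
+A minimal sketch of the renamed multi-search functions from the Backward Incompatible Change list above (the haystack and needles are illustrative; positions are 1-based, 0 means the needle is absent):
+
+```sql
+-- Position of the first occurrence of each needle
+SELECT multiSearchAllPositions('hello world', ['l', 'o', 'z']);  -- [3, 5, 0]
+
+-- Does any needle occur in the haystack?
+SELECT multiSearchAny('hello world', ['xyz', 'wor']);            -- 1
+```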
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-10}
+
+- Hardened debug build: more granular memory mappings and ASLR; added memory protection for the mark cache and index. This allows finding more memory-stomping bugs in cases when ASan and MSan cannot do it. [\#4632](https://github.com/ClickHouse/ClickHouse/pull/4632) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add support for the cmake variables `ENABLE_PROTOBUF`, `ENABLE_PARQUET` and `ENABLE_BROTLI` which allow enabling/disabling the above features (same as we can do for librdkafka, mysql, etc). [\#4669](https://github.com/ClickHouse/ClickHouse/pull/4669) ([Silviu Caragea](https://github.com/silviucpp))
+- Add the ability to print the process list and stacktraces of all threads if some queries are hung after a test run. [\#4675](https://github.com/ClickHouse/ClickHouse/pull/4675) ([alesapin](https://github.com/alesapin))
+- Add retries on `Connection loss` error in `clickhouse-test`. [\#4682](https://github.com/ClickHouse/ClickHouse/pull/4682) ([alesapin](https://github.com/alesapin))
+- Add a FreeBSD build with Vagrant, and a build with Thread Sanitizer, to the packager script. [\#4712](https://github.com/ClickHouse/ClickHouse/pull/4712) [\#4748](https://github.com/ClickHouse/ClickHouse/pull/4748) ([alesapin](https://github.com/alesapin))
+- Now the user is asked for a password for the `'default'` user during installation. [\#4725](https://github.com/ClickHouse/ClickHouse/pull/4725) ([proller](https://github.com/proller))
+- Suppress a warning in the `rdkafka` library. [\#4740](https://github.com/ClickHouse/ClickHouse/pull/4740) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow building without ssl. [\#4750](https://github.com/ClickHouse/ClickHouse/pull/4750) ([proller](https://github.com/proller))
+- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Upgrade contrib boost to 1.69. [\#4793](https://github.com/ClickHouse/ClickHouse/pull/4793) ([proller](https://github.com/proller))
+- Disable usage of `mremap` when compiled with Thread Sanitizer. Surprisingly enough, TSan does not intercept `mremap` (though it does intercept `mmap`, `munmap`), which leads to false positives. Fixed a TSan report in stateful tests. [\#4859](https://github.com/ClickHouse/ClickHouse/pull/4859) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add test checking of using a format schema via the HTTP interface. [\#4864](https://github.com/ClickHouse/ClickHouse/pull/4864) ([Vitaly Baranov](https://github.com/vitlibar))
+
+## ClickHouse release 19.4 {#clickhouse-release-19-4}
+
+### ClickHouse release 19.4.4.33, 2019-04-17 {#clickhouse-release-19-4-4-33-2019-04-17}
+
+#### Bug Fixes {#bug-fixes-7}
+
+- Avoid `std::terminate` in case of memory allocation failure. Now a `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed capnproto reading from buffer. Sometimes files weren't loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
+- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query.
+ [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
+- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
+- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
+- Fixed ignorance of the `UTC` setting (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
+- Fix `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
+- Fixed tsan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix rechecking of parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Always backquote column names in metadata. Otherwise it's impossible to create a table with a column named `index` (the server won't restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
+- Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently.
+ [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a rare data race that can happen during `RENAME` of a MergeTree family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed segmentation fault in the function `arrayIntersect`. The segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
+- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
+- Fixed the `arrayIntersect` function giving a wrong result in the case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
+- Fix parameter deduction in `ALTER MODIFY` of a column `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
+- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when the `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a rare bug when the setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
+- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the `toISOWeek` function result for year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed with `ON CLUSTER` for the `ReplicatedMergeTree*` family of tables. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
+
+#### Improvements {#improvements-2}
+
+- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)).
+ [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
+
+### ClickHouse release 19.4.3.11, 2019-04-02 {#clickhouse-release-19-4-3-11-2019-04-02}
+
+#### Bug Fixes {#bug-fixes-8}
+
+- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-11}
+
+- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+
+### ClickHouse release 19.4.2.7, 2019-03-30 {#clickhouse-release-19-4-2-7-2019-03-30}
+
+#### Bug Fixes {#bug-fixes-9}
+
+- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+### ClickHouse release 19.4.1.3, 2019-03-19 {#clickhouse-release-19-4-1-3-2019-03-19}
+
+#### Bug Fixes {#bug-fixes-10}
+
+- Fixed remote queries which contain both `LIMIT BY` and `LIMIT`. Previously, if `LIMIT BY` and `LIMIT` were used in a remote query, `LIMIT` could happen before `LIMIT BY`, which led to a too-filtered result. [\#4708](https://github.com/ClickHouse/ClickHouse/pull/4708) ([Constantin S. Pan](https://github.com/kvap))
+
+### ClickHouse release 19.4.0.49, 2019-03-09 {#clickhouse-release-19-4-0-49-2019-03-09}
+
+#### New Features {#new-features-5}
+
+- Added full support for the `Protobuf` format (input and output, nested data structures). [\#4174](https://github.com/ClickHouse/ClickHouse/pull/4174) [\#4493](https://github.com/ClickHouse/ClickHouse/pull/4493) ([Vitaly Baranov](https://github.com/vitlibar))
+- Added bitmap functions with Roaring Bitmaps. [\#4207](https://github.com/ClickHouse/ClickHouse/pull/4207) ([Andy Yang](https://github.com/andyyzh)) [\#4568](https://github.com/ClickHouse/ClickHouse/pull/4568) ([Vitaly Baranov](https://github.com/vitlibar))
+- Parquet format support. [\#4448](https://github.com/ClickHouse/ClickHouse/pull/4448) ([proller](https://github.com/proller))
+- N-gram distance was added for fuzzy string comparison. It is similar to q-gram metrics in the R language. [\#4466](https://github.com/ClickHouse/ClickHouse/pull/4466) ([Danila Kutenin](https://github.com/danlark1))
+- Combine rules for graphite rollup from dedicated aggregation and retention patterns. [\#4426](https://github.com/ClickHouse/ClickHouse/pull/4426) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Added `max_execution_speed` and `max_execution_speed_bytes` to limit resource usage. Added the `min_execution_speed_bytes` setting to complement `min_execution_speed`. [\#4430](https://github.com/ClickHouse/ClickHouse/pull/4430) ([Winter Zhang](https://github.com/zhang2014))
+- Implemented function `flatten` (see the sketch after this list). [\#4555](https://github.com/ClickHouse/ClickHouse/pull/4555) [\#4409](https://github.com/ClickHouse/ClickHouse/pull/4409) ([alexey-milovidov](https://github.com/alexey-milovidov), [kzon](https://github.com/kzon))
+- Added functions `arrayEnumerateDenseRanked` and `arrayEnumerateUniqRanked` (it's like `arrayEnumerateUniq` but allows fine-tuning the array depth to look inside multidimensional arrays). [\#4475](https://github.com/ClickHouse/ClickHouse/pull/4475) ([proller](https://github.com/proller)) [\#4601](https://github.com/ClickHouse/ClickHouse/pull/4601) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Multiple JOINS with some restrictions: no asterisks, no complex aliases in ON/WHERE/GROUP BY/… [\#4462](https://github.com/ClickHouse/ClickHouse/pull/4462) ([Artem Zuikov](https://github.com/4ertus2))
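+
+A minimal sketch of `flatten` from the entry above (the literal array is illustrative):
+
+```sql
+-- Collapse a nested array into a flat one
+SELECT flatten([[[1, 2], [3]], [[4, 5]]]);  -- [1, 2, 3, 4, 5]
+```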
+
+#### Bug Fixes {#bug-fixes-11}
+
+- This release also contains all bug fixes from 19.3 and 19.1.
+- Fixed a bug in data skipping indices: the order of granules after INSERT was incorrect. [\#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0))
+- Fixed the `set` index for `Nullable` and `LowCardinality` columns. Before it, a `set` index with a `Nullable` or `LowCardinality` column led to the error `Data type must be deserialized with multiple streams` while selecting. [\#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Correctly set update\_time on full `executable` dictionary update. [\#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon))
+- Fixed the broken progress bar in 19.3. [\#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov))
+- Fixed inconsistent values of MemoryTracker when a memory region was shrunk, in certain cases. [\#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed undefined behaviour in ThreadPool. [\#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [\#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alex Zatelepin](https://github.com/ztlpn))
+- ODBC driver compatibility with the `LowCardinality` data type. [\#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller))
+- FreeBSD: fix for the `AIOcontextPool: Found io_event with unknown id 0` error. [\#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef))
+- The `system.part_log` table was created regardless of configuration. [\#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix undefined behaviour in the `dictIsIn` function for cache dictionaries. [\#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin))
+- Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. from different threads or when executing multiple subqueries) and there is a concurrent DDL query.

#### Bug Fixes {#bug-fixes-11}

- This release also contains all bug fixes from 19.3 and 19.1.
- Fixed a bug in data skipping indices: the order of granules after INSERT was incorrect. [\#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0))
- Fixed the `set` index for `Nullable` and `LowCardinality` columns. Before this, a `set` index with a `Nullable` or `LowCardinality` column led to the error `Data type must be deserialized with multiple streams` while selecting. [\#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Correctly set update\_time on a full `executable` dictionary update. [\#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon))
- Fixed the broken progress bar in 19.3. [\#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov))
- Fixed inconsistent values of MemoryTracker when a memory region was shrunk, in certain cases. [\#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed undefined behaviour in ThreadPool. [\#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [\#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alexey Zatelepin](https://github.com/ztlpn))
- ODBC driver compatibility with the `LowCardinality` data type. [\#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller))
- FreeBSD: fix for the `AIOcontextPool: Found io_event with unknown id 0` error. [\#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef))
- The `system.part_log` table was created regardless of configuration. [\#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed undefined behaviour in the `dictIsIn` function for cache dictionaries. [\#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin))
- Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. from different threads or when executing multiple subqueries) and there is a concurrent DDL query. [\#4535](https://github.com/ClickHouse/ClickHouse/pull/4535) ([Alexey Zatelepin](https://github.com/ztlpn))
- Disabled compile\_expressions by default, until we get our own `llvm` contrib and can test it with `clang` and `asan`. [\#4579](https://github.com/ClickHouse/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin))
- Prevent `std::terminate` when `invalidate_query` for a `clickhouse` external dictionary source has returned a wrong resultset (empty, or more than one row, or more than one column). Also fixed the issue when `invalidate_query` was performed every five seconds regardless of the `lifetime`. [\#4583](https://github.com/ClickHouse/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid a deadlock when `invalidate_query` for a dictionary with a `clickhouse` source involved the `system.dictionaries` table or the `Dictionaries` database (a rare case). [\#4599](https://github.com/ClickHouse/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed CROSS JOIN with an empty WHERE. [\#4598](https://github.com/ClickHouse/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed segfault in the function «replicate» when a constant argument is passed. [\#4603](https://github.com/ClickHouse/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed lambda functions with the predicate optimizer. [\#4408](https://github.com/ClickHouse/ClickHouse/pull/4408) ([Winter Zhang](https://github.com/zhang2014))
- Multiple fixes for multiple JOINs. [\#4595](https://github.com/ClickHouse/ClickHouse/pull/4595) ([Artem Zuikov](https://github.com/4ertus2))

#### Improvements {#improvements-3}

- Support aliases in the JOIN ON section for right table columns. [\#4412](https://github.com/ClickHouse/ClickHouse/pull/4412) ([Artem Zuikov](https://github.com/4ertus2))
- The result of multiple JOINs needs correct result names to be used in subselects. Flat aliases are now replaced with source names in the result. [\#4474](https://github.com/ClickHouse/ClickHouse/pull/4474) ([Artem Zuikov](https://github.com/4ertus2))
- Improved push-down logic for joined statements. [\#4387](https://github.com/ClickHouse/ClickHouse/pull/4387) ([Ivan](https://github.com/abyss7))

#### Performance Improvements {#performance-improvements-3}

- Improved heuristics of the «move to PREWHERE» optimization. [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Use proper lookup tables that use HashTable's API for 8-bit and 16-bit keys. [\#4536](https://github.com/ClickHouse/ClickHouse/pull/4536) ([Amos Bird](https://github.com/amosbird))
- Improved performance of string comparison. [\#4564](https://github.com/ClickHouse/ClickHouse/pull/4564) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Cleanup of the distributed DDL queue is done in a separate thread so that it doesn't slow down the main loop that processes distributed DDL tasks. [\#4502](https://github.com/ClickHouse/ClickHouse/pull/4502) ([Alexey Zatelepin](https://github.com/ztlpn))
- When `min_bytes_to_use_direct_io` is set to 1, not every file was opened in O\_DIRECT mode because the data size to read was sometimes underestimated by the size of one compressed block. [\#4526](https://github.com/ClickHouse/ClickHouse/pull/4526) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-12}

- Added support for clang-9. [\#4604](https://github.com/ClickHouse/ClickHouse/pull/4604) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong `__asm__` instructions (again). [\#4621](https://github.com/ClickHouse/ClickHouse/pull/4621) ([Konstantin Podshumok](https://github.com/podshumok))
- Added the ability to specify settings for `clickhouse-performance-test` from the command line. [\#4437](https://github.com/ClickHouse/ClickHouse/pull/4437) ([alesapin](https://github.com/alesapin))
- Added dictionary tests to integration tests. [\#4477](https://github.com/ClickHouse/ClickHouse/pull/4477) ([alesapin](https://github.com/alesapin))
- Added queries from the benchmark on the website to automated performance tests. [\#4496](https://github.com/ClickHouse/ClickHouse/pull/4496) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `xxhash.h` does not exist in external lz4 because it is an implementation detail and its symbols are namespaced with the `XXH_NAMESPACE` macro. When lz4 is external, xxHash has to be external too, and the dependents have to link to it. [\#4495](https://github.com/ClickHouse/ClickHouse/pull/4495) ([Orivej Desh](https://github.com/orivej))
- Fixed a case when the `quantileTiming` aggregate function could be called with a negative or floating-point argument (this fixes a fuzz test under the undefined behaviour sanitizer). [\#4506](https://github.com/ClickHouse/ClickHouse/pull/4506) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Spelling error correction. [\#4531](https://github.com/ClickHouse/ClickHouse/pull/4531) ([sdk2](https://github.com/sdk2))
- Fixed compilation on Mac. [\#4371](https://github.com/ClickHouse/ClickHouse/pull/4371) ([Vitaly Baranov](https://github.com/vitlibar))
- Build fixes for FreeBSD and various unusual build configurations. [\#4444](https://github.com/ClickHouse/ClickHouse/pull/4444) ([proller](https://github.com/proller))

## ClickHouse release 19.3 {#clickhouse-release-19-3}

### ClickHouse release 19.3.9.1, 2019-04-02 {#clickhouse-release-19-3-9-1-2019-04-02}

#### Bug Fixes {#bug-fixes-12}

- Fixed crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable columns. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-13}

- Added a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))

### ClickHouse release 19.3.7, 2019-03-12 {#clickhouse-release-19-3-7-2019-03-12}

#### Bug Fixes {#bug-fixes-13}

- Fixed a bug in \#3920. The bug manifests itself as random cache corruption (messages `Unknown codec family code`, `Cannot seek through file`) and segfaults. It first appeared in version 19.1 and is present in versions up to 19.1.10 and 19.3.6. [\#4623](https://github.com/ClickHouse/ClickHouse/pull/4623) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.3.6, 2019-03-02 {#clickhouse-release-19-3-6-2019-03-02}

#### Bug Fixes {#bug-fixes-14}

- When there are more than 1000 threads in a thread pool, `std::terminate` could happen on thread exit. [Azat Khuzhin](https://github.com/azat) [\#4485](https://github.com/ClickHouse/ClickHouse/pull/4485) [\#4505](https://github.com/ClickHouse/ClickHouse/pull/4505) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now it's possible to create `ReplicatedMergeTree*` tables with comments for columns without defaults, and tables with column codecs without comments and defaults. Also fixed comparison of codecs. [\#4523](https://github.com/ClickHouse/ClickHouse/pull/4523) ([alesapin](https://github.com/alesapin))
- Fixed crash on JOIN with an array or tuple. [\#4552](https://github.com/ClickHouse/ClickHouse/pull/4552) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed crash in clickhouse-copier with the message `ThreadStatus not created`. [\#4540](https://github.com/ClickHouse/ClickHouse/pull/4540) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a hangup on server shutdown if distributed DDLs were used. [\#4472](https://github.com/ClickHouse/ClickHouse/pull/4472) ([Alexey Zatelepin](https://github.com/ztlpn))
- Incorrect column numbers were printed in error messages about text format parsing for columns with numbers greater than 10. [\#4484](https://github.com/ClickHouse/ClickHouse/pull/4484) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-3}

- Fixed the build with AVX enabled. [\#4527](https://github.com/ClickHouse/ClickHouse/pull/4527) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Enable extended accounting and IO accounting based on a known-good version instead of the kernel under which it is compiled. [\#4541](https://github.com/ClickHouse/ClickHouse/pull/4541) ([nvartolomei](https://github.com/nvartolomei))
- Allow skipping the core\_dump.size\_limit setting, with a warning instead of a throw if setting the limit fails. [\#4473](https://github.com/ClickHouse/ClickHouse/pull/4473) ([proller](https://github.com/proller))
- Removed the `inline` tags of `void readBinary(...)` in `Field.cpp`. Also merged redundant `namespace DB` blocks. [\#4530](https://github.com/ClickHouse/ClickHouse/pull/4530) ([hcz](https://github.com/hczhcz))

### ClickHouse release 19.3.5, 2019-02-21 {#clickhouse-release-19-3-5-2019-02-21}

#### Bug Fixes {#bug-fixes-15}

- Fixed a bug with processing of large HTTP insert queries. [\#4454](https://github.com/ClickHouse/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin))
- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed backward incompatibility of the table function `remote` introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.3.4, 2019-02-16 {#clickhouse-release-19-3-4-2019-02-16}

#### Improvements {#improvements-4}

- Table index size is not accounted for memory limits when doing an `ATTACH TABLE` query. Avoided the possibility that a table cannot be attached after being detached. [\#4396](https://github.com/ClickHouse/ClickHouse/pull/4396) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Slightly raised the limit on the max string and array size received from ZooKeeper. It allows continuing to work with an increased `CLIENT_JVMFLAGS=-Djute.maxbuffer=...` on ZooKeeper. [\#4398](https://github.com/ClickHouse/ClickHouse/pull/4398) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow repairing an abandoned replica even if it already has a huge number of nodes in its queue. [\#4399](https://github.com/ClickHouse/ClickHouse/pull/4399) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added one required argument to the `SET` index (the max number of stored rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))

#### Bug Fixes {#bug-fixes-16}

- Fixed the `WITH ROLLUP` result for GROUP BY on a single `LowCardinality` key. [\#4384](https://github.com/ClickHouse/ClickHouse/pull/4384) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed a bug in the set index (a granule was dropped if it contained more than `max_rows` rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))
- A lot of FreeBSD build fixes. [\#4397](https://github.com/ClickHouse/ClickHouse/pull/4397) ([proller](https://github.com/proller))
- Fixed alias substitution in queries with a subquery containing the same alias (issue [\#4110](https://github.com/ClickHouse/ClickHouse/issues/4110)). [\#4351](https://github.com/ClickHouse/ClickHouse/pull/4351) ([Artem Zuikov](https://github.com/4ertus2))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-4}

- Added the ability to run `clickhouse-server` for stateless tests in a docker image. [\#4347](https://github.com/ClickHouse/ClickHouse/pull/4347) ([Vasily Nemkov](https://github.com/Enmk))

### ClickHouse release 19.3.3, 2019-02-13 {#clickhouse-release-19-3-3-2019-02-13}

#### New Features {#new-features-6}

- Added the `KILL MUTATION` statement that allows removing mutations that are stuck for some reason. Added the `latest_failed_part`, `latest_fail_time`, `latest_fail_reason` fields to the `system.mutations` table for easier troubleshooting. [\#4287](https://github.com/ClickHouse/ClickHouse/pull/4287) ([Alexey Zatelepin](https://github.com/ztlpn))
- Added the aggregate function `entropy` which computes Shannon entropy. [\#4238](https://github.com/ClickHouse/ClickHouse/pull/4238) ([Quid37](https://github.com/Quid37))
- Added the ability to send queries `INSERT INTO tbl VALUES (....` to the server without splitting them into `query` and `data` parts. [\#4301](https://github.com/ClickHouse/ClickHouse/pull/4301) ([alesapin](https://github.com/alesapin))
- A generic implementation of the `arrayWithConstant` function was added. [\#4322](https://github.com/ClickHouse/ClickHouse/pull/4322) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Implemented the `NOT BETWEEN` comparison operator. [\#4228](https://github.com/ClickHouse/ClickHouse/pull/4228) ([Dmitry Naumov](https://github.com/nezed))
- Implemented `sumMapFiltered` in order to be able to limit the number of keys for which values will be summed by `sumMap`. [\#4129](https://github.com/ClickHouse/ClickHouse/pull/4129) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
- Added support of the `Nullable` type in the `mysql` table function. [\#4198](https://github.com/ClickHouse/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin))
- Support for arbitrary constant expressions in the `LIMIT` clause. [\#4246](https://github.com/ClickHouse/ClickHouse/pull/4246) ([k3box](https://github.com/k3box))
- Added the `topKWeighted` aggregate function that takes an additional argument with an (unsigned integer) weight. [\#4245](https://github.com/ClickHouse/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman))
- `StorageJoin` now supports the `join_any_take_last_row` setting that allows overwriting existing values of the same key. [\#3973](https://github.com/ClickHouse/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird))
- Added the function `toStartOfInterval` (see the examples after this list). [\#4304](https://github.com/ClickHouse/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar))
- Added the `RowBinaryWithNamesAndTypes` format. [\#4200](https://github.com/ClickHouse/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer))
- Added the `IPv4` and `IPv6` data types. More effective implementations of `IPv*` functions. [\#3669](https://github.com/ClickHouse/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk))
- Added the function `toStartOfTenMinutes()`. [\#4298](https://github.com/ClickHouse/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar))
- Added the `Protobuf` output format. [\#4005](https://github.com/ClickHouse/ClickHouse/pull/4005) [\#4158](https://github.com/ClickHouse/ClickHouse/pull/4158) ([Vitaly Baranov](https://github.com/vitlibar))
- Added brotli support in the HTTP interface for data import (INSERTs). [\#4235](https://github.com/ClickHouse/ClickHouse/pull/4235) ([Mikhail](https://github.com/fandyushin))
- Added hints when a user makes a typo in a function name or in the command-line client. [\#4239](https://github.com/ClickHouse/ClickHouse/pull/4239) ([Danila Kutenin](https://github.com/danlark1))
- Added `Query-Id` to the server's HTTP response header. [\#4231](https://github.com/ClickHouse/ClickHouse/pull/4231) ([Mikhail](https://github.com/fandyushin))
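For illustration, minimal queries using two of the new functions (the table `hits(page String, views UInt64)` in the second query is hypothetical):

```sql
-- Round a timestamp down to an arbitrary interval
SELECT toStartOfInterval(toDateTime('2019-02-13 12:34:56'), INTERVAL 10 MINUTE);
-- returns 2019-02-13 12:30:00

-- Top 3 pages, ranked by the sum of an unsigned-integer weight column
SELECT topKWeighted(3)(page, views) FROM hits;
```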

#### Experimental Features {#experimental-features-2}

- Added `minmax` and `set` data skipping indices for the MergeTree table engine family. [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
- Added conversion of `CROSS JOIN` to `INNER JOIN` if possible. [\#4221](https://github.com/ClickHouse/ClickHouse/pull/4221) [\#4266](https://github.com/ClickHouse/ClickHouse/pull/4266) ([Artem Zuikov](https://github.com/4ertus2))

#### Bug Fixes {#bug-fixes-17}

- Fixed `Not found column` for duplicate columns in the `JOIN ON` section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
- Made the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
- Fixed execution of aggregate functions with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query while the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed crash on dictionary reload if the dictionary is not available. This bug appeared in 19.1.6. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed segmentation fault with `use_uncompressed_cache=1` and an exception with a wrong uncompressed size. This bug appeared in 19.1.6. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
- Fixed a `compile_expressions` bug with comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
- Fixed an infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
- Fixed the `Illegal instruction` error when using base64 functions on old CPUs. This error was reproduced only when ClickHouse was compiled with gcc-8. [\#4275](https://github.com/ClickHouse/ClickHouse/pull/4275) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixed a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of a conditional operator (the function `if`). Added a generic case for the function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
- ClickHouse dictionaries now load within the `clickhouse` process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a race condition when selecting from `system.tables` could give the `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `clickhouse-client` could segfault on exit while loading data for command-line suggestions if it was run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alexey Zatelepin](https://github.com/ztlpn))
- Fixed an error: if there is a database with a `Dictionary` engine, all dictionaries are forced to load at server startup, and if there is a dictionary with a ClickHouse source from localhost, that dictionary cannot load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an error when system logs were tried to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
- Added the `sumMapWithOverflow` function. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
- Fixed segfault with `allow_experimental_multiple_joins_emulation`. [52de2c](https://github.com/ClickHouse/ClickHouse/commit/52de2cd927f7b5257dd67e175f0a5560a48840d0) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
- Fixed a fuzz test under the undefined behavior sanitizer: added a parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a rare race condition where removal of old data parts could fail with the `File not found` error. [\#4378](https://github.com/ClickHouse/ClickHouse/pull/4378) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed package installation with a missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-5}

- Debian package: correct the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
- Various build fixes for FreeBSD. [\#4225](https://github.com/ClickHouse/ClickHouse/pull/4225) ([proller](https://github.com/proller))
- Added the ability to create, fill and drop tables in perftest. [\#4220](https://github.com/ClickHouse/ClickHouse/pull/4220) ([alesapin](https://github.com/alesapin))
- Added a script to check for duplicate includes. [\#4326](https://github.com/ClickHouse/ClickHouse/pull/4326) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added the ability to run queries by index in a performance test. [\#4264](https://github.com/ClickHouse/ClickHouse/pull/4264) ([alesapin](https://github.com/alesapin))
- A package with debug symbols is suggested to be installed. [\#4274](https://github.com/ClickHouse/ClickHouse/pull/4274) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Refactoring of performance-test. Better logging and signal handling. [\#4171](https://github.com/ClickHouse/ClickHouse/pull/4171) ([alesapin](https://github.com/alesapin))
- Added docs for the anonymized Yandex.Metrica datasets. [\#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([alesapin](https://github.com/alesapin))
- Added a tool for converting an old month-partitioned part to the custom-partitioned format. [\#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alexey Zatelepin](https://github.com/ztlpn))
- Added docs about two datasets in s3. [\#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([alesapin](https://github.com/alesapin))
- Added a script that creates a changelog from pull request descriptions. [\#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [\#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Added a Puppet module for ClickHouse. [\#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov))
- Added docs for a group of undocumented functions. [\#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([Winter Zhang](https://github.com/zhang2014))
- ARM build fixes. [\#4210](https://github.com/ClickHouse/ClickHouse/pull/4210) [\#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [\#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller))
- Dictionary tests are now able to run from `ctest`. [\#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller))
- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added a check of SSE and AVX instructions at start. [\#4234](https://github.com/ClickHouse/ClickHouse/pull/4234) ([Igr](https://github.com/igron99))
- The init script will wait for the server until it starts. [\#4281](https://github.com/ClickHouse/ClickHouse/pull/4281) ([proller](https://github.com/proller))

#### Backward Incompatible Changes {#backward-incompatible-changes-1}

- Removed the `allow_experimental_low_cardinality_type` setting. `LowCardinality` data types are production ready. [\#4323](https://github.com/ClickHouse/ClickHouse/pull/4323) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Reduced the mark cache size and the uncompressed cache size according to the available memory amount. [\#4240](https://github.com/ClickHouse/ClickHouse/pull/4240) ([Konstantin Lopatin](https://github.com/k-lopatin))
- Added the keyword `INDEX` in the `CREATE TABLE` query. A column with the name `index` has to be quoted with backticks or double quotes: `` `index` ``. [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
- `sumMap` now promotes the result type instead of overflowing. The old `sumMap` behavior can be obtained by using the `sumMapWithOverflow` function, as shown in the example below. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
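A sketch of the difference (the inline subquery with array literals is only for illustration; with `UInt8` inputs the sum 300 no longer fits the source type):

```sql
SELECT
    sumMap(k, v),             -- ([1], [300]): the result type is promoted
    sumMapWithOverflow(k, v)  -- ([1], [44]):  300 wraps around in UInt8
FROM (SELECT [1] AS k, [200] AS v UNION ALL SELECT [1], [100]);
```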

#### Performance Improvements {#performance-improvements-4}

- `std::sort` was replaced by `pdqsort` for queries without `LIMIT`. [\#4236](https://github.com/ClickHouse/ClickHouse/pull/4236) ([Evgenii Pravda](https://github.com/kvinty))
- Now the server reuses threads from a global thread pool. This affects performance in some corner cases. [\#4150](https://github.com/ClickHouse/ClickHouse/pull/4150) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Improvements {#improvements-5}

- Implemented AIO support for FreeBSD. [\#4305](https://github.com/ClickHouse/ClickHouse/pull/4305) ([urgordeadbeef](https://github.com/urgordeadbeef))
- `SELECT * FROM a JOIN b USING a, b` now returns the `a` and `b` columns only from the left table (see the example after this list). [\#4141](https://github.com/ClickHouse/ClickHouse/pull/4141) ([Artem Zuikov](https://github.com/4ertus2))
- Allow the `-C` option of the client to work the same as the `-c` option. [\#4232](https://github.com/ClickHouse/ClickHouse/pull/4232) ([syominsergey](https://github.com/syominsergey))
- Now the `--password` option used without a value requires the password to be read from stdin. [\#4230](https://github.com/ClickHouse/ClickHouse/pull/4230) ([BSD\_Conqueror](https://github.com/bsd-conqueror))
- Added highlighting of unescaped metacharacters in string literals that contain `LIKE` expressions or regexps. [\#4327](https://github.com/ClickHouse/ClickHouse/pull/4327) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added cancelling of HTTP read-only queries if the client socket goes away. [\#4213](https://github.com/ClickHouse/ClickHouse/pull/4213) ([nvartolomei](https://github.com/nvartolomei))
- Now the server reports progress to keep client connections alive. [\#4215](https://github.com/ClickHouse/ClickHouse/pull/4215) ([Ivan](https://github.com/abyss7))
- A slightly better message with the reason for an OPTIMIZE query with the `optimize_throw_if_noop` setting enabled. [\#4294](https://github.com/ClickHouse/ClickHouse/pull/4294) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added support of the `--version` option for the clickhouse server. [\#4251](https://github.com/ClickHouse/ClickHouse/pull/4251) ([Konstantin Lopatin](https://github.com/k-lopatin))
- Added the `--help/-h` option to `clickhouse-server`. [\#4233](https://github.com/ClickHouse/ClickHouse/pull/4233) ([Yuriy Baranov](https://github.com/yurriy))
- Added support for scalar subqueries with an aggregate function state result. [\#4348](https://github.com/ClickHouse/ClickHouse/pull/4348) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Improved server shutdown time and the waiting time of ALTERs. [\#4372](https://github.com/ClickHouse/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added info about the replicated\_can\_become\_leader setting to system.replicas and added logging if the replica won't try to become leader. [\#4379](https://github.com/ClickHouse/ClickHouse/pull/4379) ([Alexey Zatelepin](https://github.com/ztlpn))
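A sketch of the `USING` change, assuming two hypothetical tables `t1` and `t2` that both contain columns `a` and `b`:

```sql
-- Previously SELECT * returned the a and b columns from both sides;
-- now they are taken from the left table only.
SELECT * FROM t1 JOIN t2 USING a, b;
```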

## ClickHouse release 19.1 {#clickhouse-release-19-1}

### ClickHouse release 19.1.14, 2019-03-14 {#clickhouse-release-19-1-14-2019-03-14}

- Fixed the error `Column ... queried more than once` that could happen if the setting `asterisk_left_columns_only` is set to 1 in case of using `GLOBAL JOIN` with `SELECT *` (a rare case). The issue does not exist in 19.3 and newer. [6bac7d8d](https://github.com/ClickHouse/ClickHouse/pull/4692/commits/6bac7d8d11a9b0d6de0b32b53c47eb2f6f8e7062) ([Artem Zuikov](https://github.com/4ertus2))

### ClickHouse release 19.1.13, 2019-03-12 {#clickhouse-release-19-1-13-2019-03-12}

This release contains exactly the same set of patches as 19.3.7.

### ClickHouse release 19.1.10, 2019-03-03 {#clickhouse-release-19-1-10-2019-03-03}

This release contains exactly the same set of patches as 19.3.6.

## ClickHouse release 19.1 {#clickhouse-release-19-1-1}

### ClickHouse release 19.1.9, 2019-02-21 {#clickhouse-release-19-1-9-2019-02-21}

#### Bug Fixes {#bug-fixes-18}

- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed backward incompatibility of the table function `remote` introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.1.8, 2019-02-16 {#clickhouse-release-19-1-8-2019-02-16}

#### Bug Fixes {#bug-fixes-19}

- Fixed package installation with a missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))

## ClickHouse release 19.1 {#clickhouse-release-19-1-2}

### ClickHouse release 19.1.7, 2019-02-15 {#clickhouse-release-19-1-7-2019-02-15}

#### Bug Fixes {#bug-fixes-20}

- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
- Fixed an error when system logs were tried to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an error: if there is a database with a `Dictionary` engine, all dictionaries are forced to load at server startup, and if there is a dictionary with a ClickHouse source from localhost, that dictionary cannot load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alexey Zatelepin](https://github.com/ztlpn))
- `clickhouse-client` could segfault on exit while loading data for command-line suggestions if it was run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a race condition when selecting from `system.tables` could give the `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an issue: local ClickHouse dictionaries were loaded via TCP, but they should load within the process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixed a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
- Fixed an infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a `compile_expressions` bug with comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
- Fixed segmentation fault with `uncompressed_cache=1` and an exception with a wrong uncompressed size. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query while the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed execution of aggregate functions with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Debian package: correct the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
- Fixed a fuzz test under the undefined behavior sanitizer: added a parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Made the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
- Fixed `Not found column` for duplicate columns in the JOIN ON section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed crash on dictionary reload if the dictionary is not available. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of a conditional operator (the function `if`). Added a generic case for the function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.1.6, 2019-01-24 {#clickhouse-release-19-1-6-2019-01-24}

#### New Features {#new-features-7}

- Custom per-column compression codecs for tables. [\#3899](https://github.com/ClickHouse/ClickHouse/pull/3899) [\#4111](https://github.com/ClickHouse/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag))
- Added the compression codec `Delta`. [\#4052](https://github.com/ClickHouse/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin))
- Allow `ALTER`ing compression codecs. [\#4054](https://github.com/ClickHouse/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin))
- Added the functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility (see the examples after this list). [\#3826](https://github.com/ClickHouse/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov))
- Support for writing to an `HDFS` table or the `hdfs` table function. [\#4084](https://github.com/ClickHouse/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin))
- Added functions to search for multiple constant strings in a big haystack: `multiPosition`, `multiSearch`, `firstMatch`, also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [\#4053](https://github.com/ClickHouse/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1))
- Pruning of unused shards if a `SELECT` query filters by the sharding key (setting `optimize_skip_unused_shards`). [\#3851](https://github.com/ClickHouse/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7))
- Allow the `Kafka` engine to ignore some number of parsing errors per block. [\#4094](https://github.com/ClickHouse/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7))
- Added support for `CatBoost` multiclass model evaluation. The function `modelEvaluate` returns a tuple with raw per-class predictions for multiclass models. `libcatboostmodel.so` should be built with [\#607](https://github.com/catboost/catboost/pull/607). [\#3959](https://github.com/ClickHouse/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Added the functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [\#4097](https://github.com/ClickHouse/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea))
- Added the hashing functions `xxHash64` and `xxHash32`. [\#3905](https://github.com/ClickHouse/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov))
- Added the `gccMurmurHash` hashing function (GCC-flavoured Murmur hash), which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). [\#4000](https://github.com/ClickHouse/ClickHouse/pull/4000) ([sundy-li](https://github.com/sundy-li))
- Added the hashing functions `javaHash`, `hiveHash`. [\#3811](https://github.com/ClickHouse/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365))
- Added the table function `remoteSecure`. It works like `remote`, but uses a secure connection. [\#4088](https://github.com/ClickHouse/ClickHouse/pull/4088) ([proller](https://github.com/proller))
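A few hypothetical queries illustrating the new string functions and `remoteSecure` (the host and port are placeholders):

```sql
SELECT trim(BOTH ' ' FROM '  hello  ');  -- 'hello'
SELECT left('ClickHouse', 5);            -- 'Click'

-- Like remote(), but over a TLS connection (9440 is the typical secure port)
SELECT count() FROM remoteSecure('example-host:9440', system.one);
```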

#### Experimental Features {#experimental-features-3}

- Added multiple JOINs emulation (the `allow_experimental_multiple_joins_emulation` setting). [\#3946](https://github.com/ClickHouse/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2))

#### Bug Fixes {#bug-fixes-21}

- Made the `compiled_expression_cache_size` setting limited by default to lower memory consumption. [\#4041](https://github.com/ClickHouse/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin))
- Fixed a bug that led to hangups in threads that perform ALTERs of replicated tables and in the thread that updates the configuration from ZooKeeper. [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3891](https://github.com/ClickHouse/ClickHouse/issues/3891) [\#3934](https://github.com/ClickHouse/ClickHouse/pull/3934) ([Alexey Zatelepin](https://github.com/ztlpn))
- Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task, and then all replicas except one failing with a ZooKeeper error. [\#3904](https://github.com/ClickHouse/ClickHouse/pull/3904) ([Alexey Zatelepin](https://github.com/ztlpn))
- Fixed a bug where `from_zk` config elements were not refreshed after a request to ZooKeeper timed out. [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3947](https://github.com/ClickHouse/ClickHouse/pull/3947) ([Alexey Zatelepin](https://github.com/ztlpn))
- Fixed a bug with a wrong prefix for IPv4 subnet masks. [\#3945](https://github.com/ClickHouse/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin))
- Fixed a crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [\#3956](https://github.com/ClickHouse/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [\#4009](https://github.com/ClickHouse/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin))
- Fixed a leak of netlink sockets. They were placed in a pool where they were never deleted, and new sockets were created at the start of a new thread when all current sockets were in use. [\#4017](https://github.com/ClickHouse/ClickHouse/pull/4017) ([Alexey Zatelepin](https://github.com/ztlpn))
- Fixed a bug with closing the `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking the `odbc-bridge` subprocess. [\#4120](https://github.com/ClickHouse/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin))
- Fixed monotonic String-to-UInt conversion in case of usage of String in a primary key. [\#3870](https://github.com/ClickHouse/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014))
- Fixed an error in the calculation of integer conversion function monotonicity. [\#3921](https://github.com/ClickHouse/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed segfault in the `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [\#3909](https://github.com/ClickHouse/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed UB in StorageMerge. [\#3910](https://github.com/ClickHouse/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird))
- Fixed segfault in the functions `addDays`, `subtractDays`. [\#3913](https://github.com/ClickHouse/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an error: the functions `round`, `floor`, `trunc`, `ceil` could return a bogus result when executed on an integer argument with a large negative scale. [\#3914](https://github.com/ClickHouse/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug induced by ‘kill query sync’ that led to a core dump. [\#3916](https://github.com/ClickHouse/ClickHouse/pull/3916) ([fancyqlx](https://github.com/fancyqlx))
- Fixed a bug with a long delay after an empty replication queue. [\#3928](https://github.com/ClickHouse/ClickHouse/pull/3928) [\#3932](https://github.com/ClickHouse/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin))
- Fixed excessive memory usage in case of inserting into a table with a `LowCardinality` primary key. [\#3955](https://github.com/ClickHouse/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Fixed `LowCardinality` serialization for the `Native` format in case of empty arrays. [\#3907](https://github.com/ClickHouse/ClickHouse/issues/3907) [\#4011](https://github.com/ClickHouse/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Fixed an incorrect result when using DISTINCT by a single `LowCardinality` numeric column. [\#3895](https://github.com/ClickHouse/ClickHouse/issues/3895) [\#4012](https://github.com/ClickHouse/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Fixed specialized aggregation with a `LowCardinality` key (in case the `compile` setting is enabled). [\#3886](https://github.com/ClickHouse/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Fixed user and password forwarding for replicated table queries. [\#3957](https://github.com/ClickHouse/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu))
- Fixed a very rare race condition that could happen when listing tables in a Dictionary database while reloading dictionaries. [\#3970](https://github.com/ClickHouse/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an incorrect result when ROLLUP or CUBE was used. [\#3756](https://github.com/ClickHouse/ClickHouse/issues/3756) [\#3837](https://github.com/ClickHouse/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection))
- Fixed column aliases for queries with `JOIN ON` syntax and distributed tables. [\#3980](https://github.com/ClickHouse/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014))
- Fixed an error in the internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse itself and was relevant only for those who use the ClickHouse codebase directly as a library. [\#3935](https://github.com/ClickHouse/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Improvements {#improvements-6}

- Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements, along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [\#3900](https://github.com/ClickHouse/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea))
- Function `parseDateTimeBestEffort`: support for the formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar (see the examples after this list). [\#3922](https://github.com/ClickHouse/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `CapnProtoInputStream` now supports jagged structures. [\#4063](https://github.com/ClickHouse/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer))
- Usability improvement: added a check that the server process is started from the owner of the data directory. Do not allow starting the server from root if the data belongs to a non-root user. [\#3785](https://github.com/ClickHouse/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev))
- Better logic of checking required columns during analysis of queries with JOINs. [\#3930](https://github.com/ClickHouse/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2))
- Decreased the number of connections in case of a large number of distributed tables on a single server. [\#3726](https://github.com/ClickHouse/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014))
- Supported totals row for a `WITH TOTALS` query for the ODBC driver. [\#3836](https://github.com/ClickHouse/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb))
- Allowed using `Enum`s as integers inside the if function. [\#3875](https://github.com/ClickHouse/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7))
- Added the `low_cardinality_allow_in_native_format` setting. If disabled, do not use the `LowCardinality` type in the `Native` format. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai))
- Removed some redundant objects from the compiled expressions cache to lower memory usage. [\#4042](https://github.com/ClickHouse/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin))
- Added a check that the `SET send_logs_level = 'value'` query accepts an appropriate value. [\#3873](https://github.com/ClickHouse/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx))
- Fixed the data type check in type conversion functions. [\#3896](https://github.com/ClickHouse/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014))
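For example, all of the following should now parse (the dates are taken from the list of formats above):

```sql
SELECT parseDateTimeBestEffort('23.10.2018 12:12:57');
SELECT parseDateTimeBestEffort('23-10-2018');
SELECT parseDateTimeBestEffort('23/October/2018');
```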

#### Performance Improvements {#performance-improvements-5}

- Added the MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, replicated tables will store compact part metadata in a single part znode. This can dramatically reduce the ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that does not support it (see the example after this list). [\#3960](https://github.com/ClickHouse/ClickHouse/pull/3960) ([Alexey Zatelepin](https://github.com/ztlpn))
- Added a DFA-based implementation for the functions `sequenceMatch` and `sequenceCount` in case the pattern does not contain time. [\#4004](https://github.com/ClickHouse/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
- Performance improvement for integer number serialization. [\#3968](https://github.com/ClickHouse/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird))
- Zero left padding of PODArray so that the -1 element is always valid and zeroed. It is used for branchless calculation of offsets. [\#3920](https://github.com/ClickHouse/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird))
- Reverted the `jemalloc` version, which led to performance degradation. [\#4018](https://github.com/ClickHouse/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov))
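A sketch of enabling the setting for a single table (the table definition and ZooKeeper path are hypothetical):

```sql
CREATE TABLE example
(
    date Date,
    id UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/example', '{replica}')
PARTITION BY toYYYYMM(date)
ORDER BY id
SETTINGS use_minimalistic_part_header_in_zookeeper = 1;
```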

#### Backward Incompatible Changes {#backward-incompatible-changes-2}

- Removed the undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [\#3887](https://github.com/ClickHouse/ClickHouse/pull/3887) ([Alexey Zatelepin](https://github.com/ztlpn))
- Removed the function `shardByHash`. [\#3833](https://github.com/ClickHouse/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Forbid using scalar subqueries with a result of type `AggregateFunction`. [\#3865](https://github.com/ClickHouse/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-6}

- Added support for the PowerPC (`ppc64le`) build. [\#4132](https://github.com/ClickHouse/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1))
- Stateful functional tests are run on a publicly available dataset. [\#3969](https://github.com/ClickHouse/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [\#4136](https://github.com/ClickHouse/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Updated the `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of the raw C interface. [\#4025](https://github.com/ClickHouse/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7))
- Updated the `mariadb-client` library. Fixed one of the issues found by UBSan. [\#3924](https://github.com/ClickHouse/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Some fixes for UBSan builds. [\#3926](https://github.com/ClickHouse/ClickHouse/pull/3926) [\#3021](https://github.com/ClickHouse/ClickHouse/pull/3021) [\#3948](https://github.com/ClickHouse/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added per-commit runs of tests with the UBSan build.
- Added per-commit runs of the PVS-Studio static analyzer.
- Fixed bugs found by PVS-Studio. [\#4013](https://github.com/ClickHouse/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed glibc compatibility issues. [\#4100](https://github.com/ClickHouse/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Moved Docker images to 18.10 and added a compatibility file for glibc \>= 2.28. [\#3965](https://github.com/ClickHouse/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin))
- Added an env variable for users who do not want to chown directories in the server Docker image. [\#3967](https://github.com/ClickHouse/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin))
- Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [\#3986](https://github.com/ClickHouse/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added a few more warnings that are available only in clang 8. [\#3993](https://github.com/ClickHouse/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [\#3989](https://github.com/ClickHouse/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej))
- Added sanitizer variables for test images. [\#4072](https://github.com/ClickHouse/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin))
- The `clickhouse-server` debian package will recommend the `libcap2-bin` package for using the `setcap` tool to set capabilities. This is optional. [\#4093](https://github.com/ClickHouse/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Improved compilation time, fixed includes. [\#3898](https://github.com/ClickHouse/ClickHouse/pull/3898) ([proller](https://github.com/proller))
- Added performance tests for hash functions. [\#3918](https://github.com/ClickHouse/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov))
- Fixed cyclic library dependencies. [\#3958](https://github.com/ClickHouse/ClickHouse/pull/3958) ([proller](https://github.com/proller))
- Improved compilation with low available memory. [\#4030](https://github.com/ClickHouse/ClickHouse/pull/4030) ([proller](https://github.com/proller))
- Added a test script to reproduce the performance degradation in `jemalloc`. [\#4036](https://github.com/ClickHouse/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed misspellings in comments and string literals under `dbms`. [\#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha))
- Fixed typos in comments. [\#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty))

## [Changelog for 2018](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2018.md) {#changelog-for-2018}
diff --git a/docs/ru/whats_new/changelog/index.md b/docs/ru/whats_new/changelog/index.md
new file mode 100644
index 00000000000..bcfe62cbd0b
--- /dev/null
+++ b/docs/ru/whats_new/changelog/index.md
@@ -0,0 +1,650 @@
---
toc_folder_title: Changelog
toc_priority: 74
toc_title: '2020'
---

## ClickHouse release v20.3

### ClickHouse release v20.3.4.10, 2020-03-20

#### Bug Fix
* This release also contains all bug fixes from 20.1.8.41.
* Fix missing `rows_before_limit_at_least` for queries over http (with processors pipeline). This fixes [#9730](https://github.com/ClickHouse/ClickHouse/issues/9730). [#9757](https://github.com/ClickHouse/ClickHouse/pull/9757) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+
+### ClickHouse release v20.3.3.6, 2020-03-17
+
+#### Bug Fix
+* This release also contains all bug fixes from 20.1.7.38
+* Fix a bug in replication that didn't allow replication to work if the user had executed mutations on the previous version. This fixes [#9645](https://github.com/ClickHouse/ClickHouse/issues/9645). [#9652](https://github.com/ClickHouse/ClickHouse/pull/9652) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+* Add setting `use_compact_format_in_distributed_parts_names` which allows writing files for `INSERT` queries into `Distributed` tables in a more compact format. This fixes [#9647](https://github.com/ClickHouse/ClickHouse/issues/9647). [#9653](https://github.com/ClickHouse/ClickHouse/pull/9653) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+
+### ClickHouse release v20.3.2.1, 2020-03-12
+
+#### Backward Incompatible Change
+
+* Fixed the issue `file name too long` when sending data for `Distributed` tables for a large number of replicas. Fixed the issue that replica credentials were exposed in the server log. The format of the directory name on disk was changed to `[shard{shard_index}[_replica{replica_index}]]`. [#8911](https://github.com/ClickHouse/ClickHouse/pull/8911) ([Mikhail Korotov](https://github.com/millb)) After you upgrade to the new version, you will not be able to downgrade without manual intervention, because the old server version does not recognize the new directory format. If you want to downgrade, you have to manually rename the corresponding directories to the old format. This change is relevant only if you have used asynchronous `INSERT`s to `Distributed` tables. In version 20.3.3 we will introduce a setting that will allow you to enable the new format gradually.
+* Changed the format of replication log entries for mutation commands. You have to wait for old mutations to process before installing the new version.
+* Implement a simple memory profiler that dumps stacktraces to `system.trace_log` every N bytes over the soft allocation limit [#8765](https://github.com/ClickHouse/ClickHouse/pull/8765) ([Ivan](https://github.com/abyss7)) [#9472](https://github.com/ClickHouse/ClickHouse/pull/9472) ([alexey-milovidov](https://github.com/alexey-milovidov)) The column of `system.trace_log` was renamed from `timer_type` to `trace_type` (see the query sketch below). This will require changes in third-party performance analysis and flamegraph processing tools.
+* Use the OS thread id everywhere instead of the internal thread number. This fixes [#7477](https://github.com/ClickHouse/ClickHouse/issues/7477) Old `clickhouse-client` cannot receive logs that are sent from the server when the setting `send_logs_level` is enabled, because the names and types of the structured log messages were changed. On the other hand, different server versions can send logs with different types to each other. When you don't use the `send_logs_level` setting, you should not care. [#8954](https://github.com/ClickHouse/ClickHouse/pull/8954) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Remove the `indexHint` function [#9542](https://github.com/ClickHouse/ClickHouse/pull/9542) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Remove the `findClusterIndex`, `findClusterValue` functions. This fixes [#8641](https://github.com/ClickHouse/ClickHouse/issues/8641). If you were using these functions, send an email to `clickhouse-feedback@yandex-team.com` [#9543](https://github.com/ClickHouse/ClickHouse/pull/9543) ([alexey-milovidov](https://github.com/alexey-milovidov))
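+
+A minimal probe of the renamed column referenced a few items above (the concrete set of `trace_type` values isn't listed in this entry, so treat the output as installation-dependent):
+
+``` sql
+-- Count collected traces per type via the renamed `trace_type` column
+SELECT trace_type, count() FROM system.trace_log GROUP BY trace_type;
+```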
+* Now it's not allowed to create columns or add columns with a `SELECT` subquery as the default expression. [#9481](https://github.com/ClickHouse/ClickHouse/pull/9481) ([alesapin](https://github.com/alesapin))
+* Require aliases for subqueries in JOIN. [#9274](https://github.com/ClickHouse/ClickHouse/pull/9274) ([Artem Zuikov](https://github.com/4ertus2))
+* Improved `ALTER MODIFY/ADD` queries logic. Now you cannot `ADD` a column without a type, `MODIFY` default expression doesn't change the type of the column, and `MODIFY` type doesn't lose the default expression value. Fixes [#8669](https://github.com/ClickHouse/ClickHouse/issues/8669). [#9227](https://github.com/ClickHouse/ClickHouse/pull/9227) ([alesapin](https://github.com/alesapin))
+* Require the server to be restarted to apply the changes in logging configuration. This is a temporary workaround to avoid the bug where the server logs to a deleted log file (see [#8696](https://github.com/ClickHouse/ClickHouse/issues/8696)). [#8707](https://github.com/ClickHouse/ClickHouse/pull/8707) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* The setting `experimental_use_processors` is enabled by default. This setting enables usage of the new query pipeline. This is internal refactoring and we expect no visible changes. If you see any issues, set it back to zero. [#8768](https://github.com/ClickHouse/ClickHouse/pull/8768) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### New Feature
+* Add `Avro` and `AvroConfluent` input/output formats [#8571](https://github.com/ClickHouse/ClickHouse/pull/8571) ([Andrew Onyshchuk](https://github.com/oandrew)) [#8957](https://github.com/ClickHouse/ClickHouse/pull/8957) ([Andrew Onyshchuk](https://github.com/oandrew)) [#8717](https://github.com/ClickHouse/ClickHouse/pull/8717) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Multi-threaded and non-blocking updates of expired keys in `cache` dictionaries (with optional permission to read the old ones). [#8303](https://github.com/ClickHouse/ClickHouse/pull/8303) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Add query `ALTER ... MATERIALIZE TTL`. It runs a mutation that forces the removal of expired data by TTL and recalculates meta-information about TTL in all parts. [#8775](https://github.com/ClickHouse/ClickHouse/pull/8775) ([Anton Popov](https://github.com/CurtizJ))
+* Switch from HashJoin to MergeJoin (on disk) if needed [#9082](https://github.com/ClickHouse/ClickHouse/pull/9082) ([Artem Zuikov](https://github.com/4ertus2))
+* Added `MOVE PARTITION` command for `ALTER TABLE` [#4729](https://github.com/ClickHouse/ClickHouse/issues/4729) [#6168](https://github.com/ClickHouse/ClickHouse/pull/6168) ([Guillaume Tassery](https://github.com/YiuRULE))
+* Reloading the storage configuration from the configuration file on the fly. [#8594](https://github.com/ClickHouse/ClickHouse/pull/8594) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Allowed changing `storage_policy` to a no less rich one. [#8107](https://github.com/ClickHouse/ClickHouse/pull/8107) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Added support for globs/wildcards for S3 storage and table function; see the sketch below. [#8851](https://github.com/ClickHouse/ClickHouse/pull/8851) ([Vladimir Chebotarev](https://github.com/excitoon))
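+
+A hedged sketch of the glob support (bucket name, file layout, and schema are hypothetical):
+
+``` sql
+-- `{1..10}` expands to data_1.csv … data_10.csv; `*` and `?` are also supported
+SELECT count()
+FROM s3('https://my-bucket.s3.amazonaws.com/data_{1..10}.csv', 'CSV', 'id UInt64, value String');
+```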
+* Implement `bitAnd`, `bitOr`, `bitXor`, `bitNot` for the `FixedString(N)` datatype. [#9091](https://github.com/ClickHouse/ClickHouse/pull/9091) ([Guillaume Tassery](https://github.com/YiuRULE))
+* Added function `bitCount`. This fixes [#8702](https://github.com/ClickHouse/ClickHouse/issues/8702). [#8708](https://github.com/ClickHouse/ClickHouse/pull/8708) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#8749](https://github.com/ClickHouse/ClickHouse/pull/8749) ([ikopylov](https://github.com/ikopylov))
+* Add `generateRandom` table function to generate random rows with a given schema. Allows populating an arbitrary test table with data. [#8994](https://github.com/ClickHouse/ClickHouse/pull/8994) ([Ilya Yatsishin](https://github.com/qoega))
+* `JSONEachRowFormat`: support the special case when objects are enclosed in a top-level array. [#8860](https://github.com/ClickHouse/ClickHouse/pull/8860) ([Kruglov Pavel](https://github.com/Avogar))
+* Now it's possible to create a column with a `DEFAULT` expression which depends on a column with a default `ALIAS` expression. [#9489](https://github.com/ClickHouse/ClickHouse/pull/9489) ([alesapin](https://github.com/alesapin))
+* Allow specifying `--limit` larger than the source data size in `clickhouse-obfuscator`. The data will repeat itself with a different random seed. [#9155](https://github.com/ClickHouse/ClickHouse/pull/9155) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added `groupArraySample` function (similar to `groupArray`) with a reservoir sampling algorithm. [#8286](https://github.com/ClickHouse/ClickHouse/pull/8286) ([Amos Bird](https://github.com/amosbird))
+* Now you can monitor the size of the update queue in `cache`/`complex_key_cache` dictionaries via system metrics. [#9413](https://github.com/ClickHouse/ClickHouse/pull/9413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Allow using CRLF as a line separator in CSV output format when the setting `output_format_csv_crlf_end_of_line` is set to 1 [#8934](https://github.com/ClickHouse/ClickHouse/pull/8934) [#8935](https://github.com/ClickHouse/ClickHouse/pull/8935) [#8963](https://github.com/ClickHouse/ClickHouse/pull/8963) ([Mikhail Korotov](https://github.com/millb))
+* Implement more functions of the [H3](https://github.com/uber/h3) API: `h3GetBaseCell`, `h3HexAreaM2`, `h3IndexesAreNeighbors`, `h3ToChildren`, `h3ToString` and `stringToH3`; see the sketch below. [#8938](https://github.com/ClickHouse/ClickHouse/pull/8938) ([Nico Mandery](https://github.com/nmandery))
+* New setting introduced: `max_parser_depth` to control the maximum stack size and allow large complex queries. This fixes [#6681](https://github.com/ClickHouse/ClickHouse/issues/6681) and [#7668](https://github.com/ClickHouse/ClickHouse/issues/7668). [#8647](https://github.com/ClickHouse/ClickHouse/pull/8647) ([Maxim Smirnov](https://github.com/qMBQx8GH))
+* Add a setting `force_optimize_skip_unused_shards` to throw if skipping of unused shards is not possible [#8805](https://github.com/ClickHouse/ClickHouse/pull/8805) ([Azat Khuzhin](https://github.com/azat))
+* Allow configuring multiple disks/volumes for storing data to send in the `Distributed` engine [#8756](https://github.com/ClickHouse/ClickHouse/pull/8756) ([Azat Khuzhin](https://github.com/azat))
+* Support storage policy (`<tmp_policy>`) for storing temporary data. [#8750](https://github.com/ClickHouse/ClickHouse/pull/8750) ([Azat Khuzhin](https://github.com/azat))
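+
+Quick sketches for two of the function additions above; the H3 literal is the sample resolution-9 cell from the H3 documentation:
+
+``` sql
+SELECT bitCount(333);                              -- 5: 333 is 0b101001101
+SELECT h3ToString(stringToH3('8928308280fffff'));  -- round-trips an H3 index through UInt64 and back
+```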
+* Added `X-ClickHouse-Exception-Code` HTTP header that is set if an exception was thrown before sending data. This implements [#4971](https://github.com/ClickHouse/ClickHouse/issues/4971). [#8786](https://github.com/ClickHouse/ClickHouse/pull/8786) ([Mikhail Korotov](https://github.com/millb))
+* Added function `ifNotFinite`. It is just syntactic sugar: `ifNotFinite(x, y) = isFinite(x) ? x : y`. [#8710](https://github.com/ClickHouse/ClickHouse/pull/8710) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added `last_successful_update_time` column in the `system.dictionaries` table [#9394](https://github.com/ClickHouse/ClickHouse/pull/9394) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Add `blockSerializedSize` function (size on disk without compression) [#8952](https://github.com/ClickHouse/ClickHouse/pull/8952) ([Azat Khuzhin](https://github.com/azat))
+* Add function `moduloOrZero` [#9358](https://github.com/ClickHouse/ClickHouse/pull/9358) ([hcz](https://github.com/hczhcz))
+* Added system tables `system.zeros` and `system.zeros_mt` as well as table functions `zeros()` and `zeros_mt()`. The tables (and table functions) contain a single column with the name `zero` and the type `UInt8`. This column contains zeros. It is needed for test purposes as the fastest method to generate many rows. This fixes [#6604](https://github.com/ClickHouse/ClickHouse/issues/6604) [#9593](https://github.com/ClickHouse/ClickHouse/pull/9593) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Experimental Feature
+* Add a new compact format of parts in `MergeTree`-family tables in which all columns are stored in one file. It helps to increase the performance of small and frequent inserts. The old format (one file per column) is now called wide. The data storing format is controlled by the settings `min_bytes_for_wide_part` and `min_rows_for_wide_part`. [#8290](https://github.com/ClickHouse/ClickHouse/pull/8290) ([Anton Popov](https://github.com/CurtizJ))
+* Support for S3 storage for `Log`, `TinyLog` and `StripeLog` tables. [#8862](https://github.com/ClickHouse/ClickHouse/pull/8862) ([Pavel Kovalenko](https://github.com/Jokser))
+
+#### Bug Fix
+* Fixed inconsistent whitespace in log messages. [#9322](https://github.com/ClickHouse/ClickHouse/pull/9322) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix bug in which arrays of unnamed tuples were flattened as Nested structures on table creation. [#8866](https://github.com/ClickHouse/ClickHouse/pull/8866) ([achulkov2](https://github.com/achulkov2))
+* Fixed the issue when a "Too many open files" error may happen if there are too many files matching the glob pattern in a `File` table or `file` table function. Now files are opened lazily. This fixes [#8857](https://github.com/ClickHouse/ClickHouse/issues/8857) [#8861](https://github.com/ClickHouse/ClickHouse/pull/8861) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* DROP TEMPORARY TABLE now drops only the temporary table (see the sketch below). [#8907](https://github.com/ClickHouse/ClickHouse/pull/8907) ([Vitaly Baranov](https://github.com/vitlibar))
+* Remove the outdated partition when we shut down the server or DETACH/ATTACH a table. [#8602](https://github.com/ClickHouse/ClickHouse/pull/8602) ([Guillaume Tassery](https://github.com/YiuRULE))
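+
+A sketch of the corrected `DROP TEMPORARY TABLE` behaviour mentioned above (the table name `t` is hypothetical; the temporary table shadows the regular one here):
+
+``` sql
+CREATE TABLE t (x UInt8) ENGINE = Memory;
+CREATE TEMPORARY TABLE t (x UInt8);
+DROP TEMPORARY TABLE t;  -- removes only the temporary `t`; the Memory table survives
+```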
+* Fixed the issue when the amount of free space was not calculated correctly if the `data` directory is mounted to a separate device (a rare case). Now the default disk calculates the free space from the `data` subdirectory. This fixes [#7441](https://github.com/ClickHouse/ClickHouse/issues/7441) [#9257](https://github.com/ClickHouse/ClickHouse/pull/9257) ([Mikhail Korotov](https://github.com/millb))
+* Allow comma (cross) join with IN () inside. [#9251](https://github.com/ClickHouse/ClickHouse/pull/9251) ([Artem Zuikov](https://github.com/4ertus2))
+* Allow rewriting CROSS to INNER JOIN if there's a [NOT] LIKE operator in the WHERE section. [#9229](https://github.com/ClickHouse/ClickHouse/pull/9229) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix possible incorrect result after `GROUP BY` with the enabled setting `distributed_aggregation_memory_efficient`. Fixes [#9134](https://github.com/ClickHouse/ClickHouse/issues/9134). [#9289](https://github.com/ClickHouse/ClickHouse/pull/9289) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fixed metrics of cache dictionaries: found keys were counted as missed. [#9411](https://github.com/ClickHouse/ClickHouse/pull/9411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Fix replication protocol incompatibility introduced in [#8598](https://github.com/ClickHouse/ClickHouse/issues/8598). [#9412](https://github.com/ClickHouse/ClickHouse/pull/9412) ([alesapin](https://github.com/alesapin))
+* Fixed race condition on `queue_task_handle` at the startup of `ReplicatedMergeTree` tables. [#9552](https://github.com/ClickHouse/ClickHouse/pull/9552) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* The token `NOT` didn't work in the `SHOW TABLES NOT LIKE` query [#8727](https://github.com/ClickHouse/ClickHouse/issues/8727) [#8940](https://github.com/ClickHouse/ClickHouse/pull/8940) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added a range check to the function `h3EdgeLengthM`. Without this check, a buffer overflow is possible. [#8945](https://github.com/ClickHouse/ClickHouse/pull/8945) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed a bug in batched calculations of ternary logical OPs on multiple arguments (more than 10). [#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([Alexander Kazakov](https://github.com/Akazz))
+* Fix error of PREWHERE optimization, which could lead to segfaults or the `Inconsistent number of columns got from MergeTreeRangeReader` exception. [#9024](https://github.com/ClickHouse/ClickHouse/pull/9024) ([Anton Popov](https://github.com/CurtizJ))
+* Fix the unexpected `Timeout exceeded while reading from socket` exception, which randomly happens on a secure connection before the timeout is actually exceeded and when the query profiler is enabled. Also add the `connect_timeout_with_failover_secure_ms` setting (default 100ms), which is similar to `connect_timeout_with_failover_ms` but is used for secure connections (because the SSL handshake is slower than an ordinary TCP connection) [#9026](https://github.com/ClickHouse/ClickHouse/pull/9026) ([tavplubix](https://github.com/tavplubix))
+* Fix bug with mutations finalization, when a mutation may hang in a state with `parts_to_do=0` and `is_done=0`. [#9022](https://github.com/ClickHouse/ClickHouse/pull/9022) ([alesapin](https://github.com/alesapin))
+* Use the new ANY JOIN logic with the `partial_merge_join` setting. It's possible to make `ANY|ALL|SEMI LEFT` and `ALL INNER` joins with `partial_merge_join=1` now; see the sketch below. [#8932](https://github.com/ClickHouse/ClickHouse/pull/8932) ([Artem Zuikov](https://github.com/4ertus2))
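+
+A sketch of the new join path referenced in the last item (`left_table`/`right_table` are hypothetical):
+
+``` sql
+SET partial_merge_join = 1;  -- switch eligible joins to the merge-join path
+SELECT l.k, r.v
+FROM left_table AS l
+ANY LEFT JOIN right_table AS r USING (k);
+```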
+* Shard now clamps the settings received from the initiator to the shard's constraints instead of throwing an exception. This fix allows sending queries to a shard with other constraints. [#9447](https://github.com/ClickHouse/ClickHouse/pull/9447) ([Vitaly Baranov](https://github.com/vitlibar))
+* Fixed a memory management problem in `MergeTreeReadPool`. [#8791](https://github.com/ClickHouse/ClickHouse/pull/8791) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix the `toDecimal*OrNull()` functions family when called with the string `e`. Fixes [#8312](https://github.com/ClickHouse/ClickHouse/issues/8312) [#8764](https://github.com/ClickHouse/ClickHouse/pull/8764) ([Artem Zuikov](https://github.com/4ertus2))
+* Make sure that `FORMAT Null` sends no data to the client. [#8767](https://github.com/ClickHouse/ClickHouse/pull/8767) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Fix the bug that the timestamp in `LiveViewBlockInputStream` was not updated. `LIVE VIEW` is an experimental feature. [#8644](https://github.com/ClickHouse/ClickHouse/pull/8644) ([vxider](https://github.com/Vxider)) [#8625](https://github.com/ClickHouse/ClickHouse/pull/8625) ([vxider](https://github.com/Vxider))
+* Fixed `ALTER MODIFY TTL` wrong behavior which did not allow deleting old TTL expressions. [#8422](https://github.com/ClickHouse/ClickHouse/pull/8422) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fixed UBSan report in MergeTreeIndexSet. This fixes [#9250](https://github.com/ClickHouse/ClickHouse/issues/9250) [#9365](https://github.com/ClickHouse/ClickHouse/pull/9365) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed the behaviour of the `match` and `extract` functions when the haystack has zero bytes. The behaviour was wrong when the haystack was constant. This fixes [#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Avoid throwing from the destructor in the Apache Avro 3rd-party library. [#9066](https://github.com/ClickHouse/ClickHouse/pull/9066) ([Andrew Onyshchuk](https://github.com/oandrew))
+* Don't commit a batch polled from `Kafka` partially, as it can lead to holes in the data. [#8876](https://github.com/ClickHouse/ClickHouse/pull/8876) ([filimonov](https://github.com/filimonov))
+* Fix `joinGet` with nullable return types (see the sketch below). https://github.com/ClickHouse/ClickHouse/issues/8919 [#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
+* Fix data incompatibility when compressed with the `T64` codec. [#9016](https://github.com/ClickHouse/ClickHouse/pull/9016) ([Artem Zuikov](https://github.com/4ertus2)) Fix data type ids in the `T64` compression codec that lead to wrong (de)compression in affected versions. [#9033](https://github.com/ClickHouse/ClickHouse/pull/9033) ([Artem Zuikov](https://github.com/4ertus2))
+* Add setting `enable_early_constant_folding` and disable it in some cases that lead to errors. [#9010](https://github.com/ClickHouse/ClickHouse/pull/9010) ([Artem Zuikov](https://github.com/4ertus2))
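+
+A self-contained sketch of the fixed `joinGet` case a few items above (table and data are hypothetical; the point is the `Nullable` return column):
+
+``` sql
+CREATE TABLE j (k UInt32, v Nullable(String)) ENGINE = Join(ANY, LEFT, k);
+INSERT INTO j VALUES (1, 'one');
+SELECT joinGet('j', 'v', toUInt32(1));  -- returns 'one'; a missing key yields NULL
+```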
+* Fix the pushdown predicate optimizer with VIEW and enable the test [#9011](https://github.com/ClickHouse/ClickHouse/pull/9011) ([Winter Zhang](https://github.com/zhang2014))
+* Fix segfault in `Merge` tables, which can happen when reading from `File` storages [#9387](https://github.com/ClickHouse/ClickHouse/pull/9387) ([tavplubix](https://github.com/tavplubix))
+* Added a check for the storage policy in `ATTACH PARTITION FROM`, `REPLACE PARTITION`, `MOVE TO TABLE`. Otherwise it could make the data of the part inaccessible after restart and prevent ClickHouse from starting. [#9383](https://github.com/ClickHouse/ClickHouse/pull/9383) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix alters if there is a TTL set for the table. [#8800](https://github.com/ClickHouse/ClickHouse/pull/8800) ([Anton Popov](https://github.com/CurtizJ))
+* Fix the race condition that can happen when `SYSTEM RELOAD ALL DICTIONARIES` is executed while some dictionary is being modified/added/removed. [#8801](https://github.com/ClickHouse/ClickHouse/pull/8801) ([Vitaly Baranov](https://github.com/vitlibar))
+* In previous versions the `Memory` database engine used an empty data path, so tables were created in the `path` directory (e.g. `/var/lib/clickhouse/`), not in the data directory of the database (e.g. `/var/lib/clickhouse/db_name`). [#8753](https://github.com/ClickHouse/ClickHouse/pull/8753) ([tavplubix](https://github.com/tavplubix))
+* Fixed wrong log messages about a missing default disk or policy. [#9530](https://github.com/ClickHouse/ClickHouse/pull/9530) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix not(has()) for the bloom_filter index of array types. [#9407](https://github.com/ClickHouse/ClickHouse/pull/9407) ([achimbab](https://github.com/achimbab))
+* Allow the first column(s) in a table with the `Log` engine to be an alias [#9231](https://github.com/ClickHouse/ClickHouse/pull/9231) ([Ivan](https://github.com/abyss7))
+* Fix the order of ranges while reading from a `MergeTree` table in one thread. It could lead to exceptions from `MergeTreeRangeReader` or wrong query results. [#9050](https://github.com/ClickHouse/ClickHouse/pull/9050) ([Anton Popov](https://github.com/CurtizJ))
+* Make `reinterpretAsFixedString` return `FixedString` instead of `String`. [#9052](https://github.com/ClickHouse/ClickHouse/pull/9052) ([Andrew Onyshchuk](https://github.com/oandrew))
+* Avoid extremely rare cases when the user could get a wrong error message (`Success` instead of a detailed error description). [#9457](https://github.com/ClickHouse/ClickHouse/pull/9457) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Do not crash when using the `Template` format with an empty row template. [#8785](https://github.com/ClickHouse/ClickHouse/pull/8785) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Metadata files for system tables could be created in the wrong place [#8653](https://github.com/ClickHouse/ClickHouse/pull/8653) ([tavplubix](https://github.com/tavplubix)) Fixes [#8581](https://github.com/ClickHouse/ClickHouse/issues/8581).
+* Fix data race on exception_ptr in the cache dictionary [#8303](https://github.com/ClickHouse/ClickHouse/issues/8303). [#9379](https://github.com/ClickHouse/ClickHouse/pull/9379) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Do not throw an exception for the query `ATTACH TABLE IF NOT EXISTS`. Previously it was thrown if the table already exists, despite the `IF NOT EXISTS` clause. [#8967](https://github.com/ClickHouse/ClickHouse/pull/8967) ([Anton Popov](https://github.com/CurtizJ))
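+
+The now-idempotent form from the entry above (assuming a table `t` that is already attached):
+
+``` sql
+ATTACH TABLE IF NOT EXISTS t;  -- a no-op instead of an exception when `t` already exists
+```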
+* Fixed a missing closing paren in an exception message. [#8811](https://github.com/ClickHouse/ClickHouse/pull/8811) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Avoid the message `Possible deadlock avoided` at the startup of clickhouse-client in interactive mode. [#9455](https://github.com/ClickHouse/ClickHouse/pull/9455) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed the issue when the padding at the end of a base64 encoded value can be malformed. Update the base64 library. This fixes [#9491](https://github.com/ClickHouse/ClickHouse/issues/9491), closes [#9492](https://github.com/ClickHouse/ClickHouse/issues/9492) [#9500](https://github.com/ClickHouse/ClickHouse/pull/9500) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Prevent losing data in `Kafka` in rare cases when an exception happens after reading the suffix but before the commit. Fixes [#9378](https://github.com/ClickHouse/ClickHouse/issues/9378) [#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) ([filimonov](https://github.com/filimonov))
+* Fixed an exception in `DROP TABLE IF EXISTS` [#8663](https://github.com/ClickHouse/ClickHouse/pull/8663) ([Nikita Vasilev](https://github.com/nikvas0))
+* Fix crash when a user tries to `ALTER MODIFY SETTING` for the old-formatted `MergeTree` table engines family. [#9435](https://github.com/ClickHouse/ClickHouse/pull/9435) ([alesapin](https://github.com/alesapin))
+* Support for UInt64 numbers that don't fit in Int64 in JSON-related functions. Update SIMDJSON to master. This fixes [#9209](https://github.com/ClickHouse/ClickHouse/issues/9209) [#9344](https://github.com/ClickHouse/ClickHouse/pull/9344) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed the execution of inverted predicates when a non-strictly monotonic functional index is used. [#9223](https://github.com/ClickHouse/ClickHouse/pull/9223) ([Alexander Kazakov](https://github.com/Akazz))
+* Don't try to fold `IN` constant in `GROUP BY` [#8868](https://github.com/ClickHouse/ClickHouse/pull/8868) ([Amos Bird](https://github.com/amosbird))
+* Fix bug in `ALTER DELETE` mutations which leads to index corruption. This fixes [#9019](https://github.com/ClickHouse/ClickHouse/issues/9019) and [#8982](https://github.com/ClickHouse/ClickHouse/issues/8982). Additionally fix extremely rare race conditions in `ReplicatedMergeTree` `ALTER` queries. [#9048](https://github.com/ClickHouse/ClickHouse/pull/9048) ([alesapin](https://github.com/alesapin))
+* When the setting `compile_expressions` is enabled, you could get an `unexpected column` in `LLVMExecutableFunction` when we use the `Nullable` type [#8910](https://github.com/ClickHouse/ClickHouse/pull/8910) ([Guillaume Tassery](https://github.com/YiuRULE))
+* Multiple fixes for the `Kafka` engine: 1) fix duplicates that were appearing during consumer group rebalance. 2) Fix rare 'holes' that appeared when data was polled from several partitions with one poll and committed partially (now we always process/commit the whole polled block of messages). 3) Fix flushes by block size (before that, only flushing by timeout was working properly). 4) Better subscription procedure (with assignment feedback). 5) Make tests work faster (with default intervals and timeouts).
+Because data was not flushed by block size before (as it should be according to the documentation), this PR may lead to some performance degradation with default settings (due to more frequent and smaller flushes, which are less optimal). If you encounter a performance issue after this change, please increase `kafka_max_block_size` in the table to a bigger value (for example `CREATE TABLE ...Engine=Kafka ... SETTINGS ... kafka_max_block_size=524288`). Fixes [#7259](https://github.com/ClickHouse/ClickHouse/issues/7259) [#8917](https://github.com/ClickHouse/ClickHouse/pull/8917) ([filimonov](https://github.com/filimonov))
+* Fix `Parameter out of bound` exception in some queries after PREWHERE optimizations. [#8914](https://github.com/ClickHouse/ClickHouse/pull/8914) ([Baudouin Giard](https://github.com/bgiard))
+* Fixed the case of mixed constness of arguments of the function `arrayZip`. [#8705](https://github.com/ClickHouse/ClickHouse/pull/8705) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* When executing a `CREATE` query, fold constant expressions in storage engine arguments. Replace the empty database name with the current database. Fixes [#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [#3492](https://github.com/ClickHouse/ClickHouse/issues/3492) [#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) ([tavplubix](https://github.com/tavplubix))
+* Now it's not possible to create or add columns with simple cyclic aliases like `a DEFAULT b, b DEFAULT a`. [#9603](https://github.com/ClickHouse/ClickHouse/pull/9603) ([alesapin](https://github.com/alesapin))
+* Fixed a bug with a double move which may corrupt the original part. This is relevant if you use `ALTER TABLE MOVE` [#8680](https://github.com/ClickHouse/ClickHouse/pull/8680) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Allow the `interval` identifier to be parsed correctly without backticks. Fixed the issue when a query could not be executed even if the `interval` identifier is enclosed in backticks or double quotes. This fixes [#9124](https://github.com/ClickHouse/ClickHouse/issues/9124). [#9142](https://github.com/ClickHouse/ClickHouse/pull/9142) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed fuzz test and incorrect behaviour of the `bitTestAll`/`bitTestAny` functions. [#9143](https://github.com/ClickHouse/ClickHouse/pull/9143) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix a possible crash/wrong number of rows in `LIMIT n WITH TIES` when there are a lot of rows equal to the n'th row (see the sketch below). [#9464](https://github.com/ClickHouse/ClickHouse/pull/9464) ([tavplubix](https://github.com/tavplubix))
+* Fix mutations with parts written with enabled `insert_quorum`. [#9463](https://github.com/ClickHouse/ClickHouse/pull/9463) ([alesapin](https://github.com/alesapin))
+* Fix a data race at the destruction of `Poco::HTTPServer`. It could happen when the server is started and immediately shut down. [#9468](https://github.com/ClickHouse/ClickHouse/pull/9468) ([Anton Popov](https://github.com/CurtizJ))
+* Fix a bug in which a misleading error message was shown when running `SHOW CREATE TABLE a_table_that_does_not_exist`. [#8899](https://github.com/ClickHouse/ClickHouse/pull/8899) ([achulkov2](https://github.com/achulkov2))
+* Fixed the `Parameters are out of bound` exception in some rare cases when we have a constant in the `SELECT` clause together with an `ORDER BY` and a `LIMIT` clause. [#8892](https://github.com/ClickHouse/ClickHouse/pull/8892) ([Guillaume Tassery](https://github.com/YiuRULE))
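+
+A self-contained illustration of the fixed `WITH TIES` case (many rows equal to the n-th row):
+
+``` sql
+SELECT number % 3 AS g
+FROM numbers(10)
+ORDER BY g
+LIMIT 2 WITH TIES;  -- returns 4 rows: both requested rows have g = 0, and the remaining ties are included
+```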
+* Fix mutations finalization, when an already done mutation can have status `is_done=0`. [#9217](https://github.com/ClickHouse/ClickHouse/pull/9217) ([alesapin](https://github.com/alesapin))
+* Prevent executing `ALTER ADD INDEX` for MergeTree tables with the old syntax, because it doesn't work. [#8822](https://github.com/ClickHouse/ClickHouse/pull/8822) ([Mikhail Korotov](https://github.com/millb))
+* During server startup do not access the table that `LIVE VIEW` depends on, so the server will be able to start. Also remove `LIVE VIEW` dependencies when detaching a `LIVE VIEW`. `LIVE VIEW` is an experimental feature. [#8824](https://github.com/ClickHouse/ClickHouse/pull/8824) ([tavplubix](https://github.com/tavplubix))
+* Fix a possible segfault in `MergeTreeRangeReader` while executing `PREWHERE`. [#9106](https://github.com/ClickHouse/ClickHouse/pull/9106) ([Anton Popov](https://github.com/CurtizJ))
+* Fix possible mismatched checksums with column TTLs. [#9451](https://github.com/ClickHouse/ClickHouse/pull/9451) ([Anton Popov](https://github.com/CurtizJ))
+* Fixed a bug when parts were not being moved in the background by TTL rules in the case when there is only one volume. [#8672](https://github.com/ClickHouse/ClickHouse/pull/8672) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fixed the issue `Method createColumn() is not implemented for data type Set`. This fixes [#7799](https://github.com/ClickHouse/ClickHouse/issues/7799). [#8674](https://github.com/ClickHouse/ClickHouse/pull/8674) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Now we will try to finalize mutations more frequently. [#9427](https://github.com/ClickHouse/ClickHouse/pull/9427) ([alesapin](https://github.com/alesapin))
+* Fix `intDiv` by a minus-one constant [#9351](https://github.com/ClickHouse/ClickHouse/pull/9351) ([hcz](https://github.com/hczhcz))
+* Fix a possible race condition in `BlockIO`. [#9356](https://github.com/ClickHouse/ClickHouse/pull/9356) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix a bug leading to server termination when trying to use / drop a `Kafka` table created with wrong parameters. [#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) ([filimonov](https://github.com/filimonov))
+* Added a workaround if the OS returns a wrong result for the `timer_create` function. [#8837](https://github.com/ClickHouse/ClickHouse/pull/8837) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed an error in the usage of the `min_marks_for_seek` parameter. Fixed the error message when there is no sharding key in the Distributed table and we try to skip unused shards. [#8908](https://github.com/ClickHouse/ClickHouse/pull/8908) ([Azat Khuzhin](https://github.com/azat))
+
+#### Improvement
+* Implement `ALTER MODIFY/DROP` queries on top of mutations for the `ReplicatedMergeTree*` engines family. Now `ALTERS` blocks only at the metadata update stage, and doesn't block after that. [#8701](https://github.com/ClickHouse/ClickHouse/pull/8701) ([alesapin](https://github.com/alesapin))
+* Add the ability to rewrite CROSS to INNER JOINs with a `WHERE` section containing unqualified names; see the sketch below. [#9512](https://github.com/ClickHouse/ClickHouse/pull/9512) ([Artem Zuikov](https://github.com/4ertus2))
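+
+A hedged sketch of the rewrite (assumes a column `a` exists only in `t1` and `b` only in `t2`):
+
+``` sql
+SELECT * FROM t1 CROSS JOIN t2 WHERE a = b;  -- can now be rewritten to t1 INNER JOIN t2 ON a = b
+```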
+* Make `SHOW TABLES` and `SHOW DATABASES` queries support `WHERE` expressions and `FROM`/`IN` [#9076](https://github.com/ClickHouse/ClickHouse/pull/9076) ([sundyli](https://github.com/sundy-li))
+* Added a setting `deduplicate_blocks_in_dependent_materialized_views`. [#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) ([urykhy](https://github.com/urykhy))
+* After recent changes the MySQL client started to print binary strings in hex, thereby making them unreadable ([#9032](https://github.com/ClickHouse/ClickHouse/issues/9032)). The workaround in ClickHouse is to mark string columns as UTF-8, which is not always, but usually, the case. [#9079](https://github.com/ClickHouse/ClickHouse/pull/9079) ([Yuriy Baranov](https://github.com/yurriy))
+* Add support of String and FixedString keys for `sumMap` [#8903](https://github.com/ClickHouse/ClickHouse/pull/8903) ([Baudouin Giard](https://github.com/bgiard))
+* Support string keys in SummingMergeTree maps [#8933](https://github.com/ClickHouse/ClickHouse/pull/8933) ([Baudouin Giard](https://github.com/bgiard))
+* Signal termination of a thread to the thread pool even if the thread has thrown an exception [#8736](https://github.com/ClickHouse/ClickHouse/pull/8736) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
+* Allow setting `query_id` in `clickhouse-benchmark` [#9416](https://github.com/ClickHouse/ClickHouse/pull/9416) ([Anton Popov](https://github.com/CurtizJ))
+* Don't allow strange expressions in an `ALTER TABLE ... PARTITION partition` query. This addresses [#7192](https://github.com/ClickHouse/ClickHouse/issues/7192) [#8835](https://github.com/ClickHouse/ClickHouse/pull/8835) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* The table `system.table_engines` now provides information about feature support (like `supports_ttl` or `supports_sort_order`); see the sketch below. [#8830](https://github.com/ClickHouse/ClickHouse/pull/8830) ([Max Akhmedov](https://github.com/zlobober))
+* Enable `system.metric_log` by default. It will contain rows with values of ProfileEvents and CurrentMetrics collected with the "collect_interval_milliseconds" interval (one second by default). The table is very small (usually on the order of megabytes) and collecting this data by default is reasonable. [#9225](https://github.com/ClickHouse/ClickHouse/pull/9225) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Initialize the query profiler for all threads in a group, e.g. it allows fully profiling insert queries. Fixes [#6964](https://github.com/ClickHouse/ClickHouse/issues/6964) [#8874](https://github.com/ClickHouse/ClickHouse/pull/8874) ([Ivan](https://github.com/abyss7))
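+
+Two quick probes of the tables mentioned above (columns as described in the entries):
+
+``` sql
+SELECT name, supports_ttl, supports_sort_order FROM system.table_engines WHERE name = 'MergeTree';
+SELECT * FROM system.metric_log ORDER BY event_time DESC LIMIT 1;
+```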
+* Now a temporary `LIVE VIEW` is created by `CREATE LIVE VIEW name WITH TIMEOUT [42] ...` instead of `CREATE TEMPORARY LIVE VIEW ...`, because the previous syntax was not consistent with `CREATE TEMPORARY TABLE ...` [#9131](https://github.com/ClickHouse/ClickHouse/pull/9131) ([tavplubix](https://github.com/tavplubix))
+* Add the text_log.level configuration parameter to limit entries that go to the `system.text_log` table [#8809](https://github.com/ClickHouse/ClickHouse/pull/8809) ([Azat Khuzhin](https://github.com/azat))
+* Allow putting a downloaded part onto disks/volumes according to TTL rules [#8598](https://github.com/ClickHouse/ClickHouse/pull/8598) ([Vladimir Chebotarev](https://github.com/excitoon))
+* For external MySQL dictionaries, allow mutualizing the MySQL connection pool to "share" it among dictionaries. This option significantly reduces the number of connections to MySQL servers. [#9409](https://github.com/ClickHouse/ClickHouse/pull/9409) ([Clément Rodriguez](https://github.com/clemrodriguez))
+* Show the nearest query execution time for quantiles in `clickhouse-benchmark` output instead of interpolated values. It's better to show values that correspond to the execution time of some queries. [#8712](https://github.com/ClickHouse/ClickHouse/pull/8712) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Possibility to add a key & timestamp for the message when inserting data into Kafka. Fixes [#7198](https://github.com/ClickHouse/ClickHouse/issues/7198) [#8969](https://github.com/ClickHouse/ClickHouse/pull/8969) ([filimonov](https://github.com/filimonov))
+* If the server is run from a terminal, highlight thread number, query id and log priority by colors. This is for improved readability of correlated log messages for developers. [#8961](https://github.com/ClickHouse/ClickHouse/pull/8961) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Better exception message while loading tables for an `Ordinary` database. [#9527](https://github.com/ClickHouse/ClickHouse/pull/9527) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Implement `arraySlice` for arrays with aggregate function states. This fixes [#9388](https://github.com/ClickHouse/ClickHouse/issues/9388) [#9391](https://github.com/ClickHouse/ClickHouse/pull/9391) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Allow constant functions and constant arrays to be used on the right side of the IN operator. [#8813](https://github.com/ClickHouse/ClickHouse/pull/8813) ([Anton Popov](https://github.com/CurtizJ))
+* If a zookeeper exception has happened while fetching data for system.replicas, display it in a separate column. This implements [#9137](https://github.com/ClickHouse/ClickHouse/issues/9137) [#9138](https://github.com/ClickHouse/ClickHouse/pull/9138) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Atomically remove MergeTree data parts on destroy. [#8402](https://github.com/ClickHouse/ClickHouse/pull/8402) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Support row-level security for Distributed tables. [#8926](https://github.com/ClickHouse/ClickHouse/pull/8926) ([Ivan](https://github.com/abyss7))
+* Now we recognize suffixes (like KB, KiB...) in settings values; see the sketch below. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([Mikhail Korotov](https://github.com/millb))
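+
+A sketch of the suffix form (assuming size suffixes are accepted wherever a byte count is expected; the exact accepted spellings are not listed in the entry):
+
+``` sql
+SET max_memory_usage = '10G';  -- instead of spelling out 10000000000
+```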
+* Prevent out of memory while constructing the result of a large JOIN. [#8637](https://github.com/ClickHouse/ClickHouse/pull/8637) ([Artem Zuikov](https://github.com/4ertus2))
+* Added names of clusters to suggestions in interactive mode in `clickhouse-client`. [#8709](https://github.com/ClickHouse/ClickHouse/pull/8709) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Initialize the query profiler for all threads in a group, e.g. it allows fully profiling insert queries [#8820](https://github.com/ClickHouse/ClickHouse/pull/8820) ([Ivan](https://github.com/abyss7))
+* Added column `exception_code` in the `system.query_log` table. [#8770](https://github.com/ClickHouse/ClickHouse/pull/8770) ([Mikhail Korotov](https://github.com/millb))
+* Enabled the MySQL compatibility server on port `9004` in the default server configuration file. Fixed the password generation command in the example in the configuration. [#8771](https://github.com/ClickHouse/ClickHouse/pull/8771) ([Yuriy Baranov](https://github.com/yurriy))
+* Prevent abort on shutdown if the filesystem is readonly. This fixes [#9094](https://github.com/ClickHouse/ClickHouse/issues/9094) [#9100](https://github.com/ClickHouse/ClickHouse/pull/9100) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Better exception message when length is required in an HTTP POST query. [#9453](https://github.com/ClickHouse/ClickHouse/pull/9453) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add `_path` and `_file` virtual columns to the `HDFS` and `File` engines and the `hdfs` and `file` table functions [#8489](https://github.com/ClickHouse/ClickHouse/pull/8489) ([Olga Khvostikova](https://github.com/stavrolia))
+* Fix the error `Cannot find column` while inserting into a `MATERIALIZED VIEW` in case a new column was added to the view's internal table. [#8766](https://github.com/ClickHouse/ClickHouse/pull/8766) [#8788](https://github.com/ClickHouse/ClickHouse/pull/8788) ([vzakaznikov](https://github.com/vzakaznikov)) [#8788](https://github.com/ClickHouse/ClickHouse/issues/8788) [#8806](https://github.com/ClickHouse/ClickHouse/pull/8806) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [#8803](https://github.com/ClickHouse/ClickHouse/pull/8803) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix progress over the native client-server protocol by sending progress after the final update (like logs). This may be relevant only to some third-party tools that use the native protocol. [#9495](https://github.com/ClickHouse/ClickHouse/pull/9495) ([Azat Khuzhin](https://github.com/azat))
+* Add a system metric tracking the number of client connections using the MySQL protocol ([#9013](https://github.com/ClickHouse/ClickHouse/issues/9013)). [#9015](https://github.com/ClickHouse/ClickHouse/pull/9015) ([Eugene Klimov](https://github.com/Slach))
+* From now on, HTTP responses will have the `X-ClickHouse-Timezone` header set to the same timezone value that `SELECT timezone()` would report. [#9493](https://github.com/ClickHouse/ClickHouse/pull/9493) ([Denis Glazachev](https://github.com/traceon))
+
+#### Performance Improvement
+* Improve performance of analysing index with IN [#9261](https://github.com/ClickHouse/ClickHouse/pull/9261) ([Anton Popov](https://github.com/CurtizJ))
+* Simpler and more efficient code in Logical Functions + code cleanups.
+A follow-up to [#8718](https://github.com/ClickHouse/ClickHouse/issues/8718) [#8728](https://github.com/ClickHouse/ClickHouse/pull/8728) ([Alexander Kazakov](https://github.com/Akazz))
+* Overall performance improvement (in the range of 5%..200% for affected queries) by ensuring even more strict aliasing with C++20 features. [#9304](https://github.com/ClickHouse/ClickHouse/pull/9304) ([Amos Bird](https://github.com/amosbird))
+* More strict aliasing for inner loops of comparison functions. [#9327](https://github.com/ClickHouse/ClickHouse/pull/9327) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* More strict aliasing for inner loops of arithmetic functions. [#9325](https://github.com/ClickHouse/ClickHouse/pull/9325) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* A ~3 times faster implementation of ColumnVector::replicate(), via which ColumnConst::convertToFullColumn() is implemented. It will also be useful in tests when materializing constants. [#9293](https://github.com/ClickHouse/ClickHouse/pull/9293) ([Alexander Kazakov](https://github.com/Akazz))
+* Another minor performance improvement to `ColumnVector::replicate()` (this speeds up the `materialize` function and higher order functions), an even further improvement to [#9293](https://github.com/ClickHouse/ClickHouse/issues/9293) [#9442](https://github.com/ClickHouse/ClickHouse/pull/9442) ([Alexander Kazakov](https://github.com/Akazz))
+* Improved performance of the `stochasticLinearRegression` aggregate function. This patch was contributed by Intel. [#8652](https://github.com/ClickHouse/ClickHouse/pull/8652) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Improve performance of the `reinterpretAsFixedString` function. [#9342](https://github.com/ClickHouse/ClickHouse/pull/9342) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Do not send blocks to the client for the `Null` format in the processors pipeline. [#8797](https://github.com/ClickHouse/ClickHouse/pull/8797) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [#8767](https://github.com/ClickHouse/ClickHouse/pull/8767) ([Alexander Kuzmenkov](https://github.com/akuzm))
+
+#### Build/Testing/Packaging Improvement
+* Exception handling now works correctly on Windows Subsystem for Linux. See https://github.com/ClickHouse-Extras/libunwind/pull/3 This fixes [#6480](https://github.com/ClickHouse/ClickHouse/issues/6480) [#9564](https://github.com/ClickHouse/ClickHouse/pull/9564) ([sobolevsv](https://github.com/sobolevsv))
+* Replace `readline` with `replxx` for interactive line editing in `clickhouse-client` [#8416](https://github.com/ClickHouse/ClickHouse/pull/8416) ([Ivan](https://github.com/abyss7))
+* Better build time and fewer template instantiations in FunctionsComparison. [#9324](https://github.com/ClickHouse/ClickHouse/pull/9324) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added integration with `clang-tidy` in CI. See also [#6044](https://github.com/ClickHouse/ClickHouse/issues/6044) [#9566](https://github.com/ClickHouse/ClickHouse/pull/9566) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Now we link ClickHouse in CI using `lld` even for `gcc`. [#9049](https://github.com/ClickHouse/ClickHouse/pull/9049) ([alesapin](https://github.com/alesapin))
+* Allow randomizing thread scheduling and inserting glitches when `THREAD_FUZZER_*` environment variables are set. This helps testing.
+[#9459](https://github.com/ClickHouse/ClickHouse/pull/9459) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Enable secure sockets in stateless tests [#9288](https://github.com/ClickHouse/ClickHouse/pull/9288) ([tavplubix](https://github.com/tavplubix))
+* Make SPLIT_SHARED_LIBRARIES=OFF more robust [#9156](https://github.com/ClickHouse/ClickHouse/pull/9156) ([Azat Khuzhin](https://github.com/azat))
+* Make the "performance_introspection_and_logging" test robust against the server getting stuck at random. This may happen in a CI environment. See also [#9515](https://github.com/ClickHouse/ClickHouse/issues/9515) [#9528](https://github.com/ClickHouse/ClickHouse/pull/9528) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Validate XML in the style check. [#9550](https://github.com/ClickHouse/ClickHouse/pull/9550) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed a race condition in the test `00738_lock_for_inner_table`. This test relied on sleep. [#9555](https://github.com/ClickHouse/ClickHouse/pull/9555) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Remove performance tests of type `once`. This is needed to run all performance tests in statistical comparison mode (more reliable). [#9557](https://github.com/ClickHouse/ClickHouse/pull/9557) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added a performance test for arithmetic functions. [#9326](https://github.com/ClickHouse/ClickHouse/pull/9326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added a performance test for the `sumMap` and `sumMapWithOverflow` aggregate functions. A follow-up for [#8933](https://github.com/ClickHouse/ClickHouse/issues/8933) [#8947](https://github.com/ClickHouse/ClickHouse/pull/8947) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Ensure the style of ErrorCodes by the style check. [#9370](https://github.com/ClickHouse/ClickHouse/pull/9370) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add a script for tests history. [#8796](https://github.com/ClickHouse/ClickHouse/pull/8796) ([alesapin](https://github.com/alesapin))
+* Add the GCC warning `-Wsuggest-override` to locate and fix all places where the `override` keyword must be used. [#8760](https://github.com/ClickHouse/ClickHouse/pull/8760) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+* Ignore a weak symbol under Mac OS X because it must be defined [#9538](https://github.com/ClickHouse/ClickHouse/pull/9538) ([Deleted user](https://github.com/ghost))
+* Normalize the running time of some queries in performance tests. This is done in preparation for running all the performance tests in comparison mode.
+[#9565](https://github.com/ClickHouse/ClickHouse/pull/9565) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix some tests to support pytest with query tests [#9062](https://github.com/ClickHouse/ClickHouse/pull/9062) ([Ivan](https://github.com/abyss7))
+* Enable SSL in the build with MSan, so the server will not fail at startup when running stateless tests [#9531](https://github.com/ClickHouse/ClickHouse/pull/9531) ([tavplubix](https://github.com/tavplubix))
+* Fix database substitution in test results [#9384](https://github.com/ClickHouse/ClickHouse/pull/9384) ([Ilya Yatsishin](https://github.com/qoega))
+* Build fixes for miscellaneous platforms [#9381](https://github.com/ClickHouse/ClickHouse/pull/9381) ([proller](https://github.com/proller)) [#8755](https://github.com/ClickHouse/ClickHouse/pull/8755) ([proller](https://github.com/proller)) [#8631](https://github.com/ClickHouse/ClickHouse/pull/8631) ([proller](https://github.com/proller))
+* Added a disks section to the stateless-with-coverage test Docker image [#9213](https://github.com/ClickHouse/ClickHouse/pull/9213) ([Pavel Kovalenko](https://github.com/Jokser))
+* Get rid of in-source-tree files when building with GRPC [#9588](https://github.com/ClickHouse/ClickHouse/pull/9588) ([Amos Bird](https://github.com/amosbird))
+* Slightly faster build time by removing SessionCleaner from Context. Make the code of SessionCleaner simpler. [#9232](https://github.com/ClickHouse/ClickHouse/pull/9232) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Updated the checking for hung queries in the clickhouse-test script [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz))
+* Removed some useless files from the repository. [#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Changed the type of math perftests from `once` to `loop`. [#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Add a Docker image which allows building an interactive code browser HTML report for our codebase. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/src/index.html)
+* Suppress some test failures under MSan. [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Speed up the "exception while insert" test. This test often timed out in the debug-with-coverage build. [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Updated `libcxx` and `libcxxabi` to master. In preparation for [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix the flaky test `00910_zookeeper_test_alter_compression_codecs`. [#9525](https://github.com/ClickHouse/ClickHouse/pull/9525) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Clean up duplicated linker flags. Make sure the linker won't look up an unexpected symbol. [#9433](https://github.com/ClickHouse/ClickHouse/pull/9433) ([Amos Bird](https://github.com/amosbird))
+* Add the `clickhouse-odbc` driver into test images. This allows testing the interaction of ClickHouse with ClickHouse via its own ODBC driver.
+[#9348](https://github.com/ClickHouse/ClickHouse/pull/9348) ([filimonov](https://github.com/filimonov))
+* Fix several bugs in unit tests. [#9047](https://github.com/ClickHouse/ClickHouse/pull/9047) ([alesapin](https://github.com/alesapin))
+* Enable the `-Wmissing-include-dirs` GCC warning to eliminate all non-existing includes - mostly a result of CMake scripting errors [#8704](https://github.com/ClickHouse/ClickHouse/pull/8704) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+* Describe the reasons if the query profiler cannot work. This is intended for [#9049](https://github.com/ClickHouse/ClickHouse/issues/9049) [#9144](https://github.com/ClickHouse/ClickHouse/pull/9144) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Update OpenSSL to upstream master. Fixed the issue when TLS connections may fail with the message `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error` and `SSL Exception: error:2400006E:random number generator::error retrieving entropy`. The issue was present in version 20.1. [#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Update the Dockerfile for the server [#8893](https://github.com/ClickHouse/ClickHouse/pull/8893) ([Ilya Mazaev](https://github.com/ne-ray))
+* Minor fixes in the build-gcc-from-sources script [#8774](https://github.com/ClickHouse/ClickHouse/pull/8774) ([Michael Nacharov](https://github.com/mnach))
+* Replace `numbers` with `zeros` in perftests where the `number` column is not used. This will lead to cleaner test results. [#9600](https://github.com/ClickHouse/ClickHouse/pull/9600) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix a stack overflow issue when using initializer_list in Column constructors. [#9367](https://github.com/ClickHouse/ClickHouse/pull/9367) ([Deleted user](https://github.com/ghost))
+* Upgrade librdkafka to v1.3.0. Enable the bundled `rdkafka` and `gsasl` libraries on Mac OS X. [#9000](https://github.com/ClickHouse/ClickHouse/pull/9000) ([Andrew Onyshchuk](https://github.com/oandrew))
+* Build fix on GCC 9.2.0 [#9306](https://github.com/ClickHouse/ClickHouse/pull/9306) ([vxider](https://github.com/Vxider))
+
+
+## ClickHouse release v20.1
+
+### ClickHouse release v20.1.8.41, 2020-03-20
+
+#### Bug Fix
+* Fix a possible permanent `Cannot schedule a task` error (due to an unhandled exception in `ParallelAggregatingBlockInputStream::Handler::onFinish/onFinishThread`). This fixes [#6833](https://github.com/ClickHouse/ClickHouse/issues/6833). [#9154](https://github.com/ClickHouse/ClickHouse/pull/9154) ([Azat Khuzhin](https://github.com/azat))
+* Fix excessive memory consumption in `ALTER` queries (mutations). This fixes [#9533](https://github.com/ClickHouse/ClickHouse/issues/9533) and [#9670](https://github.com/ClickHouse/ClickHouse/issues/9670). [#9754](https://github.com/ClickHouse/ClickHouse/pull/9754) ([alesapin](https://github.com/alesapin))
+* Fix a bug in backquoting in external dictionaries DDL. This fixes [#9619](https://github.com/ClickHouse/ClickHouse/issues/9619). [#9734](https://github.com/ClickHouse/ClickHouse/pull/9734) ([alesapin](https://github.com/alesapin))
+
+### ClickHouse release v20.1.7.38, 2020-03-18
+
+#### Bug Fix
+* Fixed incorrect internal function names for `sumKahan` and `sumWithOverflow`. It led to an exception while using these functions in remote queries. [#9636](https://github.com/ClickHouse/ClickHouse/pull/9636) ([Azat Khuzhin](https://github.com/azat)). This issue was in all ClickHouse releases.
+* Allow `ALTER ON CLUSTER` of `Distributed` tables with internal replication. This fixes [#3268](https://github.com/ClickHouse/ClickHouse/issues/3268). [#9617](https://github.com/ClickHouse/ClickHouse/pull/9617) ([shinoi2](https://github.com/shinoi2)). This issue was in all ClickHouse releases.
+* Fix possible exceptions `Size of filter doesn't match size of column` and `Invalid number of rows in Chunk` in `MergeTreeRangeReader`. They could appear while executing `PREWHERE` in some cases. Fixes [#9132](https://github.com/ClickHouse/ClickHouse/issues/9132). [#9612](https://github.com/ClickHouse/ClickHouse/pull/9612) ([Anton Popov](https://github.com/CurtizJ))
+* Fixed the issue: the timezone was not preserved if you write a simple arithmetic expression like `time + 1` (in contrast to an expression like `time + INTERVAL 1 SECOND`). This fixes [#5743](https://github.com/ClickHouse/ClickHouse/issues/5743). [#9323](https://github.com/ClickHouse/ClickHouse/pull/9323) ([alexey-milovidov](https://github.com/alexey-milovidov)). This issue was in all ClickHouse releases.
+* Now it's not possible to create or add columns with simple cyclic aliases like `a DEFAULT b, b DEFAULT a`. [#9603](https://github.com/ClickHouse/ClickHouse/pull/9603) ([alesapin](https://github.com/alesapin))
+* Fixed the issue when the padding at the end of a base64 encoded value can be malformed. Update the base64 library. This fixes [#9491](https://github.com/ClickHouse/ClickHouse/issues/9491), closes [#9492](https://github.com/ClickHouse/ClickHouse/issues/9492) [#9500](https://github.com/ClickHouse/ClickHouse/pull/9500) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix a data race at the destruction of `Poco::HTTPServer`. It could happen when the server is started and immediately shut down. [#9468](https://github.com/ClickHouse/ClickHouse/pull/9468) ([Anton Popov](https://github.com/CurtizJ))
+* Fix a possible crash/wrong number of rows in `LIMIT n WITH TIES` when there are a lot of rows equal to the n'th row. [#9464](https://github.com/ClickHouse/ClickHouse/pull/9464) ([tavplubix](https://github.com/tavplubix))
+* Fix possible mismatched checksums with column TTLs. [#9451](https://github.com/ClickHouse/ClickHouse/pull/9451) ([Anton Popov](https://github.com/CurtizJ))
+* Fix crash when a user tries to `ALTER MODIFY SETTING` for the old-formatted `MergeTree` table engines family. [#9435](https://github.com/ClickHouse/ClickHouse/pull/9435) ([alesapin](https://github.com/alesapin))
+* Now we will try to finalize mutations more frequently. [#9427](https://github.com/ClickHouse/ClickHouse/pull/9427) ([alesapin](https://github.com/alesapin))
+* Fix replication protocol incompatibility introduced in [#8598](https://github.com/ClickHouse/ClickHouse/issues/8598). [#9412](https://github.com/ClickHouse/ClickHouse/pull/9412) ([alesapin](https://github.com/alesapin))
+* Fix not(has()) for the bloom_filter index of array types. [#9407](https://github.com/ClickHouse/ClickHouse/pull/9407) ([achimbab](https://github.com/achimbab))
+* Fixed the behaviour of the `match` and `extract` functions when the haystack has zero bytes. The behaviour was wrong when the haystack was constant.
+* Fixed the behaviour of `match` and `extract` functions when haystack has zero bytes. The behaviour was wrong when haystack was constant. This fixes [#9160](https://github.com/ClickHouse/ClickHouse/issues/9160). [#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvement
+
+* Exception handling now works correctly on Windows Subsystem for Linux. See https://github.com/ClickHouse-Extras/libunwind/pull/3. This fixes [#6480](https://github.com/ClickHouse/ClickHouse/issues/6480). [#9564](https://github.com/ClickHouse/ClickHouse/pull/9564) ([sobolevsv](https://github.com/sobolevsv))
+
+
+### ClickHouse release v20.1.6.30, 2020-03-05
+
+#### Bug Fix
+
+* Fix data incompatibility when compressed with `T64` codec.
+[#9039](https://github.com/ClickHouse/ClickHouse/pull/9039) [(abyss7)](https://github.com/abyss7)
+* Fix order of ranges while reading from a MergeTree table in one thread. Fixes [#8964](https://github.com/ClickHouse/ClickHouse/issues/8964).
+[#9050](https://github.com/ClickHouse/ClickHouse/pull/9050) [(CurtizJ)](https://github.com/CurtizJ)
+* Fix possible segfault in `MergeTreeRangeReader` while executing `PREWHERE`. Fixes [#9064](https://github.com/ClickHouse/ClickHouse/issues/9064).
+[#9106](https://github.com/ClickHouse/ClickHouse/pull/9106) [(CurtizJ)](https://github.com/CurtizJ)
+* Fix `reinterpretAsFixedString` to return `FixedString` instead of `String`.
+[#9052](https://github.com/ClickHouse/ClickHouse/pull/9052) [(oandrew)](https://github.com/oandrew)
+* Fix `joinGet` with nullable return types. Fixes [#8919](https://github.com/ClickHouse/ClickHouse/issues/8919).
+[#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) [(amosbird)](https://github.com/amosbird)
+* Fix fuzz test and incorrect behaviour of `bitTestAll`/`bitTestAny` functions.
+[#9143](https://github.com/ClickHouse/ClickHouse/pull/9143) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+* Fix the behaviour of `match` and `extract` functions when haystack has zero bytes. The behaviour was wrong when haystack was constant. Fixes [#9160](https://github.com/ClickHouse/ClickHouse/issues/9160).
+[#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+* Fixed execution of inverted predicates when a non-strictly monotonic functional index is used. Fixes [#9034](https://github.com/ClickHouse/ClickHouse/issues/9034).
+[#9223](https://github.com/ClickHouse/ClickHouse/pull/9223) [(Akazz)](https://github.com/Akazz)
+* Allow rewriting `CROSS` to `INNER JOIN` if there's a `[NOT] LIKE` operator in the `WHERE` section. Fixes [#9191](https://github.com/ClickHouse/ClickHouse/issues/9191).
+[#9229](https://github.com/ClickHouse/ClickHouse/pull/9229) [(4ertus2)](https://github.com/4ertus2)
+* Allow the first column(s) in a table with the Log engine to be an alias.
+[#9231](https://github.com/ClickHouse/ClickHouse/pull/9231) [(abyss7)](https://github.com/abyss7)
+* Allow comma join with `IN()` inside. Fixes [#7314](https://github.com/ClickHouse/ClickHouse/issues/7314).
+[#9251](https://github.com/ClickHouse/ClickHouse/pull/9251) [(4ertus2)](https://github.com/4ertus2)
+* Improve `ALTER MODIFY/ADD` queries logic. Now you cannot `ADD` a column without a type, `MODIFY` of a default expression doesn't change the type of the column, and `MODIFY` of the type doesn't lose the default expression value. Fixes [#8669](https://github.com/ClickHouse/ClickHouse/issues/8669).
+[#9227](https://github.com/ClickHouse/ClickHouse/pull/9227) [(alesapin)](https://github.com/alesapin)
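The tightened `ALTER MODIFY/ADD` semantics from the entry above, as a hedged sketch (the table `t` and column `c` are hypothetical):

```sql
ALTER TABLE t ADD COLUMN c;                -- now rejected: a column type is required
ALTER TABLE t MODIFY COLUMN c DEFAULT 42;  -- changes only the default, keeps the existing type of c
ALTER TABLE t MODIFY COLUMN c UInt16;      -- changes only the type, keeps the existing default of c
```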
+* Fix mutations finalization: previously, an already finished mutation could have status `is_done=0`.
+[#9217](https://github.com/ClickHouse/ClickHouse/pull/9217) [(alesapin)](https://github.com/alesapin)
+* Support "Processors" pipeline for `system.numbers` and `system.numbers_mt`. This also fixes the bug when `max_execution_time` is not respected.
+[#7796](https://github.com/ClickHouse/ClickHouse/pull/7796) [(KochetovNicolai)](https://github.com/KochetovNicolai)
+* Fix wrong counting of the `DictCacheKeysRequestedFound` metric.
+[#9411](https://github.com/ClickHouse/ClickHouse/pull/9411) [(nikitamikhaylov)](https://github.com/nikitamikhaylov)
+* Added a check for storage policy in `ATTACH PARTITION FROM`, `REPLACE PARTITION`, `MOVE TO TABLE` which otherwise could make data of a part inaccessible after restart and prevent ClickHouse from starting.
+[#9383](https://github.com/ClickHouse/ClickHouse/pull/9383) [(excitoon)](https://github.com/excitoon)
+* Fixed UBSan report in `MergeTreeIndexSet`. This fixes [#9250](https://github.com/ClickHouse/ClickHouse/issues/9250).
+[#9365](https://github.com/ClickHouse/ClickHouse/pull/9365) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+* Fix possible data race in BlockIO.
+[#9356](https://github.com/ClickHouse/ClickHouse/pull/9356) [(KochetovNicolai)](https://github.com/KochetovNicolai)
+* Support for `UInt64` numbers that don't fit in Int64 in JSON-related functions. Update `SIMDJSON` to master. This fixes [#9209](https://github.com/ClickHouse/ClickHouse/issues/9209).
+[#9344](https://github.com/ClickHouse/ClickHouse/pull/9344) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+* Fix the issue when the amount of free space is not calculated correctly if the data directory is mounted to a separate device. For the default disk, calculate the free space from the `data` subdirectory. This fixes [#7441](https://github.com/ClickHouse/ClickHouse/issues/7441).
+[#9257](https://github.com/ClickHouse/ClickHouse/pull/9257) [(millb)](https://github.com/millb)
+* Fix the issue when TLS connections may fail with the message `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error and SSL Exception: error:2400006E:random number generator::error retrieving entropy.` Update OpenSSL to upstream master.
+[#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+* When executing a `CREATE` query, fold constant expressions in storage engine arguments. Replace an empty database name with the current database. Fixes [#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix the check for local addresses in ClickHouseDictionarySource.
+[#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tavplubix)](https://github.com/tavplubix)
+* Fix segfault in `StorageMerge`, which can happen when reading from StorageFile.
+[#9387](https://github.com/ClickHouse/ClickHouse/pull/9387) [(tavplubix)](https://github.com/tavplubix)
+* Prevent losing data in `Kafka` in rare cases when an exception happens after reading the suffix but before commit. Fixes [#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [#7175](https://github.com/ClickHouse/ClickHouse/issues/7175)
+[#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov)
+* Fix bug leading to server termination when trying to use/drop a `Kafka` table created with wrong parameters. Fixes [#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [#9507](https://github.com/ClickHouse/ClickHouse/issues/9507).
+[#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov)
+
+#### New Feature
+* Add `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views. This new feature was added to the bugfix release by a special request from Altinity.
+[#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)
+
+### ClickHouse release v20.1.2.4, 2020-01-22
+
+#### Backward Incompatible Change
+* Make the setting `merge_tree_uniform_read_distribution` obsolete. The server still recognizes this setting but it has no effect. [#8308](https://github.com/ClickHouse/ClickHouse/pull/8308) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Changed return type of the function `greatCircleDistance` to `Float32` because now the result of calculation is `Float32`. [#7993](https://github.com/ClickHouse/ClickHouse/pull/7993) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Now it's expected that query parameters are represented in "escaped" format. For example, to pass the string `a<TAB>b` you have to write `a\tb` or `a\<TAB>b` and, respectively, `a%5Ctb` or `a%5C%09b` in a URL. This is needed to add the possibility to pass NULL as `\N`. This fixes [#7488](https://github.com/ClickHouse/ClickHouse/issues/7488). [#8517](https://github.com/ClickHouse/ClickHouse/pull/8517) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Enable `use_minimalistic_part_header_in_zookeeper` setting for `ReplicatedMergeTree` by default. This will significantly reduce the amount of data stored in ZooKeeper. This setting is supported since version 19.1 and we already use it in production in multiple services without any issues for more than half a year. Disable this setting if you have a chance to downgrade to versions older than 19.1. [#6850](https://github.com/ClickHouse/ClickHouse/pull/6850) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Data skipping indices are production ready and enabled by default. The settings `allow_experimental_data_skipping_indices`, `allow_experimental_cross_to_join_conversion` and `allow_experimental_multiple_joins_emulation` are now obsolete and do nothing. [#7974](https://github.com/ClickHouse/ClickHouse/pull/7974) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add new `ANY JOIN` logic for `StorageJoin` consistent with the `JOIN` operation. To upgrade without changes in behaviour you need to add `SETTINGS any_join_distinct_right_table_keys = 1` to Engine Join tables metadata or recreate these tables after upgrade (see the sketch below). [#8400](https://github.com/ClickHouse/ClickHouse/pull/8400) ([Artem Zuikov](https://github.com/4ertus2))
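A minimal sketch of the upgrade path described in the `StorageJoin` entry above, recreating an `Engine = Join` table with the compatibility setting; the table and column names are illustrative:

```sql
-- Keep the pre-20.1 ANY JOIN semantics for this Join table:
CREATE TABLE join_state (k UInt64, v String)
ENGINE = Join(ANY, LEFT, k)
SETTINGS any_join_distinct_right_table_keys = 1;
```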
+* Require the server to be restarted to apply changes in the logging configuration. This is a temporary workaround to avoid the bug where the server logs to a deleted log file (see [#8696](https://github.com/ClickHouse/ClickHouse/issues/8696)). [#8707](https://github.com/ClickHouse/ClickHouse/pull/8707) ([Alexander Kuzmenkov](https://github.com/akuzm))
+
+#### New Feature
+* Added information about part paths to `system.merges`. [#8043](https://github.com/ClickHouse/ClickHouse/pull/8043) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Add ability to execute `SYSTEM RELOAD DICTIONARY` query in `ON CLUSTER` mode. [#8288](https://github.com/ClickHouse/ClickHouse/pull/8288) ([Guillaume Tassery](https://github.com/YiuRULE))
+* Add ability to execute `CREATE DICTIONARY` queries in `ON CLUSTER` mode. [#8163](https://github.com/ClickHouse/ClickHouse/pull/8163) ([alesapin](https://github.com/alesapin))
+* Now a user's profile in `users.xml` can inherit multiple profiles. [#8343](https://github.com/ClickHouse/ClickHouse/pull/8343) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+* Added `system.stack_trace` table that allows to look at stack traces of all server threads. This is useful for developers to introspect server state. This fixes [#7576](https://github.com/ClickHouse/ClickHouse/issues/7576). [#8344](https://github.com/ClickHouse/ClickHouse/pull/8344) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add `DateTime64` datatype with configurable sub-second precision. [#7170](https://github.com/ClickHouse/ClickHouse/pull/7170) ([Vasily Nemkov](https://github.com/Enmk))
+* Add table function `clusterAllReplicas` which allows to query all the nodes in the cluster. [#8493](https://github.com/ClickHouse/ClickHouse/pull/8493) ([kiran sunkari](https://github.com/kiransunkari))
+* Add aggregate function `categoricalInformationValue` which calculates the information value of a discrete feature. [#8117](https://github.com/ClickHouse/ClickHouse/pull/8117) ([hcz](https://github.com/hczhcz))
+* Speed up parsing of data files in `CSV`, `TSV` and `JSONEachRow` format by doing it in parallel. [#7780](https://github.com/ClickHouse/ClickHouse/pull/7780) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Add function `bankerRound` which performs banker's rounding. [#8112](https://github.com/ClickHouse/ClickHouse/pull/8112) ([hcz](https://github.com/hczhcz))
+* Support more languages in the embedded dictionary for region names: 'ru', 'en', 'ua', 'uk', 'by', 'kz', 'tr', 'de', 'uz', 'lv', 'lt', 'et', 'pt', 'he', 'vi'. [#8189](https://github.com/ClickHouse/ClickHouse/pull/8189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Improvements in consistency of `ANY JOIN` logic. Now `t1 ANY LEFT JOIN t2` equals `t2 ANY RIGHT JOIN t1`. [#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+* Add setting `any_join_distinct_right_table_keys` which enables the old behaviour for `ANY INNER JOIN`. [#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+* Add new `SEMI` and `ANTI JOIN`. The old `ANY INNER JOIN` behaviour is now available as `SEMI LEFT JOIN`. [#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+* Added `Distributed` format for the `File` engine and `file` table function which allows to read from `.bin` files generated by asynchronous inserts into a `Distributed` table. [#8535](https://github.com/ClickHouse/ClickHouse/pull/8535) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Add an optional reset-column argument for `runningAccumulate` which allows to reset aggregation results for each new key value (see the sketch below). [#8326](https://github.com/ClickHouse/ClickHouse/pull/8326) ([Sergey Kononenko](https://github.com/kononencheg))
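A sketch of the new second argument of `runningAccumulate` mentioned just above; all names are illustrative. The accumulated state restarts whenever the value of the grouping argument changes:

```sql
SELECT grouping, item, runningAccumulate(state, grouping) AS acc
FROM
(
    SELECT
        intDiv(number, 4) AS grouping,
        number            AS item,
        sumState(number)  AS state
    FROM numbers(12)
    GROUP BY grouping, item
    ORDER BY item
);
-- acc is a running sum that resets at the start of each `grouping` value.
```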
+* Add ability to use ClickHouse as a Prometheus endpoint. [#7900](https://github.com/ClickHouse/ClickHouse/pull/7900) ([vdimir](https://github.com/Vdimir))
+* Add section `<remote_url_allow_hosts>` in `config.xml` which restricts allowed hosts for remote table engines and table functions `URL`, `S3`, `HDFS`. [#7154](https://github.com/ClickHouse/ClickHouse/pull/7154) ([Mikhail Korotov](https://github.com/millb))
+* Added function `greatCircleAngle` which calculates the distance on a sphere in degrees. [#8105](https://github.com/ClickHouse/ClickHouse/pull/8105) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Changed Earth radius to be consistent with the H3 library. [#8105](https://github.com/ClickHouse/ClickHouse/pull/8105) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added `JSONCompactEachRow` and `JSONCompactEachRowWithNamesAndTypes` formats for input and output. [#7841](https://github.com/ClickHouse/ClickHouse/pull/7841) ([Mikhail Korotov](https://github.com/millb))
+* Added a feature for file-related table engines and table functions (`File`, `S3`, `URL`, `HDFS`) which allows to read and write `gzip` files based on an additional engine parameter or the file extension. [#7840](https://github.com/ClickHouse/ClickHouse/pull/7840) ([Andrey Bodrov](https://github.com/apbodrov))
+* Added the `randomASCII(length)` function, generating a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [#8401](https://github.com/ClickHouse/ClickHouse/pull/8401) ([BayoNet](https://github.com/BayoNet))
+* Added function `JSONExtractArrayRaw` which returns an array of unparsed JSON array elements from a `JSON` string. [#8081](https://github.com/ClickHouse/ClickHouse/pull/8081) ([Oleg Matrokhin](https://github.com/errx))
+* Add `arrayZip` function which allows to combine multiple arrays of equal length into one array of tuples. [#8149](https://github.com/ClickHouse/ClickHouse/pull/8149) ([Winter Zhang](https://github.com/zhang2014))
+* Add ability to move data between disks according to configured `TTL`-expressions for the `*MergeTree` table engines family. [#8140](https://github.com/ClickHouse/ClickHouse/pull/8140) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Added new aggregate function `avgWeighted` which allows to calculate the weighted average. [#7898](https://github.com/ClickHouse/ClickHouse/pull/7898) ([Andrey Bodrov](https://github.com/apbodrov))
+* Now parallel parsing is enabled by default for `TSV`, `TSKV`, `CSV` and `JSONEachRow` formats. [#7894](https://github.com/ClickHouse/ClickHouse/pull/7894) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Add several geo functions from the `H3` library: `h3GetResolution`, `h3EdgeAngle`, `h3EdgeLength`, `h3IsValid` and `h3kRing`. [#8034](https://github.com/ClickHouse/ClickHouse/pull/8034) ([Konstantin Malanchev](https://github.com/hombit))
+* Added support for brotli (`br`) compression in file-related storages and table functions. This fixes [#8156](https://github.com/ClickHouse/ClickHouse/issues/8156). [#8526](https://github.com/ClickHouse/ClickHouse/pull/8526) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add `groupBit*` functions for the `SimpleAggregateFunction` type. [#8485](https://github.com/ClickHouse/ClickHouse/pull/8485) ([Guillaume Tassery](https://github.com/YiuRULE))
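An illustrative-only tour of a few of the new 20.1 features listed above; all values, paths and schemas are made up:

```sql
SELECT
    toDateTime64('2020-01-22 00:00:00.123', 3) AS dt64,   -- DateTime64 with millisecond precision
    arrayZip(['a', 'b', 'c'], [1, 2, 3])       AS zipped, -- [('a',1),('b',2),('c',3)]
    avgWeighted(number, number + 1)            AS wavg    -- new weighted average
FROM numbers(10);

-- Reading a gzip-compressed file; the compression is inferred from the extension:
SELECT * FROM file('data.csv.gz', 'CSV', 'a Int64, b String');
```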
+
+#### Bug Fix
+* Fix rename of tables with the `Distributed` engine. Fixes issue [#7868](https://github.com/ClickHouse/ClickHouse/issues/7868). [#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix))
+* Now dictionaries support `EXPRESSION` for attributes as an arbitrary string in a non-ClickHouse SQL dialect. [#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin))
+* Fix broken `INSERT SELECT FROM mysql(...)` query. This fixes [#8070](https://github.com/ClickHouse/ClickHouse/issues/8070) and [#7960](https://github.com/ClickHouse/ClickHouse/issues/7960). [#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix))
+* Fix error "Mismatch column sizes" when inserting default `Tuple` from `JSONEachRow`. This fixes [#5653](https://github.com/ClickHouse/ClickHouse/issues/5653). [#8606](https://github.com/ClickHouse/ClickHouse/pull/8606) ([tavplubix](https://github.com/tavplubix))
+* Now an exception will be thrown in case of using `WITH TIES` alongside `LIMIT BY`. Also add ability to use `TOP` with `LIMIT BY`. This fixes [#7472](https://github.com/ClickHouse/ClickHouse/issues/7472). [#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Fix unintended dependency on a fresh glibc version in the `clickhouse-odbc-bridge` binary. [#8046](https://github.com/ClickHouse/ClickHouse/pull/8046) ([Amos Bird](https://github.com/amosbird))
+* Fix bug in the check function of the `*MergeTree` engines family. Now it doesn't fail when we have an equal amount of rows in the last granule and the last mark (non-final). [#8047](https://github.com/ClickHouse/ClickHouse/pull/8047) ([alesapin](https://github.com/alesapin))
+* Fix insert into `Enum*` columns after an `ALTER` query, when the underlying numeric type is equal to the type specified in the table. This fixes [#7836](https://github.com/ClickHouse/ClickHouse/issues/7836). [#7908](https://github.com/ClickHouse/ClickHouse/pull/7908) ([Anton Popov](https://github.com/CurtizJ))
+* Allowed non-constant negative "size" argument for function `substring`. It was not allowed by mistake. This fixes [#4832](https://github.com/ClickHouse/ClickHouse/issues/4832). [#7703](https://github.com/ClickHouse/ClickHouse/pull/7703) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix parsing bug when a wrong number of arguments is passed to the `(O|J)DBC` table engine. [#7709](https://github.com/ClickHouse/ClickHouse/pull/7709) ([alesapin](https://github.com/alesapin))
+* Use the command name of the running clickhouse process when sending logs to syslog. In previous versions, an empty string was used instead of the command name. [#8460](https://github.com/ClickHouse/ClickHouse/pull/8460) ([Michael Nacharov](https://github.com/mnach))
+* Fix check of allowed hosts for `localhost`. This PR fixes the solution provided in [#8241](https://github.com/ClickHouse/ClickHouse/pull/8241). [#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar))
+* Fix rare crash in `argMin` and `argMax` functions for long string arguments, when the result is used in the `runningAccumulate` function. This fixes [#8325](https://github.com/ClickHouse/ClickHouse/issues/8325). [#8341](https://github.com/ClickHouse/ClickHouse/pull/8341) ([dinosaur](https://github.com/769344359))
+* Fix memory overcommit for tables with the `Buffer` engine. [#8345](https://github.com/ClickHouse/ClickHouse/pull/8345) ([Azat Khuzhin](https://github.com/azat))
+* Fixed potential bug in functions that can take `NULL` as one of the arguments and return non-NULL. [#8196](https://github.com/ClickHouse/ClickHouse/pull/8196) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Better metrics calculations in the thread pool for background processes for `MergeTree` table engines. [#8194](https://github.com/ClickHouse/ClickHouse/pull/8194) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix function `IN` inside a `WHERE` statement when a row-level table filter is present. Fixes [#6687](https://github.com/ClickHouse/ClickHouse/issues/6687). [#8357](https://github.com/ClickHouse/ClickHouse/pull/8357) ([Ivan](https://github.com/abyss7))
+* Now an exception is thrown if an integral value is not parsed completely for settings values. [#7678](https://github.com/ClickHouse/ClickHouse/pull/7678) ([Mikhail Korotov](https://github.com/millb))
+* Fix exception when an aggregate function is used in a query to a distributed table with more than two local shards. [#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu))
+* Now the bloom filter can handle zero-length arrays and doesn't perform redundant calculations. [#8242](https://github.com/ClickHouse/ClickHouse/pull/8242) ([achimbab](https://github.com/achimbab))
+* Fixed checking if a client host is allowed by matching the client host to `host_regexp` specified in `users.xml`. [#8241](https://github.com/ClickHouse/ClickHouse/pull/8241) ([Vitaly Baranov](https://github.com/vitlibar))
+* Relax ambiguous column check that leads to false positives in multiple `JOIN ON` sections. [#8385](https://github.com/ClickHouse/ClickHouse/pull/8385) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed possible server crash (`std::terminate`) when the server cannot send or write data in `JSON` or `XML` format with values of `String` data type (that require `UTF-8` validation) or when compressing result data with the Brotli algorithm or in some other rare cases. This fixes [#7603](https://github.com/ClickHouse/ClickHouse/issues/7603). [#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix race condition in `StorageDistributedDirectoryMonitor` found by CI. This fixes [#8364](https://github.com/ClickHouse/ClickHouse/issues/8364). [#8383](https://github.com/ClickHouse/ClickHouse/pull/8383) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Now background merges in the `*MergeTree` table engines family preserve the storage policy volume order more accurately. [#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Now table engine `Kafka` works properly with the `Native` format. This fixes [#6731](https://github.com/ClickHouse/ClickHouse/issues/6731) [#7337](https://github.com/ClickHouse/ClickHouse/issues/7337) [#8003](https://github.com/ClickHouse/ClickHouse/issues/8003). [#8016](https://github.com/ClickHouse/ClickHouse/pull/8016) ([filimonov](https://github.com/filimonov))
+* Fixed formats with headers (like `CSVWithNames`) which were throwing an exception about EOF for table engine `Kafka`. [#8016](https://github.com/ClickHouse/ClickHouse/pull/8016) ([filimonov](https://github.com/filimonov))
+* Fixed a bug with making a set from a subquery in the right part of an `IN` section. This fixes [#5767](https://github.com/ClickHouse/ClickHouse/issues/5767) and [#2542](https://github.com/ClickHouse/ClickHouse/issues/2542). [#7755](https://github.com/ClickHouse/ClickHouse/pull/7755) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Fix possible crash while reading from storage `File`. [#7756](https://github.com/ClickHouse/ClickHouse/pull/7756) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fixed reading of files in `Parquet` format containing columns of type `list`. [#8334](https://github.com/ClickHouse/ClickHouse/pull/8334) ([maxulan](https://github.com/maxulan))
+* Fix error `Not found column` for distributed queries with a `PREWHERE` condition dependent on the sampling key if `max_parallel_replicas > 1`. [#7913](https://github.com/ClickHouse/ClickHouse/pull/7913) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix error `Not found column` if a query used `PREWHERE` dependent on a table's alias and the result set was empty because of the primary key condition. [#7911](https://github.com/ClickHouse/ClickHouse/pull/7911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fixed return type for functions `rand` and `randConstant` in case of a `Nullable` argument. Now the functions always return `UInt32` and never `Nullable(UInt32)` (see the sketch below). [#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Disabled predicate push-down for `WITH FILL` expression. This fixes [#7784](https://github.com/ClickHouse/ClickHouse/issues/7784). [#7789](https://github.com/ClickHouse/ClickHouse/pull/7789) ([Winter Zhang](https://github.com/zhang2014))
+* Fixed incorrect `count()` result for `SummingMergeTree` when the `FINAL` section is used. [#3280](https://github.com/ClickHouse/ClickHouse/issues/3280) [#7786](https://github.com/ClickHouse/ClickHouse/pull/7786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Fix possible incorrect result for constant functions from remote servers. It happened for queries with functions like `version()`, `uptime()`, etc. which return different constant values for different servers. This fixes [#7666](https://github.com/ClickHouse/ClickHouse/issues/7666). [#7689](https://github.com/ClickHouse/ClickHouse/pull/7689) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix complicated bug in push-down predicate optimization which led to wrong results. This fixes a lot of issues with push-down predicate optimization. [#8503](https://github.com/ClickHouse/ClickHouse/pull/8503) ([Winter Zhang](https://github.com/zhang2014))
+* Fix crash in `CREATE TABLE .. AS dictionary` query. [#8508](https://github.com/ClickHouse/ClickHouse/pull/8508) ([Azat Khuzhin](https://github.com/azat))
+* Several improvements to ClickHouse grammar in the `.g4` file. [#8294](https://github.com/ClickHouse/ClickHouse/pull/8294) ([taiyang-li](https://github.com/taiyang-li))
+* Fix bug that leads to crashes in `JOIN`s with tables with engine `Join`. This fixes [#7556](https://github.com/ClickHouse/ClickHouse/issues/7556) [#8254](https://github.com/ClickHouse/ClickHouse/issues/8254) [#7915](https://github.com/ClickHouse/ClickHouse/issues/7915) [#8100](https://github.com/ClickHouse/ClickHouse/issues/8100). [#8298](https://github.com/ClickHouse/ClickHouse/pull/8298) ([Artem Zuikov](https://github.com/4ertus2))
+* Fix redundant dictionaries reload on `CREATE DATABASE`. [#7916](https://github.com/ClickHouse/ClickHouse/pull/7916) ([Azat Khuzhin](https://github.com/azat))
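The `rand`/`randConstant` fix from the list above can be observed directly; a minimal sketch:

```sql
-- After the fix the result type stays UInt32 even for a Nullable argument:
SELECT toTypeName(rand(toNullable(1)));  -- UInt32, not Nullable(UInt32)
```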
+* Limit the maximum number of streams for reads from `StorageFile` and `StorageHDFS`. Fixes https://github.com/ClickHouse/ClickHouse/issues/7650. [#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
+* Fix bug in `ALTER ... MODIFY ... CODEC` query, when the user specifies both a default expression and a codec. Fixes [#8593](https://github.com/ClickHouse/ClickHouse/issues/8593). [#8614](https://github.com/ClickHouse/ClickHouse/pull/8614) ([alesapin](https://github.com/alesapin))
+* Fix error in background merge of columns with `SimpleAggregateFunction(LowCardinality)` type. [#8613](https://github.com/ClickHouse/ClickHouse/pull/8613) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fixed type check in function `toDateTime64`. [#8375](https://github.com/ClickHouse/ClickHouse/pull/8375) ([Vasily Nemkov](https://github.com/Enmk))
+* Now the server does not crash on `LEFT` or `FULL JOIN` with the `Join` engine and unsupported `join_use_nulls` settings. [#8479](https://github.com/ClickHouse/ClickHouse/pull/8479) ([Artem Zuikov](https://github.com/4ertus2))
+* Now the `DROP DICTIONARY IF EXISTS db.dict` query doesn't throw an exception if `db` doesn't exist. [#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar))
+* Fix possible crashes in table functions (`file`, `mysql`, `remote`) caused by usage of a reference to a removed `IStorage` object. Fix incorrect parsing of columns specified at insertion into a table function. [#7762](https://github.com/ClickHouse/ClickHouse/pull/7762) ([tavplubix](https://github.com/tavplubix))
+* Ensure the network is up before starting `clickhouse-server`. This fixes [#7507](https://github.com/ClickHouse/ClickHouse/issues/7507). [#8570](https://github.com/ClickHouse/ClickHouse/pull/8570) ([Zhichang Yu](https://github.com/yuzhichang))
+* Fix timeouts handling for secure connections, so queries don't hang indefinitely. This fixes [#8126](https://github.com/ClickHouse/ClickHouse/issues/8126). [#8128](https://github.com/ClickHouse/ClickHouse/pull/8128) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix `clickhouse-copier`'s redundant contention between concurrent workers. [#7816](https://github.com/ClickHouse/ClickHouse/pull/7816) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
+* Now mutations don't skip attached parts, even if their mutation version is larger than the current mutation version. [#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang)) [#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin))
+* Ignore redundant copies of `*MergeTree` data parts after a move to another disk and server restart. [#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix crash in `FULL JOIN` with `LowCardinality` in the `JOIN` key. [#8252](https://github.com/ClickHouse/ClickHouse/pull/8252) ([Artem Zuikov](https://github.com/4ertus2))
+* Forbid using a column name more than once in an insert query like `INSERT INTO tbl (x, y, x)`. This fixes [#5465](https://github.com/ClickHouse/ClickHouse/issues/5465), [#7681](https://github.com/ClickHouse/ClickHouse/issues/7681). [#7685](https://github.com/ClickHouse/ClickHouse/pull/7685) ([alesapin](https://github.com/alesapin))
+* Added a fallback for detecting the number of physical CPU cores for unknown CPUs (using the number of logical CPU cores). This fixes [#5239](https://github.com/ClickHouse/ClickHouse/issues/5239). [#7726](https://github.com/ClickHouse/ClickHouse/pull/7726) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix `There's no column` error for materialized and alias columns. [#8210](https://github.com/ClickHouse/ClickHouse/pull/8210) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed server crash when the `EXISTS` query was used without a `TABLE` or `DICTIONARY` qualifier, just like `EXISTS t`. This fixes [#8172](https://github.com/ClickHouse/ClickHouse/issues/8172). This bug was introduced in version 19.17. [#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix rare bug with error `"Sizes of columns doesn't match"` that might appear when using a `SimpleAggregateFunction` column. [#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea))
+* Fix bug where a user with empty `allow_databases` got access to all databases (and the same for `allow_dictionaries`). [#7793](https://github.com/ClickHouse/ClickHouse/pull/7793) ([DeifyTheGod](https://github.com/DeifyTheGod))
+* Fix client crash when the server is already disconnected from the client. [#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat))
+* Fix `ORDER BY` behaviour in case of sorting by a primary key prefix and a non-primary-key suffix. [#7759](https://github.com/ClickHouse/ClickHouse/pull/7759) ([Anton Popov](https://github.com/CurtizJ))
+* Check that a qualified column is present in the table. This fixes [#6836](https://github.com/ClickHouse/ClickHouse/issues/6836). [#7758](https://github.com/ClickHouse/ClickHouse/pull/7758) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed the behaviour of `ALTER MOVE` executed immediately after a merge finished, which could move a superpart of the one specified. Fixes [#8103](https://github.com/ClickHouse/ClickHouse/issues/8103). [#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix possible server crash while using `UNION` with a different number of columns. Fixes [#7279](https://github.com/ClickHouse/ClickHouse/issues/7279). [#7929](https://github.com/ClickHouse/ClickHouse/pull/7929) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Fix size of the result substring for function `substr` with negative size. [#8589](https://github.com/ClickHouse/ClickHouse/pull/8589) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Now the server does not execute part mutations in `MergeTree` if there are not enough free threads in the background pool. [#8588](https://github.com/ClickHouse/ClickHouse/pull/8588) ([tavplubix](https://github.com/tavplubix))
+* Fix a minor typo in formatting `UNION ALL` AST. [#7999](https://github.com/ClickHouse/ClickHouse/pull/7999) ([litao91](https://github.com/litao91))
+* Fixed incorrect bloom filter results for negative numbers. This fixes [#8317](https://github.com/ClickHouse/ClickHouse/issues/8317). [#8566](https://github.com/ClickHouse/ClickHouse/pull/8566) ([Winter Zhang](https://github.com/zhang2014))
+* Fixed potential buffer overflow in decompress. A malicious user could pass fabricated compressed data that would cause a read past the buffer. This issue was found by Eldar Zaitov from the Yandex information security team. [#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix incorrect result because of integer overflow in `arrayIntersect`. [#7777](https://github.com/ClickHouse/ClickHouse/pull/7777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Now the `OPTIMIZE TABLE` query will not wait for offline replicas to perform the operation. [#8314](https://github.com/ClickHouse/ClickHouse/pull/8314) ([javi santana](https://github.com/javisantana))
+* Fixed `ALTER TTL` parser for `Replicated*MergeTree` tables. [#8318](https://github.com/ClickHouse/ClickHouse/pull/8318) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix communication between server and client, so the server reads temporary tables info after a query failure. [#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat))
+* Fix `bitmapAnd` function error when intersecting an aggregated bitmap and a scalar bitmap (see the sketch below). [#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432))
+* Refine the definition of `ZXid` according to the ZooKeeper Programmer's Guide, which fixes a bug in `clickhouse-cluster-copier`. [#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
+* The `odbc` table function now respects the `external_table_functions_use_nulls` setting. [#7506](https://github.com/ClickHouse/ClickHouse/pull/7506) ([Vasily Nemkov](https://github.com/Enmk))
+* Fixed bug that led to a rare data race. [#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz))
+* Now `SYSTEM RELOAD DICTIONARY` reloads a dictionary completely, ignoring `update_field`. This fixes [#7440](https://github.com/ClickHouse/ClickHouse/issues/7440). [#8037](https://github.com/ClickHouse/ClickHouse/pull/8037) ([Vitaly Baranov](https://github.com/vitlibar))
+* Add ability to check if a dictionary exists in a create query. [#8032](https://github.com/ClickHouse/ClickHouse/pull/8032) ([alesapin](https://github.com/alesapin))
+* Fix `Float*` parsing in the `Values` format. This fixes [#7817](https://github.com/ClickHouse/ClickHouse/issues/7817). [#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix))
+* Fix crash when we cannot reserve space in some background operations of the `*MergeTree` table engines family. [#7873](https://github.com/ClickHouse/ClickHouse/pull/7873) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix crash of the merge operation when the table contains a `SimpleAggregateFunction(LowCardinality)` column. This fixes [#8515](https://github.com/ClickHouse/ClickHouse/issues/8515). [#8522](https://github.com/ClickHouse/ClickHouse/pull/8522) ([Azat Khuzhin](https://github.com/azat))
+* Restore support of all ICU locales and add the ability to apply collations for constant expressions. Also add the language name to the `system.collations` table. [#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin))
+* Fix bug when external dictionaries with zero minimal lifetime (`LIFETIME(MIN 0 MAX N)`, `LIFETIME(N)`) don't update in background. [#7983](https://github.com/ClickHouse/ClickHouse/pull/7983) ([alesapin](https://github.com/alesapin))
+* Fix crash when an external dictionary with a ClickHouse source has a subquery in its query. [#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
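The `bitmapAnd` fix from the list above, illustrated with a sketch that intersects an aggregated bitmap with a scalar one built by `bitmapBuild` (values are arbitrary, and `arrayMap` is used only to match the bitmap element types):

```sql
SELECT bitmapToArray(
    bitmapAnd(
        groupBitmapState(number),                             -- aggregated bitmap of 0..9
        bitmapBuild(arrayMap(x -> toUInt64(x), [1, 2, 3]))    -- scalar bitmap
    )) AS res
FROM numbers(10);  -- expected: [1,2,3]
```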
+* Fix incorrect parsing of the file extension in a table with engine `URL`. This fixes [#8157](https://github.com/ClickHouse/ClickHouse/issues/8157). [#8419](https://github.com/ClickHouse/ClickHouse/pull/8419) ([Andrey Bodrov](https://github.com/apbodrov))
+* Fix `CHECK TABLE` query for `*MergeTree` tables without a key. Fixes [#7543](https://github.com/ClickHouse/ClickHouse/issues/7543). [#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin))
+* Fixed conversion of `Float64` to MySQL type. [#8079](https://github.com/ClickHouse/ClickHouse/pull/8079) ([Yuriy Baranov](https://github.com/yurriy))
+* Now if a table was not completely dropped because of a server crash, the server will try to restore and load it. [#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix))
+* Fixed crash in table function `file` while inserting into a file that doesn't exist. Now in this case the file is created and the insert is processed (see the sketch below). [#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia))
+* Fix rare deadlock which can happen when `trace_log` is enabled. [#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov))
+* Add ability to work with different types besides `Date` in a `RangeHashed` external dictionary created from a DDL query. Fixes [#7899](https://github.com/ClickHouse/ClickHouse/issues/7899). [#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin))
+* Fixed crash when `now64()` is called with the result of another function. [#8270](https://github.com/ClickHouse/ClickHouse/pull/8270) ([Vasily Nemkov](https://github.com/Enmk))
+* Fixed bug with detecting the client IP for connections through the MySQL wire protocol. [#7743](https://github.com/ClickHouse/ClickHouse/pull/7743) ([Dmitry Muzyka](https://github.com/dmitriy-myz))
+* Fix empty array handling in the `arraySplit` function. This fixes [#7708](https://github.com/ClickHouse/ClickHouse/issues/7708). [#7747](https://github.com/ClickHouse/ClickHouse/pull/7747) ([hcz](https://github.com/hczhcz))
+* Fixed the issue when the `pid-file` of another running `clickhouse-server` may be deleted. [#8487](https://github.com/ClickHouse/ClickHouse/pull/8487) ([Weiqing Xu](https://github.com/weiqxu))
+* Fix dictionary reload if it has `invalidate_query`, which stopped updates after an exception on previous update attempts. [#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin))
+* Fixed an error in function `arrayReduce` that may lead to a "double free" and an error in the aggregate function combinator `Resample` that may lead to a memory leak. Added aggregate function `aggThrow`. This function can be used for testing purposes. [#8446](https://github.com/ClickHouse/ClickHouse/pull/8446) ([alexey-milovidov](https://github.com/alexey-milovidov))
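A hedged sketch of the table function `file` fix mentioned in the bug-fix list above; the path and schema are illustrative, and the file is created under the server's `user_files_path` if it does not exist:

```sql
INSERT INTO TABLE FUNCTION file('out.csv', 'CSV', 'a Int64, b String')
VALUES (1, 'x'), (2, 'y');
```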
+
+#### Improvement
+* Improved logging when working with the `S3` table engine. [#8251](https://github.com/ClickHouse/ClickHouse/pull/8251) ([Grigory Pervakov](https://github.com/GrigoryPervakov))
+* Print a help message when no arguments are passed when calling `clickhouse-local`. This fixes [#5335](https://github.com/ClickHouse/ClickHouse/issues/5335). [#8230](https://github.com/ClickHouse/ClickHouse/pull/8230) ([Andrey Nagorny](https://github.com/Melancholic))
+* Add setting `mutations_sync` which allows waiting for `ALTER UPDATE/DELETE` queries synchronously (see the sketch below). [#8237](https://github.com/ClickHouse/ClickHouse/pull/8237) ([alesapin](https://github.com/alesapin))
+* Allow setting up a relative `user_files_path` in `config.xml` (in a way similar to `format_schema_path`). [#7632](https://github.com/ClickHouse/ClickHouse/pull/7632) ([hcz](https://github.com/hczhcz))
+* Add exception for illegal types for conversion functions with the `-OrZero` postfix. [#7880](https://github.com/ClickHouse/ClickHouse/pull/7880) ([Andrey Konyaev](https://github.com/akonyaev90))
+* Simplify the format of the header of data sent to a shard in a distributed query. [#8044](https://github.com/ClickHouse/ClickHouse/pull/8044) ([Vitaly Baranov](https://github.com/vitlibar))
+* `Live View` table engine refactoring. [#8519](https://github.com/ClickHouse/ClickHouse/pull/8519) ([vzakaznikov](https://github.com/vzakaznikov))
+* Add additional checks for external dictionaries created from DDL queries. [#8127](https://github.com/ClickHouse/ClickHouse/pull/8127) ([alesapin](https://github.com/alesapin))
+* Fix error `Column ... already exists` while using `FINAL` and `SAMPLE` together, e.g. `select count() from table final sample 1/2`. Fixes [#5186](https://github.com/ClickHouse/ClickHouse/issues/5186). [#7907](https://github.com/ClickHouse/ClickHouse/pull/7907) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Now the first argument of the `joinGet` function can be a table identifier. [#7707](https://github.com/ClickHouse/ClickHouse/pull/7707) ([Amos Bird](https://github.com/amosbird))
+* Allow using `MaterializedView` with subqueries above `Kafka` tables. [#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))
+* Now background moves between disks run in a separate thread pool. [#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon))
+* `SYSTEM RELOAD DICTIONARY` now executes synchronously. [#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([Vitaly Baranov](https://github.com/vitlibar))
+* Stack traces now display physical addresses (offsets in the object file) instead of virtual memory addresses (where the object file was loaded). That allows the use of `addr2line` when the binary is position-independent and ASLR is active. This fixes [#8360](https://github.com/ClickHouse/ClickHouse/issues/8360). [#8387](https://github.com/ClickHouse/ClickHouse/pull/8387) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Support new syntax for row-level security filters: `<table name='table_name'>…</table>`. Fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([Ivan](https://github.com/abyss7))
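A sketch of the `mutations_sync` setting introduced in the list above; the table name `hits` is hypothetical, and the value `2` is assumed to wait for the mutation on all replicas (`1` waits only on the current server):

```sql
SET mutations_sync = 2;
ALTER TABLE hits DELETE WHERE event_date < '2019-01-01';  -- returns only after the mutation finishes
```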
+* Now the `cityHash` function can work with `Decimal` and `UUID` types. Fixes [#5184](https://github.com/ClickHouse/ClickHouse/issues/5184). [#7693](https://github.com/ClickHouse/ClickHouse/pull/7693) ([Mikhail Korotov](https://github.com/millb))
+* Removed fixed index granularity (it was 1024) from system logs because it's obsolete after the implementation of adaptive granularity. [#7698](https://github.com/ClickHouse/ClickHouse/pull/7698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Enabled MySQL compatibility server when ClickHouse is compiled without SSL. [#7852](https://github.com/ClickHouse/ClickHouse/pull/7852) ([Yuriy Baranov](https://github.com/yurriy))
+* Now the server checksums distributed batches, which gives more verbose errors in case of corrupted data in a batch. [#7914](https://github.com/ClickHouse/ClickHouse/pull/7914) ([Azat Khuzhin](https://github.com/azat))
+* Support `DROP DATABASE`, `DETACH TABLE`, `DROP TABLE` and `ATTACH TABLE` for the `MySQL` database engine. [#8202](https://github.com/ClickHouse/ClickHouse/pull/8202) ([Winter Zhang](https://github.com/zhang2014))
+* Add authentication in the S3 table function and table engine. [#7623](https://github.com/ClickHouse/ClickHouse/pull/7623) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Added a check for extra parts of `MergeTree` on different disks, in order not to miss data parts on undefined disks. [#8118](https://github.com/ClickHouse/ClickHouse/pull/8118) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Enable SSL support for the Mac client and server. [#8297](https://github.com/ClickHouse/ClickHouse/pull/8297) ([Ivan](https://github.com/abyss7))
+* Now ClickHouse can work as a MySQL federated server (see https://dev.mysql.com/doc/refman/5.7/en/federated-create-server.html). [#7717](https://github.com/ClickHouse/ClickHouse/pull/7717) ([Maxim Fedotov](https://github.com/MaxFedotov))
+* `clickhouse-client` now only enables `bracketed-paste` when multiquery is on and multiline is off. This fixes [#7757](https://github.com/ClickHouse/ClickHouse/issues/7757). [#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
+* Support `Array(Decimal)` in the `if` function. [#7721](https://github.com/ClickHouse/ClickHouse/pull/7721) ([Artem Zuikov](https://github.com/4ertus2))
+* Support Decimals in the `arrayDifference`, `arrayCumSum` and `arrayCumSumNegative` functions. [#7724](https://github.com/ClickHouse/ClickHouse/pull/7724) ([Artem Zuikov](https://github.com/4ertus2))
+* Added `lifetime` column to the `system.dictionaries` table. [#6820](https://github.com/ClickHouse/ClickHouse/issues/6820) [#7727](https://github.com/ClickHouse/ClickHouse/pull/7727) ([kekekekule](https://github.com/kekekekule))
+* Improved check for existing parts on different disks for `*MergeTree` table engines. Addresses [#7660](https://github.com/ClickHouse/ClickHouse/issues/7660). [#8440](https://github.com/ClickHouse/ClickHouse/pull/8440) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Integration with the `AWS SDK` for `S3` interactions, which allows to use all S3 features out of the box. [#8011](https://github.com/ClickHouse/ClickHouse/pull/8011) ([Pavel Kovalenko](https://github.com/Jokser))
+* Added support for subqueries in `Live View` tables. [#7792](https://github.com/ClickHouse/ClickHouse/pull/7792) ([vzakaznikov](https://github.com/vzakaznikov))
+* Check for using a `Date` or `DateTime` column from `TTL` expressions was removed. [#7920](https://github.com/ClickHouse/ClickHouse/pull/7920) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Information about disk was added to the `system.detached_parts` table. [#7833](https://github.com/ClickHouse/ClickHouse/pull/7833) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Now settings `max_(table|partition)_size_to_drop` can be changed without a restart. [#7779](https://github.com/ClickHouse/ClickHouse/pull/7779) ([Grigory Pervakov](https://github.com/GrigoryPervakov))
+* Slightly better usability of error messages. Ask the user not to remove the lines below `Stack trace:`. [#7897](https://github.com/ClickHouse/ClickHouse/pull/7897) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Better reading of messages from the `Kafka` engine in various formats after [#7935](https://github.com/ClickHouse/ClickHouse/issues/7935). [#8035](https://github.com/ClickHouse/ClickHouse/pull/8035) ([Ivan](https://github.com/abyss7))
+* Better compatibility with MySQL clients which don't support the `sha2_password` auth plugin. [#8036](https://github.com/ClickHouse/ClickHouse/pull/8036) ([Yuriy Baranov](https://github.com/yurriy))
+* Support more column types in the MySQL compatibility server. [#7975](https://github.com/ClickHouse/ClickHouse/pull/7975) ([Yuriy Baranov](https://github.com/yurriy))
+* Implement `ORDER BY` optimization for `Merge`, `Buffer` and `Materialized View` storages with underlying `MergeTree` tables. [#8130](https://github.com/ClickHouse/ClickHouse/pull/8130) ([Anton Popov](https://github.com/CurtizJ))
+* Now we always use the POSIX implementation of `getrandom` to have better compatibility with old kernels (< 3.17). [#7940](https://github.com/ClickHouse/ClickHouse/pull/7940) ([Amos Bird](https://github.com/amosbird))
+* Better check for a valid destination in a move TTL rule. [#8410](https://github.com/ClickHouse/ClickHouse/pull/8410) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Better checks for broken insert batches for the `Distributed` table engine. [#7933](https://github.com/ClickHouse/ClickHouse/pull/7933) ([Azat Khuzhin](https://github.com/azat))
+* Add a column with an array of names of parts which mutations must process in the future to the `system.mutations` table. [#8179](https://github.com/ClickHouse/ClickHouse/pull/8179) ([alesapin](https://github.com/alesapin))
+* Parallel merge sort optimization for processors. [#8552](https://github.com/ClickHouse/ClickHouse/pull/8552) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* The setting `mark_cache_min_lifetime` is now obsolete and does nothing. In previous versions, the mark cache could grow in memory larger than `mark_cache_size` to accommodate data within `mark_cache_min_lifetime` seconds. That was leading to confusion and higher memory usage than expected, which is especially bad on memory-constrained systems. If you see performance degradation after installing this release, you should increase `mark_cache_size`. [#8484](https://github.com/ClickHouse/ClickHouse/pull/8484) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Preparation to use `tid` everywhere. This is needed for [#7477](https://github.com/ClickHouse/ClickHouse/issues/7477). [#8276](https://github.com/ClickHouse/ClickHouse/pull/8276) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Performance Improvement
+* Performance optimizations in the processors pipeline. [#7988](https://github.com/ClickHouse/ClickHouse/pull/7988) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Non-blocking updates of expired keys in cache dictionaries (with permission to read the old ones). [#8303](https://github.com/ClickHouse/ClickHouse/pull/8303) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Compile ClickHouse without `-fno-omit-frame-pointer` globally to spare one more register. [#8097](https://github.com/ClickHouse/ClickHouse/pull/8097) ([Amos Bird](https://github.com/amosbird))
+* Speed up the `greatCircleDistance` function and add performance tests for it. [#7307](https://github.com/ClickHouse/ClickHouse/pull/7307) ([Olga Khvostikova](https://github.com/stavrolia))
+* Improved performance of function `roundDown`. [#8465](https://github.com/ClickHouse/ClickHouse/pull/8465) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Improved performance of `max`, `min`, `argMin`, `argMax` for the `DateTime64` data type. [#8199](https://github.com/ClickHouse/ClickHouse/pull/8199) ([Vasily Nemkov](https://github.com/Enmk))
+* Improved performance of sorting without a limit or with a big limit and external sorting. [#8545](https://github.com/ClickHouse/ClickHouse/pull/8545) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Improved performance of formatting floating point numbers up to 6 times. [#8542](https://github.com/ClickHouse/ClickHouse/pull/8542) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Improved performance of the `modulo` function. [#7750](https://github.com/ClickHouse/ClickHouse/pull/7750) ([Amos Bird](https://github.com/amosbird))
+* Optimized `ORDER BY` and merging with a single-column key. [#8335](https://github.com/ClickHouse/ClickHouse/pull/8335) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Better implementation for `arrayReduce`, `-Array` and `-State` combinators. [#7710](https://github.com/ClickHouse/ClickHouse/pull/7710) ([Amos Bird](https://github.com/amosbird))
+* Now `PREWHERE` should be optimized to be at least as efficient as `WHERE`. [#7769](https://github.com/ClickHouse/ClickHouse/pull/7769) ([Amos Bird](https://github.com/amosbird))
+* Improve the way `round` and `roundBankers` handle negative numbers. [#8229](https://github.com/ClickHouse/ClickHouse/pull/8229) ([hcz](https://github.com/hczhcz))
+* Improved decoding performance of `DoubleDelta` and `Gorilla` codecs by roughly 30-40%. This fixes [#7082](https://github.com/ClickHouse/ClickHouse/issues/7082). [#8019](https://github.com/ClickHouse/ClickHouse/pull/8019) ([Vasily Nemkov](https://github.com/Enmk))
+* Improved performance of `base64`-related functions. [#8444](https://github.com/ClickHouse/ClickHouse/pull/8444) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added a function `geoDistance`. It is similar to `greatCircleDistance` but uses an approximation to the WGS-84 ellipsoid model. The performance of both functions is nearly the same. [#8086](https://github.com/ClickHouse/ClickHouse/pull/8086) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Faster `min` and `max` aggregation functions for the `Decimal` data type. [#8144](https://github.com/ClickHouse/ClickHouse/pull/8144) ([Artem Zuikov](https://github.com/4ertus2))
+* Vectorized processing of `arrayReduce`. [#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird))
+* `if` chains are now optimized as `multiIf` (see the sketch below). [#8355](https://github.com/ClickHouse/ClickHouse/pull/8355) ([kamalov-ruslan](https://github.com/kamalov-ruslan))
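The `if`-to-`multiIf` rewrite mentioned just above makes forms like these equivalent in the query pipeline (the table `t` and column `x` are illustrative):

```sql
SELECT if(x = 1, 'one', if(x = 2, 'two', 'many'))  FROM t;  -- chained if()s
SELECT multiIf(x = 1, 'one', x = 2, 'two', 'many') FROM t;  -- what the chain is rewritten into
```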
+* Fix performance regression of the `Kafka` table engine introduced in 19.15. This fixes [#7261](https://github.com/ClickHouse/ClickHouse/issues/7261). [#7935](https://github.com/ClickHouse/ClickHouse/pull/7935) ([filimonov](https://github.com/filimonov))
+* Removed "pie" code generation that `gcc` from Debian packages occasionally brings by default. [#8483](https://github.com/ClickHouse/ClickHouse/pull/8483) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Parallel parsing of data formats. [#6553](https://github.com/ClickHouse/ClickHouse/pull/6553) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+* Enable the optimized parser of `Values` with expressions by default (`input_format_values_deduce_templates_of_expressions=1`). [#8231](https://github.com/ClickHouse/ClickHouse/pull/8231) ([tavplubix](https://github.com/tavplubix))
+
+#### Build/Testing/Packaging Improvement
+* Build fixes for `ARM` and in minimal mode. [#8304](https://github.com/ClickHouse/ClickHouse/pull/8304) ([proller](https://github.com/proller))
+* Add coverage file flush for `clickhouse-server` when std::atexit is not called. Also slightly improved logging in stateless tests with coverage. [#8267](https://github.com/ClickHouse/ClickHouse/pull/8267) ([alesapin](https://github.com/alesapin))
+* Update LLVM library in contrib. Avoid using LLVM from OS packages. [#8258](https://github.com/ClickHouse/ClickHouse/pull/8258) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Make the bundled `curl` build fully quiet. [#8232](https://github.com/ClickHouse/ClickHouse/pull/8232) [#8203](https://github.com/ClickHouse/ClickHouse/pull/8203) ([Pavel Kovalenko](https://github.com/Jokser))
+* Fix some `MemorySanitizer` warnings. [#8235](https://github.com/ClickHouse/ClickHouse/pull/8235) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Use `add_warning` and `no_warning` macros in `CMakeLists.txt`. [#8604](https://github.com/ClickHouse/ClickHouse/pull/8604) ([Ivan](https://github.com/abyss7))
+* Add support for MinIO S3-compatible object storage (https://min.io/) for better integration tests. [#7863](https://github.com/ClickHouse/ClickHouse/pull/7863) [#7875](https://github.com/ClickHouse/ClickHouse/pull/7875) ([Pavel Kovalenko](https://github.com/Jokser))
+* Imported `libc` headers to contrib. It allows to make builds more consistent across various systems (only for `x86_64-linux-gnu`). [#5773](https://github.com/ClickHouse/ClickHouse/pull/5773) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Remove `-fPIC` from some libraries. [#8464](https://github.com/ClickHouse/ClickHouse/pull/8464) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Clean `CMakeLists.txt` for curl. See https://github.com/ClickHouse/ClickHouse/pull/8011#issuecomment-569478910. [#8459](https://github.com/ClickHouse/ClickHouse/pull/8459) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Silenced warnings in the `CapNProto` library. [#8220](https://github.com/ClickHouse/ClickHouse/pull/8220) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add performance tests for short-string-optimized hash tables. [#7679](https://github.com/ClickHouse/ClickHouse/pull/7679) ([Amos Bird](https://github.com/amosbird))
+
+#### Build/Testing/Packaging Improvement
+* Build fixes for `ARM` and in minimal mode. [#8304](https://github.com/ClickHouse/ClickHouse/pull/8304) ([proller](https://github.com/proller))
+* Add coverage file flush for `clickhouse-server` when std::atexit is not called. Also slightly improved logging in stateless tests with coverage. [#8267](https://github.com/ClickHouse/ClickHouse/pull/8267) ([alesapin](https://github.com/alesapin))
+* Update the LLVM library in contrib. Avoid using LLVM from OS packages. [#8258](https://github.com/ClickHouse/ClickHouse/pull/8258) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Make the bundled `curl` build fully quiet. [#8232](https://github.com/ClickHouse/ClickHouse/pull/8232) [#8203](https://github.com/ClickHouse/ClickHouse/pull/8203) ([Pavel Kovalenko](https://github.com/Jokser))
+* Fix some `MemorySanitizer` warnings. [#8235](https://github.com/ClickHouse/ClickHouse/pull/8235) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Use `add_warning` and `no_warning` macros in `CMakeLists.txt`. [#8604](https://github.com/ClickHouse/ClickHouse/pull/8604) ([Ivan](https://github.com/abyss7))
+* Add support for MinIO S3-compatible object storage (https://min.io/) for better integration tests. [#7863](https://github.com/ClickHouse/ClickHouse/pull/7863) [#7875](https://github.com/ClickHouse/ClickHouse/pull/7875) ([Pavel Kovalenko](https://github.com/Jokser))
+* Imported `libc` headers to contrib. It allows making builds more consistent across various systems (only for `x86_64-linux-gnu`). [#5773](https://github.com/ClickHouse/ClickHouse/pull/5773) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Remove `-fPIC` from some libraries. [#8464](https://github.com/ClickHouse/ClickHouse/pull/8464) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Clean `CMakeLists.txt` for curl. See https://github.com/ClickHouse/ClickHouse/pull/8011#issuecomment-569478910 [#8459](https://github.com/ClickHouse/ClickHouse/pull/8459) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Silence warnings in the `CapNProto` library. [#8220](https://github.com/ClickHouse/ClickHouse/pull/8220) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add performance tests for short string optimized hash tables. [#7679](https://github.com/ClickHouse/ClickHouse/pull/7679) ([Amos Bird](https://github.com/amosbird))
+* Now ClickHouse will build on `AArch64` even if `MADV_FREE` is not available. This fixes [#8027](https://github.com/ClickHouse/ClickHouse/issues/8027). [#8243](https://github.com/ClickHouse/ClickHouse/pull/8243) ([Amos Bird](https://github.com/amosbird))
+* Update `zlib-ng` to fix memory sanitizer problems. [#7182](https://github.com/ClickHouse/ClickHouse/pull/7182) [#8206](https://github.com/ClickHouse/ClickHouse/pull/8206) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Enable the internal MySQL library on non-Linux systems, because usage of OS packages is very fragile and usually doesn't work at all. This fixes [#5765](https://github.com/ClickHouse/ClickHouse/issues/5765). [#8426](https://github.com/ClickHouse/ClickHouse/pull/8426) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed build on some systems after enabling `libc++`. This supersedes [#8374](https://github.com/ClickHouse/ClickHouse/issues/8374). [#8380](https://github.com/ClickHouse/ClickHouse/pull/8380) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Make `Field` methods more type-safe to find more errors. [#7386](https://github.com/ClickHouse/ClickHouse/pull/7386) [#8209](https://github.com/ClickHouse/ClickHouse/pull/8209) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Added missing files to the `libc-headers` submodule. [#8507](https://github.com/ClickHouse/ClickHouse/pull/8507) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix wrong `JSON` quoting in performance test output. [#8497](https://github.com/ClickHouse/ClickHouse/pull/8497) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Now the stack trace is displayed for `std::exception` and `Poco::Exception`. In previous versions it was available only for `DB::Exception`. This improves diagnostics. [#8501](https://github.com/ClickHouse/ClickHouse/pull/8501) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Port `clock_gettime` and `clock_nanosleep` for fresh glibc versions. [#8054](https://github.com/ClickHouse/ClickHouse/pull/8054) ([Amos Bird](https://github.com/amosbird))
+* Enable `part_log` in the example config for developers. [#8609](https://github.com/ClickHouse/ClickHouse/pull/8609) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix the async nature of reload in `01036_no_superfluous_dict_reload_on_create_database*`. [#8111](https://github.com/ClickHouse/ClickHouse/pull/8111) ([Azat Khuzhin](https://github.com/azat))
+* Fixed codec performance tests. [#8615](https://github.com/ClickHouse/ClickHouse/pull/8615) ([Vasily Nemkov](https://github.com/Enmk))
+* Add install scripts for the `.tgz` build and documentation for them. [#8612](https://github.com/ClickHouse/ClickHouse/pull/8612) [#8591](https://github.com/ClickHouse/ClickHouse/pull/8591) ([alesapin](https://github.com/alesapin))
+* Removed the old `ZSTD` test (it was created in 2016 to reproduce a bug that pre-1.0 versions of ZSTD had). This fixes [#8618](https://github.com/ClickHouse/ClickHouse/issues/8618). [#8619](https://github.com/ClickHouse/ClickHouse/pull/8619) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed build on Mac OS Catalina. [#8600](https://github.com/ClickHouse/ClickHouse/pull/8600) ([meo](https://github.com/meob))
+* Increased the number of rows in codec performance tests to make results noticeable.
[#8574](https://github.com/ClickHouse/ClickHouse/pull/8574) ([Vasily Nemkov](https://github.com/Enmk))
+* In debug builds, treat `LOGICAL_ERROR` exceptions as assertion failures, so that they are easier to notice. [#8475](https://github.com/ClickHouse/ClickHouse/pull/8475) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Make the formats-related performance test more deterministic. [#8477](https://github.com/ClickHouse/ClickHouse/pull/8477) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Update `lz4` to fix a MemorySanitizer failure. [#8181](https://github.com/ClickHouse/ClickHouse/pull/8181) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Suppress a known MemorySanitizer false positive in exception handling. [#8182](https://github.com/ClickHouse/ClickHouse/pull/8182) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Update `gcc` and `g++` to version 9 in `build/docker/build.sh`. [#7766](https://github.com/ClickHouse/ClickHouse/pull/7766) ([TLightSky](https://github.com/tlightsky))
+* Add a performance test case to test that `PREWHERE` is worse than `WHERE`. [#7768](https://github.com/ClickHouse/ClickHouse/pull/7768) ([Amos Bird](https://github.com/amosbird))
+* Progress towards fixing one flaky test. [#8621](https://github.com/ClickHouse/ClickHouse/pull/8621) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Avoid MemorySanitizer report for data from `libunwind`. [#8539](https://github.com/ClickHouse/ClickHouse/pull/8539) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Updated `libc++` to the latest version. [#8324](https://github.com/ClickHouse/ClickHouse/pull/8324) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Build the ICU library from sources. This fixes [#6460](https://github.com/ClickHouse/ClickHouse/issues/6460). [#8219](https://github.com/ClickHouse/ClickHouse/pull/8219) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Switched from `libressl` to `openssl`. ClickHouse should support TLS 1.3 and SNI after this change. This fixes [#8171](https://github.com/ClickHouse/ClickHouse/issues/8171). [#8218](https://github.com/ClickHouse/ClickHouse/pull/8218) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fixed UBSan report when using `chacha20_poly1305` from SSL (happens on connect to https://yandex.ru/). [#8214](https://github.com/ClickHouse/ClickHouse/pull/8214) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix the mode of the default password file for `.deb` Linux distros. [#8075](https://github.com/ClickHouse/ClickHouse/pull/8075) ([proller](https://github.com/proller))
+* Improved expression for getting the `clickhouse-server` PID in `clickhouse-test`. [#8063](https://github.com/ClickHouse/ClickHouse/pull/8063) ([Alexander Kazakov](https://github.com/Akazz))
+* Updated contrib/googletest to v1.10.0. [#8587](https://github.com/ClickHouse/ClickHouse/pull/8587) ([Alexander Burmak](https://github.com/Alex-Burmak))
+* Fixed a ThreadSanitizer report in the `base64` library. Also updated this library to the latest version, but it doesn't matter. This fixes [#8397](https://github.com/ClickHouse/ClickHouse/issues/8397). [#8403](https://github.com/ClickHouse/ClickHouse/pull/8403) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Fix `00600_replace_running_query` for processors. [#8272](https://github.com/ClickHouse/ClickHouse/pull/8272) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Remove support for `tcmalloc` to make `CMakeLists.txt` simpler.
[#8310](https://github.com/ClickHouse/ClickHouse/pull/8310) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Release gcc builds now use `libc++` instead of `libstdc++`. Recently `libc++` was used only with clang. This will improve consistency of build configurations and portability. [#8311](https://github.com/ClickHouse/ClickHouse/pull/8311) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Enable the ICU library for builds with MemorySanitizer. [#8222](https://github.com/ClickHouse/ClickHouse/pull/8222) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Suppress warnings from the `CapNProto` library. [#8224](https://github.com/ClickHouse/ClickHouse/pull/8224) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Removed special cases of code for `tcmalloc`, because it's no longer supported. [#8225](https://github.com/ClickHouse/ClickHouse/pull/8225) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* In the CI coverage task, kill the server gracefully to allow it to save the coverage report. This fixes incomplete coverage reports we've been seeing lately. [#8142](https://github.com/ClickHouse/ClickHouse/pull/8142) ([alesapin](https://github.com/alesapin))
+* Performance tests for all codecs against `Float64` and `UInt64` values. [#8349](https://github.com/ClickHouse/ClickHouse/pull/8349) ([Vasily Nemkov](https://github.com/Enmk))
+* `termcap` is deprecated and leads to various problems (e.g. a missing "up" cap and echoing `^J` instead of multiline). Favor `terminfo` or bundled `ncurses`. [#7737](https://github.com/ClickHouse/ClickHouse/pull/7737) ([Amos Bird](https://github.com/amosbird))
+* Fix the `test_storage_s3` integration test. [#7734](https://github.com/ClickHouse/ClickHouse/pull/7734) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Support `StorageFile(, null)` to insert a block into the given format file without actually writing to disk. This is required for performance tests. [#8455](https://github.com/ClickHouse/ClickHouse/pull/8455) ([Amos Bird](https://github.com/amosbird))
+* Added the argument `--print-time` to functional tests, which prints execution time per test. [#8001](https://github.com/ClickHouse/ClickHouse/pull/8001) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Added asserts to `KeyCondition` while evaluating RPN. This will fix a warning from gcc-9. [#8279](https://github.com/ClickHouse/ClickHouse/pull/8279) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Dump cmake options in CI builds. [#8273](https://github.com/ClickHouse/ClickHouse/pull/8273) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Don't generate debug info for some fat libraries. [#8271](https://github.com/ClickHouse/ClickHouse/pull/8271) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Make `log_to_console.xml` always log to stderr, regardless of whether it is interactive or not. [#8395](https://github.com/ClickHouse/ClickHouse/pull/8395) ([Alexander Kuzmenkov](https://github.com/akuzm))
+* Removed some unused features from the `clickhouse-performance-test` tool. [#8555](https://github.com/ClickHouse/ClickHouse/pull/8555) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Now we will also search for `lld-X` with the corresponding `clang-X` version. [#8092](https://github.com/ClickHouse/ClickHouse/pull/8092) ([alesapin](https://github.com/alesapin))
+* Parquet build improvement.
[#8421](https://github.com/ClickHouse/ClickHouse/pull/8421) ([maxulan](https://github.com/maxulan))
+* More GCC warnings. [#8221](https://github.com/ClickHouse/ClickHouse/pull/8221) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+* The package for Arch Linux now allows running the ClickHouse server, and not only the client. [#8534](https://github.com/ClickHouse/ClickHouse/pull/8534) ([Vladimir Chebotarev](https://github.com/excitoon))
+* Fix test with processors. Tiny performance fixes. [#7672](https://github.com/ClickHouse/ClickHouse/pull/7672) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+* Update contrib/protobuf. [#8256](https://github.com/ClickHouse/ClickHouse/pull/8256) ([Matwey V. Kornilov](https://github.com/matwey))
+* In preparation for switching to C++20 as a new year celebration. "May the C++ force be with ClickHouse." [#8447](https://github.com/ClickHouse/ClickHouse/pull/8447) ([Amos Bird](https://github.com/amosbird))
+
+#### Experimental Feature
+* Added the experimental setting `min_bytes_to_use_mmap_io`. It allows reading big files without copying data from kernel to userspace. The setting is disabled by default. The recommended threshold is about 64 MB, because mmap/munmap is slow (a usage sketch follows this list). [#8520](https://github.com/ClickHouse/ClickHouse/pull/8520) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Reworked quotas as a part of the access control system. Added a new table `system.quotas`, new functions `currentQuota`, `currentQuotaKey`, and new SQL syntax `CREATE QUOTA`, `ALTER QUOTA`, `DROP QUOTA`, `SHOW QUOTA` (see the sketch after this list). [#7257](https://github.com/ClickHouse/ClickHouse/pull/7257) ([Vitaly Baranov](https://github.com/vitlibar))
+* Allow skipping unknown settings with warnings instead of throwing exceptions. [#7653](https://github.com/ClickHouse/ClickHouse/pull/7653) ([Vitaly Baranov](https://github.com/vitlibar))
+* Reworked row policies as a part of the access control system. Added a new table `system.row_policies`, a new function `currentRowPolicies()`, and new SQL syntax `CREATE POLICY`, `ALTER POLICY`, `DROP POLICY`, `SHOW CREATE POLICY`, `SHOW POLICIES`. [#7808](https://github.com/ClickHouse/ClickHouse/pull/7808) ([Vitaly Baranov](https://github.com/vitlibar))
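A hedged usage sketch for the experimental entries above. It uses only the statement, table, and function names listed in the notes; the threshold value comes from the ~64 MB recommendation in the `min_bytes_to_use_mmap_io` entry, and the exact clauses accepted by the quota and policy statements may differ between builds.

```sql
-- Opt in to mmap-based reads of big files (disabled by default;
-- ~64 MB is the recommended threshold because mmap/munmap is slow).
SET min_bytes_to_use_mmap_io = 67108864;

-- Inspect the entities introduced by the access control rework.
SHOW QUOTA;
SELECT * FROM system.quotas;
SELECT * FROM system.row_policies;
SELECT currentQuota(), currentQuotaKey(), currentRowPolicies();
```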
+
+#### Security Fix
+* Fixed the possibility of reading the directory structure in tables with the `File` table engine. This fixes [#8536](https://github.com/ClickHouse/ClickHouse/issues/8536). [#8537](https://github.com/ClickHouse/ClickHouse/pull/8537) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+## [Changelog for 2019](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2019.md)
diff --git a/docs/ru/whats_new/index.md b/docs/ru/whats_new/index.md new file mode 100644 index 00000000000..0901166b887 --- /dev/null +++ b/docs/ru/whats_new/index.md @@ -0,0 +1,6 @@ +--- +toc_folder_title: What's New +toc_priority: 72 +--- + + diff --git a/docs/ru/whats_new/roadmap.md b/docs/ru/whats_new/roadmap.md new file mode 100644 index 00000000000..3994ed4ac29 --- /dev/null +++ b/docs/ru/whats_new/roadmap.md @@ -0,0 +1,17 @@ +--- +machine_translated: true +machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 +--- + +# Дорожная карта {#roadmap} + +## Q1 2020 {#q1-2020} + +- Управление доступом на основе ролей + +## Q2 2020 {#q2-2020} + +- Интеграция с внешними службами аутентификации +- Пулы ресурсов для более точного распределения емкости кластера между пользователями + +{## [Оригинальная статья](https://clickhouse.tech/docs/en/roadmap/) ##} diff --git a/docs/ru/security_changelog.md b/docs/ru/whats_new/security_changelog.md similarity index 100% rename from docs/ru/security_changelog.md rename to docs/ru/whats_new/security_changelog.md diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml deleted file mode 100644 index 0df04f892cf..00000000000 --- a/docs/toc_ru.yml +++ /dev/null @@ -1,253 +0,0 @@ -nav: - -- 'Введение': - - 'Обзор': 'index.md' - - 'Отличительные возможности ClickHouse': 'introduction/distinctive_features.md' - - 'Особенности ClickHouse, которые могут считаться недостатками': 'introduction/features_considered_disadvantages.md' - - 'Производительность': 'introduction/performance.md' - - 'История': 'introduction/history.md' - - 'Информационная поддержка': 'introduction/info.md' - - 'Пользователи': 'introduction/adopters.md' - -- 'Начало работы': - - 'hidden': 'getting_started/index.md' - - 'Установка': 'getting_started/install.md' - - 'Руководство для начинающих': 'getting_started/tutorial.md' - - 'Тестовые наборы данных': - - 'Введение': 'getting_started/example_datasets/index.md' - - 'OnTime': 'getting_started/example_datasets/ontime.md' - - 'Данные о такси в Нью-Йорке': 'getting_started/example_datasets/nyc_taxi.md' - - 'AMPLab Big Data Benchmark': 'getting_started/example_datasets/amplab_benchmark.md' - - 'WikiStat': 'getting_started/example_datasets/wikistat.md' - - 'Терабайт логов кликов от Criteo': 'getting_started/example_datasets/criteo.md' - - 'Схема «Звезда»': 'getting_started/example_datasets/star_schema.md' - - 'Данные Яндекс.Метрики': 'getting_started/example_datasets/metrica.md' - - 'Playground': 'getting_started/playground.md' - -- 'Интерфейсы': - - 'Введение': 'interfaces/index.md' - - 'Клиент командной строки': 'interfaces/cli.md' - - 'Нативный интерфейс (TCP)': 'interfaces/tcp.md' - - 'HTTP-интерфейс': 'interfaces/http.md' - - 'MySQL-интерфейс': 'interfaces/mysql.md' - - 'Форматы входных и выходных данных': 'interfaces/formats.md' - - 'JDBC-драйвер': 'interfaces/jdbc.md' - - 'ODBC-драйвер': 'interfaces/odbc.md' - - 'C++ клиентская библиотека': 'interfaces/cpp.md' - - 'От сторонних разработчиков': - - 'Клиентские библиотеки': 'interfaces/third-party/client_libraries.md' - - 'Интеграции': 'interfaces/third-party/integrations.md' - - 'Визуальные интерфейсы': 'interfaces/third-party/gui.md' - - 'Прокси': 'interfaces/third-party/proxy.md' - -- 'Движки баз данных': - - 'Введение': 'database_engines/index.md' - - 'MySQL':
'database_engines/mysql.md' - - 'Lazy': 'database_engines/lazy.md' - -- 'Движки таблиц': - - 'Введение': 'operations/table_engines/index.md' - - 'Семейство MergeTree': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Репликация данных': 'operations/table_engines/replication.md' - - 'Произвольный ключ партиционирования': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'Семейство Log': - - 'Введение': 'operations/table_engines/log_family.md' - - 'StripeLog': 'operations/table_engines/stripelog.md' - - 'Log': 'operations/table_engines/log.md' - - 'TinyLog': 'operations/table_engines/tinylog.md' - - 'Интеграции': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' - - 'JDBC': 'operations/table_engines/jdbc.md' - - 'ODBC': 'operations/table_engines/odbc.md' - - 'HDFS': 'operations/table_engines/hdfs.md' - - 'Особые': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'Внешние данные': 'operations/table_engines/external_data.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'GenerateRandom': 'operations/table_engines/generate.md' - -- 'Справка по SQL': - - 'hidden': 'query_language/index.md' - - 'Общий синтаксис': 'query_language/syntax.md' - - 'Запросы': - - 'SELECT': 'query_language/select.md' - - 'INSERT INTO': 'query_language/insert_into.md' - - 'CREATE': 'query_language/create.md' - - 'ALTER': 'query_language/alter.md' - - 'SYSTEM': 'query_language/system.md' - - 'SHOW': 'query_language/show.md' - - 'Прочие': 'query_language/misc.md' - - 'Функции': - - 'Введение': 'query_language/functions/index.md' - - 'Арифметические функции': 'query_language/functions/arithmetic_functions.md' - - 'Функции сравнения': 'query_language/functions/comparison_functions.md' - - 'Логические функции': 'query_language/functions/logical_functions.md' - - 'Функции преобразования типов': 'query_language/functions/type_conversion_functions.md' - - 'Функции для работы с датами и временем': 'query_language/functions/date_time_functions.md' - - 'Функции для работы со строками': 'query_language/functions/string_functions.md' - - 'Функции поиска в строках': 'query_language/functions/string_search_functions.md' - - 'Функции поиска и замены в строках': 'query_language/functions/string_replace_functions.md' - - 'Условные функции': 'query_language/functions/conditional_functions.md' - - 'Математические функции': 'query_language/functions/math_functions.md' - - 'Функции округления': 'query_language/functions/rounding_functions.md' - - 'Функции по работе с массивами': 
'query_language/functions/array_functions.md' - - 'Функции разбиения и слияния строк и массивов': 'query_language/functions/splitting_merging_functions.md' - - 'Битовые функции': 'query_language/functions/bit_functions.md' - - 'Функции для битмапов': 'query_language/functions/bitmap_functions.md' - - 'Функции хэширования': 'query_language/functions/hash_functions.md' - - 'Функции генерации псевдослучайных чисел': 'query_language/functions/random_functions.md' - - 'Функции для работы с UUID': 'query_language/functions/uuid_functions.md' - - 'Функции кодирования': 'query_language/functions/encoding_functions.md' - - 'Функции для работы с URL': 'query_language/functions/url_functions.md' - - 'Функции для работы с IP-адресами': 'query_language/functions/ip_address_functions.md' - - 'Функции для работы с JSON.': 'query_language/functions/json_functions.md' - - 'Функции высшего порядка': 'query_language/functions/higher_order_functions.md' - - 'Функции для работы с внешними словарями': 'query_language/functions/ext_dict_functions.md' - - 'Функции для работы со словарями Яндекс.Метрики': 'query_language/functions/ym_dict_functions.md' - - 'Функции для реализации оператора IN.': 'query_language/functions/in_functions.md' - - 'Функция arrayJoin': 'query_language/functions/array_join.md' - - 'Функции для работы с географическими координатами': 'query_language/functions/geo.md' - - 'Функции c Nullable аргументами': 'query_language/functions/functions_for_nulls.md' - - 'Функции машинного обучения': 'query_language/functions/machine_learning_functions.md' - - 'Функции для интроспекции': 'query_language/functions/introspection.md' - - 'Прочие функции': 'query_language/functions/other_functions.md' - - 'Агрегатные функции': - - 'Введение': 'query_language/agg_functions/index.md' - - 'Справочник функций': 'query_language/agg_functions/reference.md' - - 'Комбинаторы агрегатных функций': 'query_language/agg_functions/combinators.md' - - 'Параметрические агрегатные функции': 'query_language/agg_functions/parametric_functions.md' - - 'Табличные функции': - - 'Введение': 'query_language/table_functions/index.md' - - 'file': 'query_language/table_functions/file.md' - - 'merge': 'query_language/table_functions/merge.md' - - 'numbers': 'query_language/table_functions/numbers.md' - - 'remote': 'query_language/table_functions/remote.md' - - 'url': 'query_language/table_functions/url.md' - - 'mysql': 'query_language/table_functions/mysql.md' - - 'jdbc': 'query_language/table_functions/jdbc.md' - - 'odbc': 'query_language/table_functions/odbc.md' - - 'hdfs': 'query_language/table_functions/hdfs.md' - - 'input': 'query_language/table_functions/input.md' - - 'generateRandom': 'query_language/table_functions/generate.md' - - 'Словари': - - 'Введение': 'query_language/dicts/index.md' - - 'Внешние словари': - - 'Общее описание': 'query_language/dicts/external_dicts.md' - - 'Настройка внешнего словаря': 'query_language/dicts/external_dicts_dict.md' - - 'Хранение словарей в памяти': 'query_language/dicts/external_dicts_dict_layout.md' - - 'Обновление словарей': 'query_language/dicts/external_dicts_dict_lifetime.md' - - 'Источники внешних словарей': 'query_language/dicts/external_dicts_dict_sources.md' - - 'Ключ и поля словаря': 'query_language/dicts/external_dicts_dict_structure.md' - - 'Иерархические словари': 'query_language/dicts/external_dicts_dict_hierarchical.md' - - 'Встроенные словари': 'query_language/dicts/internal_dicts.md' - - 'Операторы': 'query_language/operators.md' - - 'Типы данных': - - 'Введение': 
'data_types/index.md' - - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64': 'data_types/int_uint.md' - - 'Float32, Float64': 'data_types/float.md' - - 'Decimal': 'data_types/decimal.md' - - 'Булевы значения': 'data_types/boolean.md' - - 'String': 'data_types/string.md' - - 'FixedString(N)': 'data_types/fixedstring.md' - - 'UUID': 'data_types/uuid.md' - - 'Date': 'data_types/date.md' - - 'DateTime': 'data_types/datetime.md' - - 'DateTime64': 'data_types/datetime64.md' - - 'Enum': 'data_types/enum.md' - - 'Array(T)': 'data_types/array.md' - - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md' - - 'Tuple(T1, T2, ...)': 'data_types/tuple.md' - - 'Nullable': 'data_types/nullable.md' - - 'Вложенные структуры данных': - - 'hidden': 'data_types/nested_data_structures/index.md' - - 'Nested(Name1 Type1, Name2 Type2, ...)': 'data_types/nested_data_structures/nested.md' - - 'Служебные типы данных': - - 'hidden': 'data_types/special_data_types/index.md' - - 'Expression': 'data_types/special_data_types/expression.md' - - 'Set': 'data_types/special_data_types/set.md' - - 'Nothing': 'data_types/special_data_types/nothing.md' - - 'Interval': 'data_types/special_data_types/interval.md' - - 'Domains': - - 'Overview': 'data_types/domains/overview.md' - - 'IPv4': 'data_types/domains/ipv4.md' - - 'IPv6': 'data_types/domains/ipv6.md' - -- 'Руководства': - - 'Обзор': 'guides/index.md' - - 'Применение CatBoost моделей': 'guides/apply_catboost_model.md' - -- 'Эксплуатация': - - 'Введение': 'operations/index.md' - - 'Требования': 'operations/requirements.md' - - 'Мониторинг': 'operations/monitoring.md' - - 'Устранение неисправностей': 'operations/troubleshooting.md' - - 'Советы по эксплуатации': 'operations/tips.md' - - 'Обновление ClickHouse': 'operations/update.md' - - 'Права доступа': 'operations/access_rights.md' - - 'Резервное копирование': 'operations/backup.md' - - 'Конфигурационные файлы': 'operations/configuration_files.md' - - 'Квоты': 'operations/quotas.md' - - 'Системные таблицы': 'operations/system_tables.md' - - 'Оптимизация производительности': - - 'Профилирование запросов': 'operations/performance/sampling_query_profiler.md' - - 'Тестирование оборудования': 'operations/performance_test.md' - - 'Конфигурационные параметры сервера': - - 'Введение': 'operations/server_settings/index.md' - - 'Серверные настройки': 'operations/server_settings/settings.md' - - 'Настройки': - - 'Введение': 'operations/settings/index.md' - - 'Разрешения на выполнение запросов': 'operations/settings/permissions_for_queries.md' - - 'Ограничения на сложность запроса': 'operations/settings/query_complexity.md' - - 'Настройки': 'operations/settings/settings.md' - - 'Ограничения на изменение настроек': 'operations/settings/constraints_on_settings.md' - - 'Профили настроек': 'operations/settings/settings_profiles.md' - - 'Настройки пользователей': 'operations/settings/settings_users.md' - - 'Утилиты': - - 'Введение': 'operations/utils/index.md' - - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - - 'clickhouse-local': 'operations/utils/clickhouse-local.md' - - 'clickhouse-benchmark': 'operations/utils/clickhouse-benchmark.md' - -- 'Разработка': - - 'hidden': 'development/index.md' - - 'Инструкция для начинающего разработчика ClickHouse': 'development/developer_instruction.md' - - 'Обзор архитектуры ClickHouse': 'development/architecture.md' - - 'Навигация по коду ClickHouse': 'development/browse_code.md' - - 'Как собрать ClickHouse на Linux': 
'development/build.md' - - 'Как собрать ClickHouse на Mac OS X': 'development/build_osx.md' - - 'Как собрать ClickHouse на Linux для Mac OS X': 'development/build_cross_osx.md' - - 'Как собрать ClickHouse на Linux для AARCH64 (ARM64)': 'development/build_cross_arm.md' - - 'Как писать код на C++': 'development/style.md' - - 'Как запустить тесты': 'development/tests.md' - - 'Сторонние библиотеки': 'development/contrib.md' - -- 'Что нового': - - 'Changelog': - - '2020': 'changelog/index.md' - - '2019': 'changelog/2019.md' - - '2018': 'changelog/2018.md' - - '2017': 'changelog/2017.md' - - 'Security changelog': 'security_changelog.md' - - 'Roadmap': 'roadmap.md' - - 'Подробный roadmap 2020': 'extended_roadmap.md' - -- 'F.A.Q.': - - 'Общие вопросы': 'faq/general.md' diff --git a/docs/tools/convert_toc.py b/docs/tools/convert_toc.py index 9bfc347d244..5e3fe97de44 100755 --- a/docs/tools/convert_toc.py +++ b/docs/tools/convert_toc.py @@ -8,7 +8,7 @@ import yaml import util -lang = 'zh' +lang = 'ru' base_dir = os.path.join(os.path.dirname(__file__), '..') en_dir = os.path.join(base_dir, 'en') docs_dir = os.path.join(base_dir, lang) From d035173889d7ebfe067ecbf9a310dad5d0776eec Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 21:08:22 +0300 Subject: [PATCH 163/752] There's no toc_NN.yml files anymore --- docs/README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/README.md b/docs/README.md index f075716a0b0..3c66408089c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -135,16 +135,13 @@ When adding a new file: $ ln -sr en/new/file.md lang/new/file.md ``` -- Reference the file from `toc_{en,ru,zh,ja,fa}.yaml` files with the pages index. - - ### Adding a New Language 1. Create a new docs subfolder named using the [ISO-639-1 language code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). 2. Add Markdown files with the translation, mirroring the folder structure of other languages. -3. Commit and open a pull request with the new content. +3. Commit and open a pull-request with the new content. When everything is ready, we will add the new language to the website. From c1558f8c18e2727c5991cf923ee79710de89d1d0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 21:08:57 +0300 Subject: [PATCH 164/752] fix link --- docs/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/README.md b/docs/README.md index 3c66408089c..a7473094ad7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -203,4 +203,4 @@ Templates: ## How to Build Documentation -You can build your documentation manually by following the instructions in [docs/tools/README.md](docs/tools/README.md). Also, our CI runs the documentation build after the `documentation` label is added to PR. You can see the results of a build in the GitHub interface. If you have no permissions to add labels, a reviewer of your PR will add it. +You can build your documentation manually by following the instructions in [docs/tools/README.md](../docs/tools/README.md). Also, our CI runs the documentation build after the `documentation` label is added to PR. You can see the results of a build in the GitHub interface. If you have no permissions to add labels, a reviewer of your PR will add it. 
From 26dd6140b21c99df5f51f81ac6fe1263ca08bcc6 Mon Sep 17 00:00:00 2001
From: Alexander Kazakov
Date: Thu, 9 Apr 2020 21:10:27 +0300
Subject: [PATCH 165/752] Added new config settings to control timeouts

* "lock_acquire_timeout" controls how long a query keeps trying to acquire each lock on its argument tables
* "lock_acquire_timeout_for_background_operations" is a per-table setting for storages of the *MergeTree family
---
 src/Core/Defines.h                            |  4 ++
 src/Core/Settings.h                           |  1 +
 .../PushingToViewsBlockOutputStream.cpp       |  7 +++-
 src/Databases/DatabaseMySQL.cpp               |  4 +-
 src/Functions/FunctionJoinGet.cpp             |  3 +-
 src/Interpreters/InterpreterAlterQuery.cpp    |  7 +++-
 src/Interpreters/InterpreterCreateQuery.cpp   |  3 +-
 src/Interpreters/InterpreterDescribeQuery.cpp |  3 +-
 src/Interpreters/InterpreterDropQuery.cpp     | 12 +++---
 src/Interpreters/InterpreterInsertQuery.cpp   |  3 +-
 src/Interpreters/InterpreterRenameQuery.cpp   |  3 +-
 src/Interpreters/InterpreterSelectQuery.cpp   |  3 +-
 src/Interpreters/InterpreterSystemQuery.cpp   |  2 +-
 src/Storages/IStorage.cpp                     | 33 +++++++--------
 src/Storages/IStorage.h                       | 11 ++---
 src/Storages/LiveView/StorageLiveView.cpp     |  2 +-
 src/Storages/MergeTree/DataPartsExchange.cpp  |  3 +-
 src/Storages/MergeTree/MergeTreeSettings.h    |  1 +
 .../ReplicatedMergeTreeCleanupThread.cpp      |  3 +-
 .../ReplicatedMergeTreePartCheckThread.cpp    |  4 +-
 src/Storages/StorageBuffer.cpp                |  5 ++-
 src/Storages/StorageDistributed.cpp           |  2 +-
 src/Storages/StorageMaterializedView.cpp      |  9 +++--
 src/Storages/StorageMerge.cpp                 | 18 +++++----
 src/Storages/StorageMerge.h                   |  5 ++-
 src/Storages/StorageMergeTree.cpp             | 29 ++++++++------
 src/Storages/StorageNull.cpp                  |  2 +-
 src/Storages/StorageReplicatedMergeTree.cpp   | 40 ++++++++++++-------
 src/Storages/System/StorageSystemColumns.cpp  | 15 +++----
 .../System/StorageSystemPartsBase.cpp         |  4 +-
 src/Storages/System/StorageSystemPartsBase.h  |  2 +
 src/Storages/System/StorageSystemTables.cpp   |  3 +-
 32 files changed, 151 insertions(+), 95 deletions(-)
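Before the diff itself, a short usage sketch of the two settings this patch introduces. The setting names are taken from the hunks below; the values and the table name are illustrative assumptions, and applying the per-table variant via `ALTER TABLE ... MODIFY SETTING` is assumed to work the same way as for other MergeTree settings.

```sql
-- Per-query: give up on acquiring table locks after 10 seconds instead of
-- the default DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC (120 seconds).
SET lock_acquire_timeout = 10;

-- Per-table, for background merges and mutations of a *MergeTree table
-- (`t` is a hypothetical table name).
ALTER TABLE t MODIFY SETTING lock_acquire_timeout_for_background_operations = 300;
```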
diff --git a/src/Core/Defines.h b/src/Core/Defines.h
index f2d4a517712..c797f527be9 100644
--- a/src/Core/Defines.h
+++ b/src/Core/Defines.h
@@ -91,3 +91,7 @@
 # define ASAN_UNPOISON_MEMORY_REGION(a, b)
 # define ASAN_POISON_MEMORY_REGION(a, b)
 #endif
+
+/// Actually, there may be multiple acquisitions of different locks for a given table within one query.
+/// Check with IStorage class for the list of possible locks
+#define DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC 120
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 753231603b2..ec03dfa1a4e 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -406,6 +406,7 @@ struct Settings : public SettingsCollection
     M(SettingBool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
     M(SettingUInt64, max_parser_depth, 1000, "Maximum parser depth.", 0) \
     M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
+    M(SettingSeconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long a locking request should wait before failing", 0) \
     \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
index 991d206777a..8e547767584 100644
--- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
@@ -25,7 +25,8 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
       * Although now any insertion into the table is done via PushingToViewsBlockOutputStream,
       * but it's clear that here is not the best place for this functionality.
       */
-    addTableLock(storage->lockStructureForShare(true, context.getInitialQueryId()));
+    addTableLock(
+        storage->lockStructureForShare(true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout));
     /// If the "root" table deduplactes blocks, there are no need to make deduplication for children
     /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
@@ -54,7 +55,9 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
         if (auto * materialized_view = dynamic_cast(dependent_table.get()))
         {
-            addTableLock(materialized_view->lockStructureForShare(true, context.getInitialQueryId()));
+            addTableLock(
+                materialized_view->lockStructureForShare(
+                    true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout));
             StoragePtr inner_table = materialized_view->getTargetTable();
             auto inner_table_id = inner_table->getStorageID();
diff --git a/src/Databases/DatabaseMySQL.cpp b/src/Databases/DatabaseMySQL.cpp
index 959121585ea..1cbbd4b06d9 100644
--- a/src/Databases/DatabaseMySQL.cpp
+++ b/src/Databases/DatabaseMySQL.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -40,6 +41,7 @@ namespace ErrorCodes
 constexpr static const auto suffix = ".remove_flag";
 static constexpr const std::chrono::seconds cleaner_sleep_time{30};
+static const SettingSeconds lock_acquire_timeout{10};
 static String toQueryStringWithQuote(const std::vector & quote_list)
 {
@@ -358,7 +360,7 @@ void DatabaseMySQL::cleanOutdatedTables()
             ++iterator;
         else
         {
-            const auto table_lock = (*iterator)->lockAlterIntention(RWLockImpl::NO_QUERY);
+            const auto table_lock = (*iterator)->lockAlterIntention(RWLockImpl::NO_QUERY, lock_acquire_timeout);
             (*iterator)->shutdown();
             (*iterator)->is_dropped = true;
diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp
index 0860deccb14..7940bad2cf4 100644
--- a/src/Functions/FunctionJoinGet.cpp
+++ b/src/Functions/FunctionJoinGet.cpp
@@ -65,7 +65,8 @@ FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTypeAndName
     auto join = storage_join->getJoin();
     DataTypes data_types(arguments.size());
-    auto table_lock = storage_join->lockStructureForShare(false, context.getInitialQueryId());
+    auto table_lock = storage_join->lockStructureForShare(
+        false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
     for (size_t i = 0; i < arguments.size(); ++i)
         data_types[i] = arguments[i].type;
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index 315527765ef..7412b6b683b 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -82,7 +82,9 @@ BlockIO InterpreterAlterQuery::execute()
     if (!mutation_commands.empty())
     {
-        auto table_lock_holder = table->lockStructureForShare(false /* because mutation is executed asyncronously */, context.getCurrentQueryId());
+        auto table_lock_holder = table->lockStructureForShare(
+            false /* because mutation is executed asynchronously */,
+            context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
         MutationsInterpreter(table, mutation_commands, context, false).validate(table_lock_holder);
         table->mutate(mutation_commands, context);
     }
@@ -109,7 +111,8 @@ BlockIO InterpreterAlterQuery::execute()
     if (!alter_commands.empty())
     {
-        auto table_lock_holder = table->lockAlterIntention(context.getCurrentQueryId());
+        auto table_lock_holder = table->lockAlterIntention(
+            context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
         StorageInMemoryMetadata metadata = table->getInMemoryMetadata();
         alter_commands.validate(metadata, context);
         alter_commands.prepare(metadata);
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index f15796688e1..81b238a8973 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -403,7 +403,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS
     StoragePtr as_storage = DatabaseCatalog::instance().getTable({as_database_name, create.as_table});
     /// as_storage->getColumns() and setEngine(...) must be called under structure lock of other_table for CREATE ... AS other_table.
-    as_storage_lock = as_storage->lockStructureForShare(false, context.getCurrentQueryId());
+    as_storage_lock = as_storage->lockStructureForShare(
+        false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
     properties.columns = as_storage->getColumns();
     /// Secondary indices make sense only for MergeTree family of storage engines.
diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp
index 1353c01ebf6..f9c769a523e 100644
--- a/src/Interpreters/InterpreterDescribeQuery.cpp
+++ b/src/Interpreters/InterpreterDescribeQuery.cpp
@@ -89,7 +89,8 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
         table = DatabaseCatalog::instance().getTable(table_id);
     }
-    auto table_lock = table->lockStructureForShare(false, context.getInitialQueryId());
+    auto table_lock = table->lockStructureForShare(
+        false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
     columns = table->getColumns();
 }
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 42d9528abd5..70707c814ca 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -93,7 +93,7 @@ BlockIO InterpreterDropQuery::executeToTable(
             context.checkAccess(table->isView() ?
AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id); table->shutdown(); /// If table was already dropped by anyone, an exception will be thrown - auto table_lock = table->lockExclusively(context.getCurrentQueryId()); + auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); /// Drop table from memory, don't touch data and metadata database->detachTable(table_name); } @@ -103,7 +103,7 @@ BlockIO InterpreterDropQuery::executeToTable( table->checkTableCanBeDropped(); /// If table was already dropped by anyone, an exception will be thrown - auto table_lock = table->lockExclusively(context.getCurrentQueryId()); + auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); /// Drop table data, don't touch metadata table->truncate(query_ptr, context, table_lock); } @@ -115,7 +115,7 @@ BlockIO InterpreterDropQuery::executeToTable( table->shutdown(); /// If table was already dropped by anyone, an exception will be thrown - auto table_lock = table->lockExclusively(context.getCurrentQueryId()); + auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); const std::string metadata_file_without_extension = database->getMetadataPath() + escapeForFileName(table_id.table_name); const auto prev_metadata_name = metadata_file_without_extension + ".sql"; @@ -216,7 +216,8 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, if (kind == ASTDropQuery::Kind::Truncate) { /// If table was already dropped by anyone, an exception will be thrown - auto table_lock = table->lockExclusively(context.getCurrentQueryId()); + auto table_lock = + table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); /// Drop table data, don't touch metadata table->truncate(query_ptr, context, table_lock); } @@ -225,7 +226,8 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, context_handle.removeExternalTable(table_name); table->shutdown(); /// If table was already dropped by anyone, an exception will be thrown - auto table_lock = table->lockExclusively(context.getCurrentQueryId()); + auto table_lock = + table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); /// Delete table data table->drop(table_lock); table->is_dropped = true; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 39b99b10c0d..fc5d76ee216 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -109,7 +109,8 @@ BlockIO InterpreterInsertQuery::execute() BlockIO res; StoragePtr table = getTable(query); - auto table_lock = table->lockStructureForShare(true, context.getInitialQueryId()); + auto table_lock = table->lockStructureForShare( + true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); auto query_sample_block = getSampleBlock(query, table); if (!query.table_function) diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 4f54f759510..9a4f4b1b197 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -79,7 +79,8 @@ BlockIO InterpreterRenameQuery::execute() { database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name)); auto from_table = 
            database_catalog.getTable({elem.from_database_name, elem.from_table_name});
-            auto from_table_lock = from_table->lockExclusively(context.getCurrentQueryId());
+            auto from_table_lock =
+                from_table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
             database_catalog.getDatabase(elem.from_database_name)->renameTable(
                 context,
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 80a7831475b..b355d5af6b1 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -255,7 +255,8 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     if (storage)
     {
-        table_lock = storage->lockStructureForShare(false, context->getInitialQueryId());
+        table_lock = storage->lockStructureForShare(
+            false, context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
         table_id = storage->getStorageID();
     }
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 9a7d6ae7c5a..87d995372ef 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -326,7 +326,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica,
     table->shutdown();
     {
         /// If table was already dropped by anyone, an exception will be thrown
-        auto table_lock = table->lockExclusively(context.getCurrentQueryId());
+        auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
         create_ast = database->getCreateTableQuery(system_context, replica.table_name);
         database->detachTable(replica.table_name);
diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp
index ab3a750db16..3bf8054485c 100644
--- a/src/Storages/IStorage.cpp
+++ b/src/Storages/IStorage.cpp
@@ -315,63 +315,64 @@ bool IStorage::isVirtualColumn(const String & column_name) const
     return getColumns().get(column_name).is_virtual;
 }
-RWLockImpl::LockHolder IStorage::tryLockTimed(const RWLock & rwlock, RWLockImpl::Type type, const String & query_id)
+RWLockImpl::LockHolder IStorage::tryLockTimed(
+    const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout)
 {
-    auto lock_holder = rwlock->getLock(type, query_id, RWLockImpl::default_locking_timeout_ms);
+    auto lock_holder = rwlock->getLock(type, query_id, std::chrono::milliseconds(acquire_timeout.totalMilliseconds()));
     if (!lock_holder)
     {
         const String type_str = type == RWLockImpl::Type::Read ? "READ" : "WRITE";
         throw Exception(
             type_str + " locking attempt on \"" + getStorageID().getFullTableName() +
-                "\" has timed out! (" + toString(RWLockImpl::default_locking_timeout_ms.count()) + "ms) "
+                "\" has timed out! (" + toString(acquire_timeout.totalMilliseconds()) + "ms). "
                "Possible deadlock avoided.
Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); } return lock_holder; } -TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) +TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id, const SettingSeconds & acquire_timeout) { TableStructureReadLockHolder result; if (will_add_new_data) - result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Read, query_id); - result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Read, query_id); + result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Read, query_id, acquire_timeout); + result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Read, query_id, acquire_timeout); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); return result; } -TableStructureWriteLockHolder IStorage::lockAlterIntention(const String & query_id) +TableStructureWriteLockHolder IStorage::lockAlterIntention(const String & query_id, const SettingSeconds & acquire_timeout) { TableStructureWriteLockHolder result; - result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id); + result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id, acquire_timeout); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); return result; } -void IStorage::lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id) +void IStorage::lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id, const SettingSeconds & acquire_timeout) { if (!lock_holder.alter_intention_lock) throw Exception("Alter intention lock for table " + getStorageID().getNameForLogs() + " was not taken. 
This is a bug.", ErrorCodes::LOGICAL_ERROR); if (!lock_holder.new_data_structure_lock) - lock_holder.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id); - lock_holder.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id); + lock_holder.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id, acquire_timeout); + lock_holder.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id, acquire_timeout); } -TableStructureWriteLockHolder IStorage::lockExclusively(const String & query_id) +TableStructureWriteLockHolder IStorage::lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout) { TableStructureWriteLockHolder result; - result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id); + result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id, acquire_timeout); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); - result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id); - result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id); + result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id, acquire_timeout); + result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id, acquire_timeout); return result; } @@ -386,7 +387,7 @@ void IStorage::alter( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata metadata = getInMemoryMetadata(); params.apply(metadata); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 581fc8a67e7..dd4e8506f9f 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -196,24 +196,25 @@ private: ConstraintsDescription constraints; private: - RWLockImpl::LockHolder tryLockTimed(const RWLock & rwlock, RWLockImpl::Type type, const String & query_id); + RWLockImpl::LockHolder tryLockTimed( + const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout); public: /// Acquire this lock if you need the table structure to remain constant during the execution of /// the query. If will_add_new_data is true, this means that the query will add new data to the table /// (INSERT or a parts merge). - TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id); + TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id, const SettingSeconds & acquire_timeout); /// Acquire this lock at the start of ALTER to lock out other ALTERs and make sure that only you /// can modify the table structure. It can later be upgraded to the exclusive lock. - TableStructureWriteLockHolder lockAlterIntention(const String & query_id); + TableStructureWriteLockHolder lockAlterIntention(const String & query_id, const SettingSeconds & acquire_timeout); /// Upgrade alter intention lock to the full exclusive structure lock. This is done by ALTER queries /// to ensure that no other query uses the table structure and it can be safely changed. 
- void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id); + void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id, const SettingSeconds & acquire_timeout); /// Acquire the full exclusive lock immediately. No other queries can run concurrently. - TableStructureWriteLockHolder lockExclusively(const String & query_id); + TableStructureWriteLockHolder lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout); /** Returns stage to which query is going to be processed in read() function. * (Normally, the function only reads the columns from the list, but in other cases, diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 049110a3294..569e5c24e1c 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -519,7 +519,7 @@ void StorageLiveView::drop(TableStructureWriteLockHolder &) void StorageLiveView::refresh(const Context & context) { - auto alter_lock = lockAlterIntention(context.getCurrentQueryId()); + auto alter_lock = lockAlterIntention(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); { std::lock_guard lock(mutex); if (getNewBlocks()) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6373c85a15d..c656fbf0c58 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -85,7 +85,8 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo try { - auto storage_lock = data.lockStructureForShare(false, RWLockImpl::NO_QUERY); + auto storage_lock = data.lockStructureForShare( + false, RWLockImpl::NO_QUERY, data.getSettings()->lock_acquire_timeout_for_background_operations); MergeTreeData::DataPartPtr part = findPart(part_name); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index bbd1fd6cbeb..02c852b4f4b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -42,6 +42,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, number_of_free_entries_in_pool_to_execute_mutation, 10, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ M(SettingSeconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ M(SettingSeconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories.", 0) \ + M(SettingSeconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. How many seconds before failing to acquire table locks.", 0) \ \ /** Inserts settings. */ \ M(SettingUInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 77a5bca7a92..b1164f6621c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -57,7 +57,8 @@ void ReplicatedMergeTreeCleanupThread::iterate() { /// TODO: Implement tryLockStructureForShare. 
- auto lock = storage.lockStructureForShare(false, ""); + auto lock = storage.lockStructureForShare( + false, RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); storage.clearOldTemporaryDirectories(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 17b716d14c2..b587b5f71c0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -203,7 +203,9 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na else if (part->name == part_name) { auto zookeeper = storage.getZooKeeper(); - auto table_lock = storage.lockStructureForShare(false, RWLockImpl::NO_QUERY); + + auto table_lock = storage.lockStructureForShare( + false, RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( part->getColumns(), part->checksums); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 7699f8379d9..2702b344dc3 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -168,7 +168,8 @@ Pipes StorageBuffer::read( if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); - auto destination_lock = destination->lockStructureForShare(false, context.getCurrentQueryId()); + auto destination_lock = destination->lockStructureForShare( + false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); const bool dst_has_same_structure = std::all_of(column_names.begin(), column_names.end(), [this, destination](const String& column_name) { @@ -757,7 +758,7 @@ std::optional StorageBuffer::totalBytes() const void StorageBuffer::alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); checkAlterIsPossible(params, context.getSettingsRef()); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b4375dd5b0a..7e6b9d14e02 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -460,7 +460,7 @@ void StorageDistributed::checkAlterIsPossible(const AlterCommands & commands, co void StorageDistributed::alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); checkAlterIsPossible(params, context.getSettingsRef()); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 3fb25bf8275..056e2cbb42f 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -185,7 +185,9 @@ Pipes StorageMaterializedView::read( const unsigned num_streams) { auto storage = getTargetTable(); - auto lock = storage->lockStructureForShare(false, context.getCurrentQueryId()); + auto lock = storage->lockStructureForShare( + 
false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + if (query_info.order_by_optimizer) query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage); @@ -200,7 +202,8 @@ Pipes StorageMaterializedView::read( BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const Context & context) { auto storage = getTargetTable(); - auto lock = storage->lockStructureForShare(true, context.getCurrentQueryId()); + auto lock = storage->lockStructureForShare( + true, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto stream = storage->write(query, context); stream->addTableLock(lock); return stream; @@ -258,7 +261,7 @@ void StorageMaterializedView::alter( const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata metadata = getInMemoryMetadata(); params.apply(metadata); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f3322c7dfff..a108e615bee 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -118,7 +118,8 @@ bool StorageMerge::isRemote() const bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const { /// It's beneficial if it is true for at least one table. - StorageListWithLocks selected_tables = getSelectedTables(query_context.getCurrentQueryId()); + StorageListWithLocks selected_tables = getSelectedTables( + query_context.getCurrentQueryId(), query_context.getSettingsRef()); size_t i = 0; for (const auto & table : selected_tables) @@ -195,7 +196,7 @@ Pipes StorageMerge::read( * This is necessary to correctly pass the recommended number of threads to each table. */ StorageListWithLocks selected_tables = getSelectedTables( - query_info.query, has_table_virtual_column, context.getCurrentQueryId()); + query_info.query, has_table_virtual_column, context.getCurrentQueryId(), context.getSettingsRef()); if (selected_tables.empty()) /// FIXME: do we support sampling in this case? 
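Every hunk in this patch repeats one pattern: a structure-lock call that used to wait indefinitely now carries an explicit timeout — `context.getSettingsRef().lock_acquire_timeout` on user-query paths, and `lock_acquire_timeout_for_background_operations` from the MergeTree settings on merge/mutation/cleanup paths. The stand-alone sketch below illustrates the idea only; `TimedTableLock` and its methods are invented for illustration (ClickHouse uses its own `RWLockImpl`), with `std::shared_timed_mutex` standing in for the table structure lock, and only the two setting names come from the diffs above.

// Minimal sketch of the timed-lock pattern this patch series threads through
// every storage. NOT the actual IStorage/RWLockImpl code.
#include <chrono>
#include <iostream>
#include <mutex>
#include <shared_mutex>
#include <stdexcept>
#include <string>
#include <thread>

class TimedTableLock
{
public:
    /// Shared ("structure for share") acquisition with a deadline instead of an unbounded wait.
    void lockShared(std::chrono::milliseconds timeout, const std::string & who)
    {
        if (!mutex.try_lock_shared_for(timeout))
            throw std::runtime_error(
                "READ locking attempt by \"" + who + "\" has timed out! ("
                + std::to_string(timeout.count()) + "ms) Possible deadlock avoided. Client should retry.");
    }

    void unlockShared() { mutex.unlock_shared(); }

    std::shared_timed_mutex mutex; /// exposed so the demo below can take the exclusive side too
};

int main()
{
    TimedTableLock lock;

    /// Stands in for e.g. an ALTER holding the structure lock exclusively.
    std::unique_lock<std::shared_timed_mutex> exclusive(lock.mutex);

    std::thread reader([&]
    {
        try
        {
            /// A background merge would pass lock_acquire_timeout_for_background_operations here;
            /// a user query would pass context.getSettingsRef().lock_acquire_timeout instead.
            lock.lockShared(std::chrono::milliseconds(50), "merge");
            lock.unlockShared();
        }
        catch (const std::exception & e)
        {
            std::cout << e.what() << '\n'; /// bounded wait -> retryable error, not a hang
        }
    });

    reader.join();
    return 0;
}

Bounding the wait is the point of the whole series: a lock that cannot be acquired in time becomes a retryable exception rather than a silent deadlock, which is exactly the behaviour the `IStorage::tryLockTimed` hunk later in this series spells out in its error message.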
@@ -355,7 +356,7 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer } -StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String & query_id) const +StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String & query_id, const Settings & settings) const { StorageListWithLocks selected_tables; auto iterator = getDatabaseIterator(); @@ -364,7 +365,8 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String { auto & table = iterator->table(); if (table.get() != this) - selected_tables.emplace_back(table, table->lockStructureForShare(false, query_id), iterator->name()); + selected_tables.emplace_back( + table, table->lockStructureForShare(false, query_id, settings.lock_acquire_timeout), iterator->name()); iterator->next(); } @@ -373,7 +375,8 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String } -StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr & query, bool has_virtual_column, const String & query_id) const +StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( + const ASTPtr & query, bool has_virtual_column, const String & query_id, const Settings & settings) const { StorageListWithLocks selected_tables; DatabaseTablesIteratorPtr iterator = getDatabaseIterator(); @@ -389,7 +392,8 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr if (storage.get() != this) { - selected_tables.emplace_back(storage, storage->lockStructureForShare(false, query_id), iterator->name()); + selected_tables.emplace_back( + storage, storage->lockStructureForShare(false, query_id, settings.lock_acquire_timeout), iterator->name()); virtual_column->insert(iterator->name()); } @@ -434,7 +438,7 @@ void StorageMerge::checkAlterIsPossible(const AlterCommands & commands, const Se void StorageMerge::alter( const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata storage_metadata = getInMemoryMetadata(); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 1d2df3cb9ce..bb3205184b1 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -57,9 +57,10 @@ private: using StorageWithLockAndName = std::tuple; using StorageListWithLocks = std::list; - StorageListWithLocks getSelectedTables(const String & query_id) const; + StorageListWithLocks getSelectedTables(const String & query_id, const Settings & settings) const; - StorageMerge::StorageListWithLocks getSelectedTables(const ASTPtr & query, bool has_virtual_column, const String & query_id) const; + StorageMerge::StorageListWithLocks getSelectedTables( + const ASTPtr & query, bool has_virtual_column, const String & query_id, const Settings & settings) const; template StoragePtr getFirstTable(F && predicate) const; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 2efeff19657..5bf16f49fbe 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -223,7 +223,7 @@ void StorageMergeTree::alter( /// This alter can be performed at metadata level only if (commands.isSettingsAlter()) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + 
lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); changeSettings(metadata.settings_ast, table_lock_holder); @@ -231,7 +231,7 @@ void StorageMergeTree::alter( } else { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); changeSettings(metadata.settings_ast, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. @@ -537,7 +537,8 @@ bool StorageMergeTree::merge( bool deduplicate, String * out_disable_reason) { - auto table_lock_holder = lockStructureForShare(true, RWLockImpl::NO_QUERY); + auto table_lock_holder = lockStructureForShare( + true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); FutureMergedMutatedPart future_part; @@ -655,7 +656,8 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::movePartsTask() bool StorageMergeTree::tryMutatePart() { - auto table_lock_holder = lockStructureForShare(true, RWLockImpl::NO_QUERY); + auto table_lock_holder = lockStructureForShare( + true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); size_t max_ast_elements = global_context.getSettingsRef().max_expanded_ast_elements; FutureMergedMutatedPart future_part; @@ -780,7 +782,8 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::mergeMutateTask() { { /// TODO: Implement tryLockStructureForShare. - auto lock_structure = lockStructureForShare(false, ""); + auto lock_structure = lockStructureForShare( + false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); clearOldPartsFromFilesystem(); clearOldTemporaryDirectories(); } @@ -973,14 +976,16 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma case PartitionCommand::FREEZE_PARTITION: { - auto lock = lockStructureForShare(false, context.getCurrentQueryId()); + auto lock = lockStructureForShare( + false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); freezePartition(command.partition, command.with_name, context, lock); } break; case PartitionCommand::FREEZE_ALL_PARTITIONS: { - auto lock = lockStructureForShare(false, context.getCurrentQueryId()); + auto lock = lockStructureForShare( + false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); freezeAll(command.with_name, context, lock); } break; @@ -998,7 +1003,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons /// This protects against "revival" of data for a removed partition after completion of merge. auto merge_blocker = merger_mutator.merges_blocker.cancel(); /// Waits for completion of merge and does not start new ones. 
- auto lock = lockExclusively(context.getCurrentQueryId()); + auto lock = lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); String partition_id = getPartitionIDFromQuery(partition, context); @@ -1045,8 +1050,8 @@ void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_par void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) { - auto lock1 = lockStructureForShare(false, context.getCurrentQueryId()); - auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId()); + auto lock1 = lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); Stopwatch watch; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table); @@ -1116,8 +1121,8 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & context) { - auto lock1 = lockStructureForShare(false, context.getCurrentQueryId()); - auto lock2 = dest_table->lockStructureForShare(false, context.getCurrentQueryId()); + auto lock1 = lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock2 = dest_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 878be5bbf2d..bafb3d9a9fb 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -48,7 +48,7 @@ void StorageNull::checkAlterIsPossible(const AlterCommands & commands, const Set void StorageNull::alter( const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId()); + lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata metadata = getInMemoryMetadata(); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8896151561b..ab5898458c5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1025,7 +1025,8 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) ReservationPtr reserved_space = reserveSpacePreferringTTLRules(estimated_space_for_merge, ttl_infos, time(nullptr), max_volume_index); - auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); + auto table_lock = lockStructureForShare( + false, RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); FutureMergedMutatedPart future_merged_part(parts, entry.new_part_type); if (future_merged_part.name != entry.new_part_name) @@ -1160,7 +1161,8 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM /// Can throw an exception. 
ReservationPtr reserved_space = reserveSpace(estimated_space_for_result, source_part->disk); - auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); + auto table_lock = lockStructureForShare( + false, RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); MutableDataPartPtr new_part; Transaction transaction(*this); @@ -1514,7 +1516,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) PartDescriptions parts_to_add; DataPartsVector parts_to_remove; - auto table_lock_holder_dst_table = lockStructureForShare(false, RWLockImpl::NO_QUERY); + auto table_lock_holder_dst_table = lockStructureForShare( + false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); for (size_t i = 0; i < entry_replace.new_part_names.size(); ++i) { @@ -1576,7 +1579,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) return 0; } - table_lock_holder_src_table = source_table->lockStructureForShare(false, RWLockImpl::NO_QUERY); + table_lock_holder_src_table = source_table->lockStructureForShare( + false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); DataPartStates valid_states{MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}; @@ -2699,7 +2703,8 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin TableStructureReadLockHolder table_lock_holder; if (!to_detached) - table_lock_holder = lockStructureForShare(true, RWLockImpl::NO_QUERY); + table_lock_holder = lockStructureForShare( + true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); /// Logging Stopwatch stopwatch; @@ -3166,7 +3171,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer { /// TODO (relax this lock) - auto table_lock = lockExclusively(RWLockImpl::NO_QUERY); + auto table_lock = lockExclusively(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); @@ -3193,7 +3198,8 @@ void StorageReplicatedMergeTree::alter( if (params.isSettingsAlter()) { - lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId()); + lockStructureExclusively( + table_lock_holder, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); /// We don't replicate storage_settings_ptr ALTER. It's local operation. /// Also we don't upgrade alter lock to table structure lock. 
StorageInMemoryMetadata metadata = getInMemoryMetadata(); @@ -3259,7 +3265,8 @@ void StorageReplicatedMergeTree::alter( if (ast_to_str(current_metadata.settings_ast) != ast_to_str(future_metadata.settings_ast)) { - lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId()); + lockStructureExclusively( + table_lock_holder, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); /// Just change settings current_metadata.settings_ast = future_metadata.settings_ast; changeSettings(current_metadata.settings_ast, table_lock_holder); @@ -3428,14 +3435,16 @@ void StorageReplicatedMergeTree::alterPartition(const ASTPtr & query, const Part case PartitionCommand::FREEZE_PARTITION: { - auto lock = lockStructureForShare(false, query_context.getCurrentQueryId()); + auto lock = lockStructureForShare( + false, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); freezePartition(command.partition, command.with_name, query_context, lock); } break; case PartitionCommand::FREEZE_ALL_PARTITIONS: { - auto lock = lockStructureForShare(false, query_context.getCurrentQueryId()); + auto lock = lockStructureForShare( + false, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); freezeAll(command.with_name, query_context, lock); } break; @@ -4443,7 +4452,8 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() { /// Critical section is not required (since grabOldParts() returns unique part set on each call) - auto table_lock = lockStructureForShare(false, RWLockImpl::NO_QUERY); + auto table_lock = lockStructureForShare( + false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); auto zookeeper = getZooKeeper(); DataPartsVector parts = grabOldParts(); @@ -4738,8 +4748,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_ const Context & context) { /// First argument is true, because we possibly will add new data to current table. 
- auto lock1 = lockStructureForShare(true, context.getCurrentQueryId()); - auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId()); + auto lock1 = lockStructureForShare(true, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); Stopwatch watch; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table); @@ -4917,8 +4927,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & context) { - auto lock1 = lockStructureForShare(false, context.getCurrentQueryId()); - auto lock2 = dest_table->lockStructureForShare(false, context.getCurrentQueryId()); + auto lock1 = lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock2 = dest_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index cbf6ada9ed3..26e2376c3f7 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -62,12 +62,12 @@ public: ColumnPtr databases_, ColumnPtr tables_, Storages storages_, - const std::shared_ptr & access_, - String query_id_) + const Context & context) : SourceWithProgress(header_) , columns_mask(std::move(columns_mask_)), max_block_size(max_block_size_) , databases(std::move(databases_)), tables(std::move(tables_)), storages(std::move(storages_)) - , query_id(std::move(query_id_)), total_tables(tables->size()), access(access_) + , total_tables(tables->size()), access(context.getAccess()) + , query_id(context.getCurrentQueryId()), lock_acquire_timeout(context.getSettingsRef().lock_acquire_timeout) { } @@ -103,7 +103,7 @@ protected: try { - table_lock = storage->lockStructureForShare(false, query_id); + table_lock = storage->lockStructureForShare(false, query_id, lock_acquire_timeout); } catch (const Exception & e) { @@ -227,10 +227,11 @@ private: ColumnPtr databases; ColumnPtr tables; Storages storages; - String query_id; size_t db_table_num = 0; size_t total_tables; std::shared_ptr access; + String query_id; + SettingSeconds lock_acquire_timeout; }; @@ -331,8 +332,8 @@ Pipes StorageSystemColumns::read( pipes.emplace_back(std::make_shared( std::move(columns_mask), std::move(header), max_block_size, - std::move(filtered_database_column), std::move(filtered_table_column), std::move(storages), - context.getAccess(), context.getCurrentQueryId())); + std::move(filtered_database_column), std::move(filtered_table_column), + std::move(storages), context)); return pipes; } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index d8f564b0160..19c6f6b3d03 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -62,7 +62,7 @@ StoragesInfo::getParts(MergeTreeData::DataPartStateVector & state, bool has_stat } StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const Context & context) - : query_id(context.getCurrentQueryId()) + : query_id(context.getCurrentQueryId()), 
settings(context.getSettings()) { /// Will apply WHERE to subset of columns and then add more columns. /// This is kind of complicated, but we use WHERE to do less work. @@ -192,7 +192,7 @@ StoragesInfo StoragesInfoStream::next() try { /// For table not to be dropped and set of columns to remain constant. - info.table_lock = info.storage->lockStructureForShare(false, query_id); + info.table_lock = info.storage->lockStructureForShare(false, query_id, settings.lock_acquire_timeout); } catch (const Exception & e) { diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index b30f7c62914..be8e45146cb 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -36,6 +36,8 @@ public: private: String query_id; + Settings settings; + ColumnPtr database_column; ColumnPtr table_column; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index a8d5fc2ec57..f4ce4a8b717 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -244,7 +244,8 @@ protected: if (need_lock_structure) { table = tables_it->table(); - lock = table->lockStructureForShare(false, context.getCurrentQueryId()); + lock = table->lockStructureForShare( + false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); } } catch (const Exception & e) From 3166eab2dfc89323b9d756aeba6866381fd9c186 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 21:56:03 +0300 Subject: [PATCH 166/752] Update release.sh --- docs/tools/release.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tools/release.sh b/docs/tools/release.sh index e0f580c383b..8eec2d758da 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -4,9 +4,9 @@ set -ex BASE_DIR=$(dirname $(readlink -f $0)) BUILD_DIR="${BASE_DIR}/../build" PUBLISH_DIR="${BASE_DIR}/../publish" -BASE_DOMAIN="${BASE_DOMAIN:-clickhouse.tech}" +BASE_DOMAIN="${BASE_DOMAIN:-content.clickhouse.tech}" GIT_TEST_URI="${GIT_TEST_URI:-git@github.com:ClickHouse/clickhouse.github.io.git}" -GIT_PROD_URI="git@github.com:ClickHouse/clickhouse.github.io.git" +GIT_PROD_URI="git@github.com:ClickHouse/clickhouse-website-content.git" EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---enable-stable-releases --minify}" HISTORY_SIZE="${HISTORY_SIZE:-5}" From 0cd49d746f7aa7be71e801fa589923a74a489b60 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Thu, 9 Apr 2020 23:00:57 +0300 Subject: [PATCH 167/752] several fixes --- src/Interpreters/DictionaryReader.h | 2 ++ src/Interpreters/ExpressionAnalyzer.cpp | 39 +++++++++++++------------ src/Interpreters/TableJoin.cpp | 23 +++++++++++++++ src/Interpreters/TableJoin.h | 1 + 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/DictionaryReader.h b/src/Interpreters/DictionaryReader.h index c1cd3e9ef1d..bb13758f46c 100644 --- a/src/Interpreters/DictionaryReader.h +++ b/src/Interpreters/DictionaryReader.h @@ -41,6 +41,7 @@ public: result.type = prepare_function->getReturnType(); if (result.type->getTypeId() != expected_type) throw Exception("Type mismatch in dictionary reader for: " + column_name, ErrorCodes::TYPE_MISMATCH); + block.insert(result); function = prepare_function->prepare(block, arg_positions, result_pos); } @@ -113,6 +114,7 @@ public: { size_t column_name_pos = key_size + i; auto & column = result_header.getByPosition(i); + arguments_get[1].column = DataTypeString().createColumnConst(1, 
src_column_names[i]); ColumnNumbers positions_get{0, column_name_pos, key_position}; functions_get.emplace_back(FunctionWrapper( *dict_get, arguments_get, sample_block, positions_get, column.name, column.type->getTypeId())); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index fbf2b663f3b..2ce7b373178 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -520,18 +520,9 @@ static ExpressionActionsPtr createJoinedBlockActions(const Context & context, co return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false); } -static bool allowDictJoin(const TableJoin & table_join, const Context & context, String & dict_name) +static bool allowDictJoin(StoragePtr joined_storage, const Context & context, String & dict_name, String & key_name) { - if (!table_join.joined_storage) - return false; - - const Names & right_keys = table_join.keyNamesRight(); - if (right_keys.size() != 1) - return false; - - const String & key_name = right_keys[0]; /// TODO: compound name - - auto * dict = dynamic_cast(table_join.joined_storage.get()); + auto * dict = dynamic_cast(joined_storage.get()); if (!dict) return false; @@ -541,20 +532,30 @@ static bool allowDictJoin(const TableJoin & table_join, const Context & context, return false; const DictionaryStructure & structure = dictionary->getStructure(); - return structure.id && (structure.id->name == key_name); /// key is UInt64 + if (structure.id) + { + key_name = structure.id->name; + return true; + } + return false; } -static std::shared_ptr makeJoin(std::shared_ptr analyzed_join, const Block & sample_block, - const Names & original_right_columns, const Context & context) +static std::shared_ptr makeJoin(std::shared_ptr analyzed_join, const Block & sample_block, const Context & context) { bool allow_merge_join = analyzed_join->allowMergeJoin(); + /// HashJoin with Dictionary optimisation String dict_name; - if (allowDictJoin(*analyzed_join, context, dict_name)) + String key_name; + if (analyzed_join->joined_storage && allowDictJoin(analyzed_join->joined_storage, context, dict_name, key_name)) { - analyzed_join->dictionary_reader = std::make_shared( - dict_name, original_right_columns, sample_block.getNamesAndTypesList(), context); - return std::make_shared(analyzed_join, sample_block); + Names original_names; + NamesAndTypesList result_columns; + if (analyzed_join->allowDictJoin(key_name, sample_block, original_names, result_columns)) + { + analyzed_join->dictionary_reader = std::make_shared(dict_name, original_names, result_columns, context); + return std::make_shared(analyzed_join, sample_block); + } } if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join)) @@ -601,7 +602,7 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer /// TODO You do not need to set this up when JOIN is only needed on remote servers. subquery_for_join.setJoinActions(joined_block_actions); /// changes subquery_for_join.sample_block inside - subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block, original_right_columns, context); + subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block, context); /// Do not make subquery for join over dictionary. 
if (syntax->analyzed_join->dictionary_reader) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 30b5e8e4483..5e57c740bf1 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -238,4 +238,27 @@ bool TableJoin::allowMergeJoin() const return allow_merge_join; } +bool TableJoin::allowDictJoin(const String & dict_key, const Block & sample_block, Names & names, NamesAndTypesList & result_columns) const +{ + const Names & right_keys = keyNamesRight(); + if (right_keys.size() != 1) + return false; + + for (auto & col : sample_block) + { + String original = original_names.find(col.name)->second; + if (col.name == right_keys[0]) + { + if (original != dict_key) + return false; /// JOIN key != Dictionary key + continue; /// do not extract key column + } + + names.push_back(original); + result_columns.push_back({col.name, col.type}); + } + + return true; +} + } diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 4cde414e270..c8c51918e27 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -96,6 +96,7 @@ public: const SizeLimits & sizeLimits() const { return size_limits; } VolumePtr getTemporaryVolume() { return tmp_volume; } bool allowMergeJoin() const; + bool allowDictJoin(const String & dict_key, const Block & sample_block, Names &, NamesAndTypesList &) const; bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; } bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; } bool forceHashJoin() const { return join_algorithm == JoinAlgorithm::HASH; } From 0accd2908c347038db03f26911a8d2de5ae72ea5 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Thu, 9 Apr 2020 23:11:20 +0300 Subject: [PATCH 168/752] Fix up styler's grudge --- src/Storages/IStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 3bf8054485c..5a792080370 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -324,7 +324,7 @@ RWLockImpl::LockHolder IStorage::tryLockTimed( const String type_str = type == RWLockImpl::Type::Read ? "READ" : "WRITE"; throw Exception( type_str + " locking attempt on \"" + getStorageID().getFullTableName() + - "\" has timed out! (" + toString(acquire_timeout.totalMilliseconds()) + "ms ). " + "\" has timed out! (" + toString(acquire_timeout.totalMilliseconds()) + "ms) " "Possible deadlock avoided. 
Client should retry.", ErrorCodes::DEADLOCK_AVOIDED); } From 2b51b5ee5fdf414d1fc1e6cace53f578351afd4c Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 23:32:35 +0300 Subject: [PATCH 169/752] Update release.sh --- docs/tools/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/release.sh b/docs/tools/release.sh index 8eec2d758da..faca0e8ec17 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -5,7 +5,7 @@ BASE_DIR=$(dirname $(readlink -f $0)) BUILD_DIR="${BASE_DIR}/../build" PUBLISH_DIR="${BASE_DIR}/../publish" BASE_DOMAIN="${BASE_DOMAIN:-content.clickhouse.tech}" -GIT_TEST_URI="${GIT_TEST_URI:-git@github.com:ClickHouse/clickhouse.github.io.git}" +GIT_TEST_URI="${GIT_TEST_URI:-git@github.com:ClickHouse/clickhouse-website-content.git}" GIT_PROD_URI="git@github.com:ClickHouse/clickhouse-website-content.git" EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---enable-stable-releases --minify}" HISTORY_SIZE="${HISTORY_SIZE:-5}" From 9326016e5fd3c50550cfa23a47c063c41a886b70 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 23:50:39 +0300 Subject: [PATCH 170/752] Put single-page content into a separate js file (#10160) --- docs/tools/build.py | 17 +++++++++++++++-- website/templates/docs/content.html | 15 ++++++++++----- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index 65b9f9f8c04..1719fe051d3 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -219,7 +219,20 @@ def build_single_page_version(lang, args, nav, cfg): os.path.join(site_temp, 'single'), single_page_output_path ) - + + single_page_index_html = os.path.join(single_page_output_path, 'index.html') + single_page_content_js = os.path.join(single_page_output_path, 'content.js') + with open(single_page_index_html, 'r') as f: + sp_prefix, sp_js, sp_suffix = f.read().split('') + with open(single_page_index_html, 'w') as f: + f.write(sp_prefix) + f.write(sp_suffix) + with open(single_page_content_js, 'w') as f: + if args.minify: + import jsmin + sp_js = jsmin.jsmin(sp_js) + f.write(sp_js) + logging.info(f'Re-building single page for {lang} pdf/test') with util.temp_dir() as test_dir: extra['single_page'] = False @@ -400,7 +413,7 @@ if __name__ == '__main__': from build import build build(args) - + if args.livereload: new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')] new_args = sys.executable + ' ' + ' '.join(new_args) diff --git a/website/templates/docs/content.html b/website/templates/docs/content.html index 320f1a2b53f..d4ff1fd8554 100644 --- a/website/templates/docs/content.html +++ b/website/templates/docs/content.html @@ -17,11 +17,7 @@ {% endif %} {% if single_page and page.content %} - + {% endif %} @@ -32,3 +28,12 @@ {% endif %} +{% if single_page and page.content %} + +(function() { + {% for chunk in page.content|chunks %} + document.write({{ chunk|tojson|safe }}); + {% endfor %} +})(); + +{% endif %} From 345978ae35776781a136504f828336ca2bbe1151 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 9 Apr 2020 23:51:01 +0300 Subject: [PATCH 171/752] normalize ru markdown (#10159) --- docs/ru/development/architecture.md | 6 +- docs/ru/development/build.md | 2 +- docs/ru/development/build_cross_arm.md | 2 +- docs/ru/development/developer_instruction.md | 4 +- docs/ru/development/style.md | 6 +- docs/ru/development/tests.md | 2 +- docs/ru/getting_started/install.md | 2 +- docs/ru/getting_started/tutorial.md | 2 +- docs/ru/interfaces/formats.md | 8 +- 
docs/ru/introduction/adopters.md | 2 +- docs/ru/introduction/history.md | 26 ++--- docs/ru/operations/performance_test.md | 2 +- docs/ru/operations/quotas.md | 4 +- docs/ru/operations/system_tables.md | 102 ++++++++++--------- docs/ru/operations/tips.md | 4 +- docs/ru/operations/troubleshooting.md | 2 +- 16 files changed, 89 insertions(+), 87 deletions(-) diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index f5f57179ece..0d1fc2ff947 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -118,7 +118,7 @@ A `Block` это контейнер, представляющий подмнож Существуют обычные функции и агрегатные функции. Агрегатные функции см. В следующем разделе. -Ordinary functions don't change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`'s данных для реализации векторизованного выполнения запросов. +Ordinary functions don’t change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`’s данных для реализации векторизованного выполнения запросов. Есть некоторые другие функции, такие как [размер блока](../sql_reference/functions/other_functions.md#function-blocksize), [роунумберинблок](../sql_reference/functions/other_functions.md#function-rownumberinblock), и [runningAccumulate](../sql_reference/functions/other_functions.md#function-runningaccumulate), которые эксплуатируют обработку блоков и нарушают независимость строк. @@ -157,7 +157,7 @@ ClickHouse имеет сильную типизацию, поэтому нет !!! note "Примечание" Для большинства внешних приложений мы рекомендуем использовать интерфейс HTTP, поскольку он прост и удобен в использовании. Протокол TCP более тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных, а также использует пользовательское обрамление для сжатых данных. Мы не выпустили библиотеку C для этого протокола, потому что она требует связывания большей части кодовой базы ClickHouse, что нецелесообразно. -## Выполнение Распределенных Запросов {#distributed-query-execution} +## Выполнение Распределенных Запросов {#distributed-query-execution} Серверы в кластерной установке в основном независимы. Вы можете создать `Distributed` таблица на одном или всех серверах кластера. То `Distributed` table does not store data itself – it only provides a «view» ко всем локальным таблицам на нескольких узлах кластера. Когда вы выберите из `Distributed` таблица, он переписывает этот запрос, выбирает удаленные узлы в соответствии с настройками балансировки нагрузки и отправляет запрос к ним. То `Distributed` таблица запрашивает удаленные серверы для обработки запроса только до стадии, когда промежуточные результаты с разных серверов могут быть объединены. Затем он получает промежуточные результаты и сливает их. Распределенная таблица пытается распределить как можно больше работы на удаленные серверы и не отправляет много промежуточных данных по сети. @@ -175,7 +175,7 @@ ClickHouse имеет сильную типизацию, поэтому нет Когда вы `INSERT` куча данных в `MergeTree`, эта связка сортируется по порядку первичного ключа и образует новую часть. Существуют фоновые потоки, которые периодически выделяют некоторые детали и объединяют их в одну сортированную деталь, чтобы сохранить количество деталей относительно низким. Вот почему он так называется `MergeTree`. 
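The paragraph above compresses the whole MergeTree write path into two steps: each INSERT becomes an immutable part, sorted once by primary key, and background threads fold several parts into one larger sorted part to keep the part count low. A toy sketch of that idea follows — the names are invented for illustration, and in-memory vectors stand in for on-disk column files:

// Toy illustration of the MergeTree idea from the paragraph above; not the
// real storage engine, which works with immutable on-disk parts.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>

using Part = std::vector<int>;  // a "part": rows already sorted by primary key

Part insertBatch(std::vector<int> rows)
{
    std::sort(rows.begin(), rows.end()); // each inserted batch is sorted exactly once
    return rows;
}

Part mergeParts(const std::vector<Part> & parts)
{
    Part merged;
    for (const auto & part : parts)
    {
        Part next;
        std::merge(merged.begin(), merged.end(), part.begin(), part.end(),
                   std::back_inserter(next)); // linear merge of sorted runs, no re-sort
        merged = std::move(next);
    }
    return merged;
}

int main()
{
    std::vector<Part> parts;
    parts.push_back(insertBatch({5, 1, 9}));
    parts.push_back(insertBatch({4, 2}));
    parts.push_back(insertBatch({8, 3}));

    Part big = mergeParts(parts); // the "background merge": many small parts -> one
    for (int key : big)
        std::cout << key << ' ';
    std::cout << '\n'; // 1 2 3 4 5 8 9
}

Because every part is already sorted, the background step is a linear merge of sorted runs rather than a re-sort — which is what makes the batched-insert model described next workable.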
Конечно, слияние приводит к тому, что «write amplification». Все части неизменны: они только создаются и удаляются, но не изменяются. Когда SELECT выполняется, он содержит снимок таблицы (набор деталей). После слияния мы также сохраняем старые детали в течение некоторого времени, чтобы облегчить восстановление после сбоя, поэтому, если мы видим, что какая-то объединенная деталь, вероятно, сломана, мы можем заменить ее исходными частями. -`MergeTree` это не дерево LSM, потому что оно не содержит «memtable» и «log»: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity's sake, and because we are already inserting data in batches in our applications. +`MergeTree` это не дерево LSM, потому что оно не содержит «memtable» и «log»: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications. > Таблицы MergeTree могут иметь только один (первичный) индекс: вторичных индексов не существует. Было бы неплохо разрешить несколько физических представлений в одной логической таблице, например, хранить данные в более чем одном физическом порядке или даже разрешить представления с предварительно агрегированными данными наряду с исходными данными. diff --git a/docs/ru/development/build.md b/docs/ru/development/build.md index 3e0c3763be6..f0e0ccfd4aa 100644 --- a/docs/ru/development/build.md +++ b/docs/ru/development/build.md @@ -21,7 +21,7 @@ $ sudo apt-get install git cmake python ninja-build Есть несколько способов сделать это. -### Установка из PPA пакет {#install-from-a-ppa-package} +### Установка из PPA пакет {#install-from-a-ppa-package} ``` bash $ sudo apt-get install software-properties-common diff --git a/docs/ru/development/build_cross_arm.md b/docs/ru/development/build_cross_arm.md index 27e2d73c759..184028212e9 100644 --- a/docs/ru/development/build_cross_arm.md +++ b/docs/ru/development/build_cross_arm.md @@ -3,7 +3,7 @@ machine_translated: true machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 --- -# Как построить ClickHouse на Linux для архитектуры AArch64 (ARM64) {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} +# Как построить ClickHouse на Linux для архитектуры AArch64 (ARM64) {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} Это для случая, когда у вас есть Linux-машина и вы хотите использовать ее для сборки `clickhouse` двоичный файл, который будет работать на другой машине Linux с архитектурой процессора AARCH64. Это предназначено для непрерывной проверки интеграции, которая выполняется на серверах Linux. diff --git a/docs/ru/development/developer_instruction.md b/docs/ru/development/developer_instruction.md index 4bc2ada8c1e..11ac3a73f6e 100644 --- a/docs/ru/development/developer_instruction.md +++ b/docs/ru/development/developer_instruction.md @@ -71,7 +71,7 @@ ClickHouse не работает и не собирается на 32-битны После этого, вы сможете добавлять в свой репозиторий обновления из репозитория Яндекса с помощью команды `git pull upstream master`. 
-## Работа с сабмодулями git {#rabota-s-sabmoduliami-git} +## Работа с сабмодулями Git {#rabota-s-sabmoduliami-git} Работа с сабмодулями git может быть достаточно болезненной. Следующие команды позволят содержать их в порядке: @@ -267,7 +267,7 @@ Mac OS X: clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.hits FORMAT TSV" < hits_v1.tsv clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visits FORMAT TSV" < visits_v1.tsv -# Создание pull request {#sozdanie-pull-request} +# Создание Pull Request {#sozdanie-pull-request} Откройте свой форк репозитория в интерфейсе GitHub. Если вы вели разработку в бранче, выберите этот бранч. На странице будет доступна кнопка «Pull request». По сути, это означает «создать заявку на принятие моих изменений в основной репозиторий». diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 091419394f7..a282ec6ec5c 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -431,9 +431,9 @@ enum class CompressionMethod Примеры: -- проще всего разместить объект на стеке, или сделать его членом другого класса. -- для большого количества маленьких объектов используйте контейнеры. -- для автоматического освобождения маленького количества объектов, выделенных на куче, используйте `shared_ptr/unique_ptr`. +- проще всего разместить объект на стеке, или сделать его членом другого класса. +- для большого количества маленьких объектов используйте контейнеры. +- для автоматического освобождения маленького количества объектов, выделенных на куче, используйте `shared_ptr/unique_ptr`. **2.** Управление ресурсами. diff --git a/docs/ru/development/tests.md b/docs/ru/development/tests.md index 630ceecf2b2..1dfcdfdfe6f 100644 --- a/docs/ru/development/tests.md +++ b/docs/ru/development/tests.md @@ -215,7 +215,7 @@ $ clickhouse benchmark --concurrency 16 < queries.tsv `FORTIFY_SOURCE` используется по умолчанию. Это почти бесполезно, но все же имеет смысл в редких случаях, и мы не отключаем его. -## Стиль Кода {#code-style} +## Стиль Кода {#code-style} Описаны правила стиля кода [здесь](https://clickhouse.tech/docs/en/development/style/). diff --git a/docs/ru/getting_started/install.md b/docs/ru/getting_started/install.md index 7caffb498e9..cd571156d03 100644 --- a/docs/ru/getting_started/install.md +++ b/docs/ru/getting_started/install.md @@ -57,7 +57,7 @@ sudo yum install clickhouse-server clickhouse-client Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.yandex.ru/clickhouse/rpm/stable/x86\_64. -### Из tgz архивов {#from-tgz-archives} +### Из Tgz архивов {#from-tgz-archives} Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz` архивов для всех дистрибутивов, где невозможна установка `deb` и `rpm` пакетов. diff --git a/docs/ru/getting_started/tutorial.md b/docs/ru/getting_started/tutorial.md index 4a31f4b23a2..69cdeac8387 100644 --- a/docs/ru/getting_started/tutorial.md +++ b/docs/ru/getting_started/tutorial.md @@ -85,7 +85,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv ## Импорт Образца Набора Данных {#import-sample-dataset} -Теперь пришло время заполнить наш сервер ClickHouse некоторыми образцами данных. В этом уроке мы будем использовать анонимизированные данные Яндекса.Metrica, первый сервис, который запускает ClickHouse в производственном режиме до того, как он стал открытым исходным кодом (подробнее об этом в [раздел истории](../introduction/history.md)). 
Есть [несколько способов импорта Яндекса.Набор метрика ](example_datasets/metrica.md), и ради учебника мы пойдем с самым реалистичным из них. +Теперь пришло время заполнить наш сервер ClickHouse некоторыми образцами данных. В этом уроке мы будем использовать анонимизированные данные Яндекса.Metrica, первый сервис, который запускает ClickHouse в производственном режиме до того, как он стал открытым исходным кодом (подробнее об этом в [раздел истории](../introduction/history.md)). Есть [несколько способов импорта Яндекса.Набор метрика](example_datasets/metrica.md), и ради учебника мы пойдем с самым реалистичным из них. ### Загрузка и извлечение данных таблицы {#download-and-extract-table-data} diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 27cab90bdd4..b1707a55193 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -953,8 +953,8 @@ ClickHouse пишет и читает сообщения `Protocol Buffers` в Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../sql_reference/data_types/index.md) ClickHouse для запросов `INSERT` и `SELECT`. -| Тип данных Parquet (`INSERT`) | Тип данных ClickHouse | Тип данных Parquet (`SELECT`) | -|-------------------------------|---------------------------------------------|-------------------------------| +| Тип данных Parquet (`INSERT`) | Тип данных ClickHouse | Тип данных Parquet (`SELECT`) | +|-------------------------------|-----------------------------------------------------------|-------------------------------| | `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) | `UINT8` | | `INT8` | [Int8](../sql_reference/data_types/int_uint.md) | `INT8` | | `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) | `UINT16` | @@ -1001,8 +1001,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ Таблица показывает поддержанные типы данных и их соответствие [типам данных](../sql_reference/data_types/index.md) ClickHouse для запросов `INSERT`. 
-| Тип данных ORC (`INSERT`) | Тип данных ClickHouse | -|---------------------------|---------------------------------------| +| Тип данных ORC (`INSERT`) | Тип данных ClickHouse | +|---------------------------|-----------------------------------------------------| | `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) | | `INT8` | [Int8](../sql_reference/data_types/int_uint.md) | | `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) | diff --git a/docs/ru/introduction/adopters.md b/docs/ru/introduction/adopters.md index 1b7d56b19d1..20c465f6418 100644 --- a/docs/ru/introduction/adopters.md +++ b/docs/ru/introduction/adopters.md @@ -70,7 +70,7 @@ machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 | [Технология Сяосин.](https://www.xiaoheiban.cn/) | Образование | Общая цель | — | — | [Слайды на английском языке, ноябрь 2019 года](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | | [Сималайя](https://www.ximalaya.com/) | Общий доступ к аудио | OLAP | — | — | [Слайды на английском языке, ноябрь 2019 года](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | | [Облако Яндекса](https://cloud.yandex.ru/services/managed-clickhouse) | Публичное Облако | Главный продукт | — | — | [Разговор на русском языке, декабрь 2019 года](https://www.youtube.com/watch?v=pgnak9e_E0o) | -| [DataLens Яндекс ](https://cloud.yandex.ru/services/datalens) | Бизнес-разведка | Главный продукт | — | — | [Слайды на русском языке, декабрь 2019 года](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | +| [DataLens Яндекс](https://cloud.yandex.ru/services/datalens) | Бизнес-разведка | Главный продукт | — | — | [Слайды на русском языке, декабрь 2019 года](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | | [Яндекс Маркет](https://market.yandex.ru/) | электронная коммерция | Метрики, Ведение Журнала | — | — | [Разговор на русском языке, январь 2019 года](https://youtu.be/_l1qP0DyBcA?t=478) | | [Яндекс Метрика](https://metrica.yandex.com) | Веб-аналитика | Главный продукт | 360 серверов в одном кластере, 1862 сервера в одном отделе | 66.41 ПИБ / 5.68 ПИБ | [Слайды, Февраль 2020 Года](https://presentations.clickhouse.tech/meetup40/introduction/#13) | | [ЦВТ](https://htc-cs.ru/) | Разработка программного обеспечения | Метрики, Ведение Журнала | — | — | [Сообщение в блоге, март 2019 года, на русском языке](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | diff --git a/docs/ru/introduction/history.md b/docs/ru/introduction/history.md index 856263704e5..65254b0f4f0 100644 --- a/docs/ru/introduction/history.md +++ b/docs/ru/introduction/history.md @@ -13,11 +13,11 @@ ClickHouse изначально разрабатывался для обеспе Также ClickHouse используется: -- для хранения данных Вебвизора; -- для обработки промежуточных данных; -- для построения глобальных отчётов Аналитиками; -- для выполнения запросов в целях отладки движка Метрики; -- для анализа логов работы API и пользовательского интерфейса. +- для хранения данных Вебвизора; +- для обработки промежуточных данных; +- для построения глобальных отчётов Аналитиками; +- для выполнения запросов в целях отладки движка Метрики; +- для анализа логов работы API и пользовательского интерфейса. ClickHouse имеет более десятка инсталляций в других отделах Яндекса: в Вертикальных сервисах, Маркете, Директе, БК, Бизнес аналитике, Мобильной разработке, AdFox, Персональных сервисах и т п. 
@@ -27,14 +27,14 @@ ClickHouse имеет более десятка инсталляций в дру Но агрегированные данные являются очень ограниченным решением, по следующим причинам: -- вы должны заранее знать перечень отчётов, необходимых пользователю; -- то есть, пользователь не может построить произвольный отчёт; -- при агрегации по большому количеству ключей, объём данных не уменьшается и агрегация бесполезна; -- при большом количестве отчётов, получается слишком много вариантов агрегации (комбинаторный взрыв); -- при агрегации по ключам высокой кардинальности (например, URL) объём данных уменьшается не сильно (менее чем в 2 раза); -- из-за этого, объём данных при агрегации может не уменьшиться, а вырасти; -- пользователи будут смотреть не все отчёты, которые мы для них посчитаем - то есть, большая часть вычислений бесполезна; -- возможно нарушение логической целостности данных для разных агрегаций; +- вы должны заранее знать перечень отчётов, необходимых пользователю; +- то есть, пользователь не может построить произвольный отчёт; +- при агрегации по большому количеству ключей, объём данных не уменьшается и агрегация бесполезна; +- при большом количестве отчётов, получается слишком много вариантов агрегации (комбинаторный взрыв); +- при агрегации по ключам высокой кардинальности (например, URL) объём данных уменьшается не сильно (менее чем в 2 раза); +- из-за этого, объём данных при агрегации может не уменьшиться, а вырасти; +- пользователи будут смотреть не все отчёты, которые мы для них посчитаем - то есть, большая часть вычислений бесполезна; +- возможно нарушение логической целостности данных для разных агрегаций; Как видно, если ничего не агрегировать, и работать с неагрегированными данными, то это даже может уменьшить объём вычислений. diff --git a/docs/ru/operations/performance_test.md b/docs/ru/operations/performance_test.md index 391bcddd412..9b5c6f4fed3 100644 --- a/docs/ru/operations/performance_test.md +++ b/docs/ru/operations/performance_test.md @@ -42,7 +42,7 @@ machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql -1. Загрузите тестовые данные в соответствии с [Яндекс.Набор метрика ](../getting_started/example_datasets/metrica.md) инструкция («hits» таблица, содержащая 100 миллионов строк). +1. Загрузите тестовые данные в соответствии с [Яндекс.Набор метрика](../getting_started/example_datasets/metrica.md) инструкция («hits» таблица, содержащая 100 миллионов строк). diff --git a/docs/ru/operations/quotas.md b/docs/ru/operations/quotas.md index f109b889350..399e80d2011 100644 --- a/docs/ru/operations/quotas.md +++ b/docs/ru/operations/quotas.md @@ -7,8 +7,8 @@ В отличие от них, квоты: -- ограничивают не один запрос, а множество запросов, которые могут быть выполнены за интервал времени; -- при распределённой обработке запроса, учитывают ресурсы, потраченные на всех удалённых серверах. +- ограничивают не один запрос, а множество запросов, которые могут быть выполнены за интервал времени; +- при распределённой обработке запроса, учитывают ресурсы, потраченные на всех удалённых серверах. Рассмотрим фрагмент файла users.xml, описывающего квоты. 
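The quotas.md hunk above names the two properties that distinguish quotas from per-query restrictions: they budget a whole set of queries over a time interval, and they account resources spent on every remote server a distributed query touches. The users.xml fragment it refers to lies outside this hunk; instead, the toy sketch below illustrates just the interval mechanic. The class and its single counter are invented for illustration and are not ClickHouse's quota implementation, which tracks several counters (queries, errors, result rows, read rows, execution time) per configured interval.

// Toy illustration of interval-based quota accounting: counters accumulate
// within a fixed window and reset when the window rolls over.
#include <chrono>
#include <iostream>
#include <stdexcept>

class IntervalQuota
{
public:
    IntervalQuota(std::chrono::seconds duration_, size_t max_queries_)
        : duration(duration_), max_queries(max_queries_)
        , window_start(std::chrono::steady_clock::now()) {}

    /// Called once per query; throws when the per-interval budget is exhausted.
    void accountQuery()
    {
        const auto now = std::chrono::steady_clock::now();
        if (now - window_start >= duration)
        {
            window_start = now; // new interval: reset the counters
            queries = 0;
        }
        if (++queries > max_queries)
            throw std::runtime_error("Quota for queries exceeded for the current interval");
    }

private:
    std::chrono::seconds duration;
    size_t max_queries;
    std::chrono::steady_clock::time_point window_start;
    size_t queries = 0;
};

int main()
{
    IntervalQuota quota(std::chrono::seconds(3600), /*max_queries=*/ 2);
    quota.accountQuery();
    quota.accountQuery();
    try
    {
        quota.accountQuery(); // third query within the hour exceeds the budget
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}

The reset-on-rollover step is the core of the interval semantics: budgets are per-window, so a client that exhausts a quota only has to wait for the next interval rather than being locked out permanently.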
diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index dfc15e6281a..b68aa570f52 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -141,37 +141,37 @@ SELECT * FROM system.contributors WHERE name='Olga Khvostikova' Столбцы: -- `database` ([String](../sql_reference/data_types/string.md)) — Имя базы данных, в которой находится словарь, созданный с помощью DDL-запроса. Пустая строка для других словарей. -- `name` ([String](../sql_reference/data_types/string.md)) — [Имя словаря](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md). -- `status` ([Enum8](../sql_reference/data_types/enum.md)) — Статус словаря. Возможные значения: - - `NOT_LOADED` — Словарь не загружен, потому что не использовался. - - `LOADED` — Словарь загружен успешно. - - `FAILED` — Словарь не загружен в результате ошибки. - - `LOADING` — Словарь в процессе загрузки. - - `LOADED_AND_RELOADING` — Словарь загружен успешно, сейчас перезагружается (частые причины: запрос [SYSTEM RELOAD DICTIONARY](../sql_reference/statements/system.md#query_language-system-reload-dictionary), таймаут, изменение настроек словаря). - - `FAILED_AND_RELOADING` — Словарь не загружен в результате ошибки, сейчас перезагружается. -- `origin` ([String](../sql_reference/data_types/string.md)) — Путь к конфигурационному файлу, описывающему словарь. -- `type` ([String](../sql_reference/data_types/string.md)) — Тип размещения словаря. [Хранение словарей в памяти](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md). -- `key` — [Тип ключа](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-key): Числовой ключ ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) или Составной ключ ([String](../sql_reference/data_types/string.md)) — строка вида "(тип 1, тип 2, ..., тип n)". -- `attribute.names` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Массив [имен атрибутов](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. -- `attribute.types` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Соответствующий массив [типов атрибутов](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. -- `bytes_allocated` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Объем оперативной памяти, используемый словарем. -- `query_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Количество запросов с момента загрузки словаря или с момента последней успешной перезагрузки. -- `hit_rate` ([Float64](../sql_reference/data_types/float.md)) — Для cache-словарей — процент закэшированных значений. -- `element_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Количество элементов, хранящихся в словаре. -- `load_factor` ([Float64](../sql_reference/data_types/float.md)) — Процент заполнения словаря (для хэшированного словаря — процент заполнения хэш-таблицы). -- `source` ([String](../sql_reference/data_types/string.md)) — Текст, описывающий [источник данных](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md) для словаря. 
-- `lifetime_min` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Минимальное [время обновления](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. -- `lifetime_max` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Максимальное [время обновления](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. -- `loading_start_time` ([DateTime](../sql_reference/data_types/datetime.md)) — Время начала загрузки словаря. -- `loading_duration` ([Float32](../sql_reference/data_types/float.md)) — Время, затраченное на загрузку словаря. -- `last_exception` ([String](../sql_reference/data_types/string.md)) — Текст ошибки, возникающей при создании или перезагрузке словаря, если словарь не удалось создать. +- `database` ([String](../sql_reference/data_types/string.md)) — Имя базы данных, в которой находится словарь, созданный с помощью DDL-запроса. Пустая строка для других словарей. +- `name` ([String](../sql_reference/data_types/string.md)) — [Имя словаря](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md). +- `status` ([Enum8](../sql_reference/data_types/enum.md)) — Статус словаря. Возможные значения: + - `NOT_LOADED` — Словарь не загружен, потому что не использовался. + - `LOADED` — Словарь загружен успешно. + - `FAILED` — Словарь не загружен в результате ошибки. + - `LOADING` — Словарь в процессе загрузки. + - `LOADED_AND_RELOADING` — Словарь загружен успешно, сейчас перезагружается (частые причины: запрос [SYSTEM RELOAD DICTIONARY](../sql_reference/statements/system.md#query_language-system-reload-dictionary), таймаут, изменение настроек словаря). + - `FAILED_AND_RELOADING` — Словарь не загружен в результате ошибки, сейчас перезагружается. +- `origin` ([String](../sql_reference/data_types/string.md)) — Путь к конфигурационному файлу, описывающему словарь. +- `type` ([String](../sql_reference/data_types/string.md)) — Тип размещения словаря. [Хранение словарей в памяти](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md). +- `key` — [Тип ключа](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-key): Числовой ключ ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) или Составной ключ ([String](../sql_reference/data_types/string.md)) — строка вида “(тип 1, тип 2, …, тип n)”. +- `attribute.names` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Массив [имен атрибутов](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `attribute.types` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Соответствующий массив [типов атрибутов](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `bytes_allocated` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Объем оперативной памяти, используемый словарем. 
+- `query_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Количество запросов с момента загрузки словаря или с момента последней успешной перезагрузки. +- `hit_rate` ([Float64](../sql_reference/data_types/float.md)) — Для cache-словарей — процент закэшированных значений. +- `element_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Количество элементов, хранящихся в словаре. +- `load_factor` ([Float64](../sql_reference/data_types/float.md)) — Процент заполнения словаря (для хэшированного словаря — процент заполнения хэш-таблицы). +- `source` ([String](../sql_reference/data_types/string.md)) — Текст, описывающий [источник данных](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md) для словаря. +- `lifetime_min` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Минимальное [время обновления](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. +- `lifetime_max` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Максимальное [время обновления](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого Clickhouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. +- `loading_start_time` ([DateTime](../sql_reference/data_types/datetime.md)) — Время начала загрузки словаря. +- `loading_duration` ([Float32](../sql_reference/data_types/float.md)) — Время, затраченное на загрузку словаря. +- `last_exception` ([String](../sql_reference/data_types/string.md)) — Текст ошибки, возникающей при создании или перезагрузке словаря, если словарь не удалось создать. **Пример** Настройте словарь. -```sql +``` sql CREATE DICTIONARY dictdb.dict ( `key` Int64 DEFAULT -1, @@ -186,11 +186,11 @@ LAYOUT(FLAT()) Убедитесь, что словарь загружен. -```sql +``` sql SELECT * FROM system.dictionaries ``` -```text +``` text ┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ │ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ └──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ @@ -921,33 +921,33 @@ WHERE Если этот запрос ничего не возвращает - значит всё хорошо. -## system.settings {#system-tables-system-settings} +## system.settings {#system-tables-system-settings} Содержит информацию о сессионных настройках для текущего пользователя. Столбцы: -- `name` ([String](../sql_reference/data_types/string.md)) — имя настройки. 
-- `value` ([String](../sql_reference/data_types/string.md)) — значение настройки. -- `changed` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — показывает, изменена ли настройка по отношению к значению по умолчанию. -- `description` ([String](../sql_reference/data_types/string.md)) — краткое описание настройки. -- `min` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — минимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../sql_reference/syntax.md#null-literal). -- `max` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — максимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../sql_reference/syntax.md#null-literal). -- `readonly` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку: - - `0` — Текущий пользователь может изменять настройку. - - `1` — Текущий пользователь не может изменять настройку. +- `name` ([String](../sql_reference/data_types/string.md)) — имя настройки. +- `value` ([String](../sql_reference/data_types/string.md)) — значение настройки. +- `changed` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — показывает, изменена ли настройка по отношению к значению по умолчанию. +- `description` ([String](../sql_reference/data_types/string.md)) — краткое описание настройки. +- `min` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — минимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../sql_reference/syntax.md#null-literal). +- `max` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — максимальное значение настройки, если задано [ограничение](settings/constraints_on_settings.md#constraints-on-settings). Если нет, то поле содержит [NULL](../sql_reference/syntax.md#null-literal). +- `readonly` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — Показывает, может ли пользователь изменять настройку: + - `0` — Текущий пользователь может изменять настройку. + - `1` — Текущий пользователь не может изменять настройку. **Пример** Пример показывает как получить информацию о настройках, имена которых содержат `min_i`. -```sql -SELECT * -FROM system.settings +``` sql +SELECT * +FROM system.settings WHERE name LIKE '%min_i%' ``` -```text +``` text ┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐ │ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ │ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ @@ -957,21 +957,23 @@ WHERE name LIKE '%min_i%' Использование `WHERE changed` может быть полезно, например, если необходимо проверить: -- Что настройки корректно загрузились из конфигурационного файла и используются. -- Настройки, изменённые в текущей сессии. 
+- Что настройки корректно загрузились из конфигурационного файла и используются. +- Настройки, изменённые в текущей сессии. -```sql + + +``` sql SELECT * FROM system.settings WHERE changed AND name='load_balancing' ``` - **Cм. также** -- [Настройки](settings/index.md#settings) -- [Разрешения для запросов](settings/permissions_for_queries.md#settings_readonly) -- [Ограничения для значений настроек](settings/constraints_on_settings.md) +- [Настройки](settings/index.md#settings) +- [Разрешения для запросов](settings/permissions_for_queries.md#settings_readonly) +- [Ограничения для значений настроек](settings/constraints_on_settings.md) + +## system.table\_engines {#system.table_engines} -## system.table_engines ``` text ┌─name───────────────────┬─value───────┬─changed─┐ │ max_threads │ 8 │ 1 │ diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md index 4aa4605defb..271a6a35e25 100644 --- a/docs/ru/operations/tips.md +++ b/docs/ru/operations/tips.md @@ -1,6 +1,6 @@ # Советы по эксплуатации {#sovety-po-ekspluatatsii} -## CPU scaling governor {#cpu-scaling-governor} +## CPU Scaling Governor {#cpu-scaling-governor} Всегда используйте `performance` scaling governor. `ondemand` scaling governor работает намного хуже при постоянно высоком спросе. @@ -25,7 +25,7 @@ $ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gov $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory ``` -## Huge pages {#huge-pages} +## Huge Pages {#huge-pages} Механизм прозрачных huge pages нужно отключить. Он мешает работе аллокаторов памяти, что приводит к значительной деградации производительности. diff --git a/docs/ru/operations/troubleshooting.md b/docs/ru/operations/troubleshooting.md index 7c4be02456c..a045de41ccc 100644 --- a/docs/ru/operations/troubleshooting.md +++ b/docs/ru/operations/troubleshooting.md @@ -7,7 +7,7 @@ ## Установка дистрибутива {#troubleshooting-installation-errors} -### Не получается скачать deb-пакеты из репозитория ClickHouse с помощью apt-get {#ne-poluchaetsia-skachat-deb-pakety-iz-repozitoriia-clickhouse-s-pomoshchiu-apt-get} +### Не получается скачать deb-пакеты из репозитория ClickHouse с помощью Apt-get {#ne-poluchaetsia-skachat-deb-pakety-iz-repozitoriia-clickhouse-s-pomoshchiu-apt-get} - Проверьте настройки брандмауэра. - Если по какой-либо причине вы не можете получить доступ к репозиторию, скачайте пакеты как описано в разделе [Начало работы](../getting_started/index.md) и установите их вручную командой `sudo dpkg -i `. Также, необходим пакет `tzdata`. 
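
[Editorial note, not part of the patch above: the `system.dictionaries` columns documented in that patch make it straightforward to spot dictionaries that failed to load. A minimal sketch — the column names and `status` values are taken from the documentation above; the query itself is illustrative:]

``` sql
-- List dictionaries that are currently broken, together with the error text.
-- `status`, `origin` and `last_exception` are the columns described above.
SELECT name, status, origin, last_exception
FROM system.dictionaries
WHERE status IN ('FAILED', 'FAILED_AND_RELOADING')
```
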
From cd2eac9f930de2ad9b7320ae2b79467f434e5f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D0=B5=D0=BC=20=D0=A1=D1=82=D1=80=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D1=86=D0=BE=D0=B2?= Date: Thu, 9 Apr 2020 23:52:53 +0300 Subject: [PATCH 172/752] codestyle fixed --- src/Dictionaries/ClickHouseDictionarySource.cpp | 2 +- src/Dictionaries/DictionarySourceFactory.cpp | 4 ++-- src/Dictionaries/DictionarySourceHelpers.cpp | 8 ++++---- src/Dictionaries/DictionarySourceHelpers.h | 6 ++---- src/Dictionaries/ExecutableDictionarySource.cpp | 1 - 5 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index aa06e1b8660..5ad4bcd3af0 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -229,7 +229,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) bool /* check_config */) -> DictionarySourcePtr { Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config); - + /// Note that processors are not supported yet (see constructor), /// hence it is not possible to override experimental_use_processors setting return std::make_unique(dict_struct, config, config_prefix + ".clickhouse", sample_block, context_local_copy); diff --git a/src/Dictionaries/DictionarySourceFactory.cpp b/src/Dictionaries/DictionarySourceFactory.cpp index 8431e065dd4..25931e6a724 100644 --- a/src/Dictionaries/DictionarySourceFactory.cpp +++ b/src/Dictionaries/DictionarySourceFactory.cpp @@ -84,11 +84,11 @@ DictionarySourcePtr DictionarySourceFactory::create( { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); - + if (keys.empty() || keys.size() > 2) throw Exception{name + ": element dictionary.source should have one or two child elements", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG}; - + const auto & source_type = keys.front(); const auto found = registered_sources.find(source_type); diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index 7dfa67b5167..bbf6fb8fb1b 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -53,12 +53,12 @@ void formatKeys( } Context copyContextAndApplySettings( - const std::string & config_prefix, - const Context & context, + const std::string & config_prefix, + const Context & context, const Poco::Util::AbstractConfiguration & config) { Context local_context(context); - if (config.has(config_prefix + ".settings")) + if (config.has(config_prefix + ".settings")) { const auto prefix = config_prefix + ".settings"; Settings settings; @@ -66,7 +66,7 @@ Context copyContextAndApplySettings( settings.loadSettingsFromConfig(prefix, config); local_context.setSettings(settings); } - + return local_context; } diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h index 7dc5f319432..ac173b0178d 100644 --- a/src/Dictionaries/DictionarySourceHelpers.h +++ b/src/Dictionaries/DictionarySourceHelpers.h @@ -14,8 +14,6 @@ using BlockOutputStreamPtr = std::shared_ptr; struct DictionaryStructure; class Context; - - /// Write keys to block output stream. 
/// For simple key @@ -30,8 +28,8 @@ void formatKeys( /// Used for applying settings to copied context in some register[...]Source functions Context copyContextAndApplySettings( - const std::string & config_prefix, - const Context & context, + const std::string & config_prefix, + const Context & context, const Poco::Util::AbstractConfiguration & config); } diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 6f4cd747b87..34943d62b44 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -15,7 +15,6 @@ #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" #include "registerDictionaries.h" -#include "DictionarySourceHelpers.h" namespace DB { From 22a55c49516a7c7ad273959e1cdd293eacd5e6ff Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 18:45:29 -0300 Subject: [PATCH 173/752] Doc. merge_tree_settings --- .../settings/merge_tree_settings.md | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 docs/ru/operations/settings/merge_tree_settings.md diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md new file mode 100644 index 00000000000..4bb8791ef82 --- /dev/null +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -0,0 +1,75 @@ +# Настройки MergeTree таблиц {#merge-tree-settings} + +Значения по умолчанию (для всех таблиц) задаются в config.xml в секции merge_tree. + +Пример: +```text + + 5 + +``` + +Эти значения можно задать (перекрыть) у таблиц задав в секции `Settings` у команды `CREATE TABLE`. + +Пример: +```sql +CREATE TABLE foo +( + `A` Int64 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS max_suspicious_broken_parts = 500; +``` + +Или изменить с помощью команды `ALTER TABLE ... MODIFY SETTING`. + +Пример: +```sql +ALTER TABLE foo + MODIFY SETTING max_suspicious_broken_parts = 100; +``` + + +## parts_to_throw_insert {#parts-to-throw-insert} + +Eсли число кусков в партиции превышает значение `parts_to_throw_insert` INSERT прерывается с исключением 'Too many parts (300). Merges are processing significantly slower than inserts'. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: 300. + +Для достижения максимальной производительности запросов `SELECT` необходимо минимизировать количество обрабатываемых кусков, см. [Дизайн MergeTree](../../development/architecture.md#merge-tree). +Можно установить большее значение 600 (1200), это уменьшит вероятность возникновения ошибки 'Too many parts', но в тоже время вы позже заметите возможную проблему со слияниями. + + +## parts_to_delay_insert {#parts-to-delay-insert} + +Eсли число кусков в партиции превышает значение `parts_to_delay_insert` INSERT искусственно замедляется. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: 150. + +ClickHouse искусственно выполняет `INSERT` дольше (добавляет 'sleep'), чтобы фоновый механизм слияния успевал слиять куски быстрее чем они добавляются. + + +## max_delay_to_insert {#max-delay-to-insert} + +Время в секундах на которое будет замедлен `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert) + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: 1. + +`INSERT` будет замедлен на max_delay_to_insert/(количество кусков в партиции - parts_to_delay_insert). +Т.е. 
если в партиции уже 299 кусков и parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на 1/(299-150) (~0.5) секунд. + + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) From bc8b724f5efd00b22a5e2e8293a50a5167282b5a Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 19:05:38 -0300 Subject: [PATCH 174/752] Update merge_tree_settings.md fix max_delay_to_insert formula --- docs/ru/operations/settings/merge_tree_settings.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index 4bb8791ef82..b0714368ed2 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -9,7 +9,7 @@
``` -Эти значения можно задать (перекрыть) у таблиц задав в секции `Settings` у команды `CREATE TABLE`. +Эти значения можно задать (перекрыть) у таблиц в секции `SETTINGS` у команды `CREATE TABLE`. Пример: ```sql @@ -33,7 +33,7 @@ ALTER TABLE foo ## parts_to_throw_insert {#parts-to-throw-insert} -Eсли число кусков в партиции превышает значение `parts_to_throw_insert` INSERT прерывается с исключением 'Too many parts (300). Merges are processing significantly slower than inserts'. +Eсли число кусков в партиции превышает значение `parts_to_throw_insert` INSERT прерывается с исключением 'Too many parts (N). Merges are processing significantly slower than inserts'. Возможные значения: @@ -47,7 +47,7 @@ Eсли число кусков в партиции превышает знач ## parts_to_delay_insert {#parts-to-delay-insert} -Eсли число кусков в партиции превышает значение `parts_to_delay_insert` INSERT искусственно замедляется. +Eсли число кусков в партиции превышает значение `parts_to_delay_insert` `INSERT` искусственно замедляется. Возможные значения: @@ -68,8 +68,11 @@ ClickHouse искусственно выполняет `INSERT` дольше (д Значение по умолчанию: 1. -`INSERT` будет замедлен на max_delay_to_insert/(количество кусков в партиции - parts_to_delay_insert). -Т.е. если в партиции уже 299 кусков и parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на 1/(299-150) (~0.5) секунд. +Величина задержи (в миллисекундах) для `INSERT` вычисляется по формуле +`pow(max_delay_to_insert * 1000, (1 + parts_count_in_partition - parts_to_delay_insert) / (parts_to_throw_insert - parts_to_delay_insert))` + +Т.е. если в партиции уже 299 кусков и parts_to_throw_insert =300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд. + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) From 6a10d0df00caf9c952d87d18a87f96303bf9af91 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 19:08:35 -0300 Subject: [PATCH 175/752] Update merge_tree_settings.md fix max_delay_to_insert formula --- docs/ru/operations/settings/merge_tree_settings.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index b0714368ed2..5adda56a76b 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -69,9 +69,14 @@ ClickHouse искусственно выполняет `INSERT` дольше (д Значение по умолчанию: 1. Величина задержи (в миллисекундах) для `INSERT` вычисляется по формуле -`pow(max_delay_to_insert * 1000, (1 + parts_count_in_partition - parts_to_delay_insert) / (parts_to_throw_insert - parts_to_delay_insert))` -Т.е. если в партиции уже 299 кусков и parts_to_throw_insert =300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд. +```code +max_k = parts_to_throw_insert - parts_to_delay_insert +k = 1 + parts_count_in_partition - parts_to_delay_insert +delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k) +``` + +Т.е. если в партиции уже 299 кусков и parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд. 
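
[Editorial note, not part of the patches around it: the corrected delay formula above can be sanity-checked directly in ClickHouse, and `system.parts` shows how close a partition is to `parts_to_delay_insert`. A small sketch — the table name `foo` and the threshold values reuse the example from the patch; the queries are illustrative:]

``` sql
-- The documented example: 299 parts, parts_to_delay_insert = 150,
-- parts_to_throw_insert = 300, max_delay_to_insert = 1  ->  1000 ms.
SELECT pow(1 * 1000, (1 + 299 - 150) / (300 - 150)) AS delay_milliseconds;

-- Active parts per partition, to see how close a table is to the thresholds.
SELECT partition, count() AS active_parts
FROM system.parts
WHERE database = currentDatabase() AND table = 'foo' AND active
GROUP BY partition
ORDER BY active_parts DESC
```
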
From 825785283c2daae26d72b363c7d77232d20592e1 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 19:17:12 -0300 Subject: [PATCH 176/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index 5adda56a76b..e9ec07c9fc8 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -33,7 +33,7 @@ ALTER TABLE foo ## parts_to_throw_insert {#parts-to-throw-insert} -Eсли число кусков в партиции превышает значение `parts_to_throw_insert` INSERT прерывается с исключением 'Too many parts (N). Merges are processing significantly slower than inserts'. +Eсли число кусков в партиции превышает значение `parts_to_throw_insert` INSERT прерывается с исключением `Too many parts (N). Merges are processing significantly slower than inserts`. Возможные значения: @@ -60,7 +60,7 @@ ClickHouse искусственно выполняет `INSERT` дольше (д ## max_delay_to_insert {#max-delay-to-insert} -Время в секундах на которое будет замедлен `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert) +Величина в секундах, которая используется для расчета задержки `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert). Возможные значения: @@ -68,7 +68,7 @@ ClickHouse искусственно выполняет `INSERT` дольше (д Значение по умолчанию: 1. -Величина задержи (в миллисекундах) для `INSERT` вычисляется по формуле +Величина задержи (в миллисекундах) для `INSERT` вычисляется по формуле: ```code max_k = parts_to_throw_insert - parts_to_delay_insert From c4c78f8e73f15292d07eef2fd79402baa26f92d8 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 19:19:42 -0300 Subject: [PATCH 177/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index e9ec07c9fc8..4da8f126708 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -42,7 +42,8 @@ Eсли число кусков в партиции превышает знач Значение по умолчанию: 300. Для достижения максимальной производительности запросов `SELECT` необходимо минимизировать количество обрабатываемых кусков, см. [Дизайн MergeTree](../../development/architecture.md#merge-tree). -Можно установить большее значение 600 (1200), это уменьшит вероятность возникновения ошибки 'Too many parts', но в тоже время вы позже заметите возможную проблему со слияниями. + +Можно установить большее значение 600 (1200), это уменьшит вероятность возникновения ошибки `Too many parts`, но в тоже время вы позже обнаружите возможную проблему со слияниями (например из-за недостатка места на диске), и деградацию производительности `SELECT`. ## parts_to_delay_insert {#parts-to-delay-insert} From 976ef5af3c6a9b627d0a2ef8882a1f90557228a8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 10 Apr 2020 01:26:32 +0300 Subject: [PATCH 178/752] Updated results from Jack P. 
Gao --- website/benchmark_hardware.html | 88 ++++++++++++++++----------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index 64eb576cc66..36f034457f8 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -2429,53 +2429,53 @@ var results = }, { - "system": "AMD EPYC 7702, 256 cores, 512 GiB, NVMe SSD, version 19.16", + "system": "AMD EPYC 7702, 256 cores, 512 GiB, NVMe SSD", "time": "2020-04-09 00:00:00", "result": [ -[0.103, 0.038, 0.043], -[0.072, 0.042, 0.044], -[0.118, 0.051, 0.057], -[0.222, 0.054, 0.051], -[0.339, 0.193, 0.215], -[0.376, 0.189, 0.175], -[0.114, 0.040, 0.052], -[0.085, 0.055, 0.049], -[0.354, 0.180, 0.168], -[0.372, 0.172, 0.161], -[0.276, 0.105, 0.100], -[0.259, 0.110, 0.115], -[0.399, 0.222, 0.207], -[0.586, 0.261, 0.262], -[0.394, 0.251, 0.228], -[0.350, 0.194, 0.189], -[0.705, 0.468, 0.462], -[0.653, 0.368, 0.381], -[1.285, 0.826, 0.922], -[0.223, 0.032, 0.036], -[1.690, 0.186, 0.178], -[1.916, 0.231, 0.189], -[3.551, 0.602, 0.595], -[3.198, 0.607, 0.478], -[0.530, 0.143, 0.138], -[0.311, 0.079, 0.090], -[0.554, 0.137, 0.134], -[1.775, 0.305, 0.293], -[1.480, 0.257, 0.276], -[0.864, 0.838, 0.795], -[0.529, 0.183, 0.177], -[1.051, 0.226, 0.230], -[1.719, 1.074, 1.075], -[2.134, 0.856, 0.873], -[2.123, 0.829, 0.846], -[0.380, 0.285, 0.280], -[0.193, 0.187, 0.183], -[0.080, 0.080, 0.080], -[0.077, 0.066, 0.068], -[0.432, 0.405, 0.444], -[0.050, 0.038, 0.037], -[0.032, 0.028, 0.025], -[0.010, 0.010, 0.008] +[0.006, 0.002, 0.002], +[0.252, 0.072, 0.057], +[0.113, 0.066, 0.057], +[0.197, 0.055, 0.065], +[0.311, 0.199, 0.217], +[0.360, 0.200, 0.183], +[0.119, 0.050, 0.045], +[0.066, 0.061, 0.057], +[0.320, 0.150, 0.144], +[0.346, 0.170, 0.162], +[0.226, 0.117, 0.115], +[0.265, 0.112, 0.118], +[0.402, 0.249, 0.250], +[0.561, 0.327, 0.332], +[0.397, 0.267, 0.257], +[0.323, 0.221, 0.233], +[0.710, 0.527, 0.517], +[0.667, 0.437, 0.443], +[1.269, 0.936, 0.957], +[0.189, 0.043, 0.043], +[1.673, 0.206, 0.169], +[1.937, 0.214, 0.184], +[3.527, 0.755, 0.737], +[3.197, 0.551, 0.523], +[0.519, 0.076, 0.086], +[0.268, 0.060, 0.080], +[0.522, 0.075, 0.079], +[1.693, 0.345, 0.351], +[1.466, 0.330, 0.318], +[1.078, 0.974, 1.019], +[0.501, 0.196, 0.200], +[1.032, 0.266, 0.271], +[1.621, 1.156, 1.169], +[2.089, 0.998, 0.972], +[2.106, 0.974, 0.959], +[0.366, 0.305, 0.305], +[0.190, 0.187, 0.183], +[0.071, 0.066, 0.075], +[0.072, 0.068, 0.062], +[0.415, 0.353, 0.457], +[0.034, 0.032, 0.028], +[0.031, 0.027, 0.032], +[0.024, 0.007, 0.007] ] }, ]; From 14e22ab6b355fe93a9e570ec565a10680f22435f Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 21:16:48 -0300 Subject: [PATCH 179/752] old_parts_lifetime --- docs/ru/operations/settings/merge_tree_settings.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index 4da8f126708..98fef837740 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -79,6 +79,20 @@ delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k) Т.е. если в партиции уже 299 кусков и parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд. 
+## old_parts_lifetime {#old_parts_lifetime} + +Время (в секундах) хранения неактивных кусков, для защиты от потери данных при спонтанной перезагрузке сервера или О.С. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: 480. + +После слияния нескольких кусков в новый кусок, ClickHouse помечает исходные куски как неактивные и удаляет после `old_parts_lifetime` секунд. +Неактивные куски удаляются если они не используются в текущих запросах, т.е. если счетчик ссылок куска -- `refcount` равен нулю. +Неактивные куски удаляются не сразу, потому что при записи нового куска не вызывается `fsync`, т.е. некоторое время новый кусок находится только в оперативной памяти сервера (кеше О.С.). Т.о. при спонтанной перезагрузке сервера, новый (смерженный) кусок может быть потерян или испорчен, в этом случае ClickHouse при загрузке при проверке целостности кусков обнаружит это и вернет неактивные куски в активные и позже заново их смержит. Сломанный кусок в этом случае переименовывается (добавляется префикс broken) и перемещается в папку detached. +Стандартное значение Linux dirty_expire_centisecs - 30 секунд (максимальное время, которое записанные данные хранятся только в оперативной памяти), но при больших нагрузках на дисковую систему, данные могут быть записаны намного позже (30 сек.), экспериментально было найдено время - 480 секунд, за которое почти гарантировано новый кусок будет записан на диск и безопасно удалять неактивные куски. [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) From eb57e54a21a00d22bb3ddfde66e91d57e36f7af6 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 21:23:12 -0300 Subject: [PATCH 180/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index 98fef837740..e3be061526d 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -89,9 +89,11 @@ delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k) Значение по умолчанию: 480. -После слияния нескольких кусков в новый кусок, ClickHouse помечает исходные куски как неактивные и удаляет после `old_parts_lifetime` секунд. +После слияния нескольких кусков в новый кусок, ClickHouse помечает исходные куски как неактивные и удаляет их после `old_parts_lifetime` секунд. Неактивные куски удаляются если они не используются в текущих запросах, т.е. если счетчик ссылок куска -- `refcount` равен нулю. -Неактивные куски удаляются не сразу, потому что при записи нового куска не вызывается `fsync`, т.е. некоторое время новый кусок находится только в оперативной памяти сервера (кеше О.С.). Т.о. при спонтанной перезагрузке сервера, новый (смерженный) кусок может быть потерян или испорчен, в этом случае ClickHouse при загрузке при проверке целостности кусков обнаружит это и вернет неактивные куски в активные и позже заново их смержит. Сломанный кусок в этом случае переименовывается (добавляется префикс broken) и перемещается в папку detached. + +Неактивные куски удаляются не сразу, потому что при записи нового куска не вызывается `fsync`, т.е. некоторое время новый кусок находится только в оперативной памяти сервера (кеше О.С.). Т.о. при спонтанной перезагрузке сервера, новый (смерженный) кусок может быть потерян или испорчен. 
В этом случае ClickHouse при загрузке при проверке целостности кусков обнаружит это и вернет неактивные куски в список активных и позже заново их смержит. Сломанный кусок в этом случае переименовывается (добавляется префикс broken_) и перемещается в папку detached. + Стандартное значение Linux dirty_expire_centisecs - 30 секунд (максимальное время, которое записанные данные хранятся только в оперативной памяти), но при больших нагрузках на дисковую систему, данные могут быть записаны намного позже (30 сек.), экспериментально было найдено время - 480 секунд, за которое почти гарантировано новый кусок будет записан на диск и безопасно удалять неактивные куски. From af73f555817ae39d1abaf82504804dd91f39d128 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 21:24:37 -0300 Subject: [PATCH 181/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index e3be061526d..46bf78dda0f 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -79,7 +79,7 @@ delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k) Т.е. если в партиции уже 299 кусков и parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд. -## old_parts_lifetime {#old_parts_lifetime} +## old_parts_lifetime {#old-parts-lifetime} Время (в секундах) хранения неактивных кусков, для защиты от потери данных при спонтанной перезагрузке сервера или О.С. From aba149d8a26def16efb71afeb95ba03c2f94283f Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Thu, 9 Apr 2020 22:35:28 -0300 Subject: [PATCH 182/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index 46bf78dda0f..afd88694def 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -33,7 +33,7 @@ ALTER TABLE foo ## parts_to_throw_insert {#parts-to-throw-insert} -Eсли число кусков в партиции превышает значение `parts_to_throw_insert` INSERT прерывается с исключением `Too many parts (N). Merges are processing significantly slower than inserts`. +Eсли число кусков в партиции превышает значение `parts_to_throw_insert`, INSERT прерывается с исключением `Too many parts (N). Merges are processing significantly slower than inserts`. Возможные значения: @@ -43,12 +43,12 @@ Eсли число кусков в партиции превышает знач Для достижения максимальной производительности запросов `SELECT` необходимо минимизировать количество обрабатываемых кусков, см. [Дизайн MergeTree](../../development/architecture.md#merge-tree). -Можно установить большее значение 600 (1200), это уменьшит вероятность возникновения ошибки `Too many parts`, но в тоже время вы позже обнаружите возможную проблему со слияниями (например из-за недостатка места на диске), и деградацию производительности `SELECT`. 
+Можно установить большее значение 600 (1200), это уменьшит вероятность возникновения ошибки `Too many parts`, но в тоже время вы позже обнаружите возможную проблему со слияниями (например, из-за недостатка места на диске) и деградацию производительности `SELECT`. ## parts_to_delay_insert {#parts-to-delay-insert} -Eсли число кусков в партиции превышает значение `parts_to_delay_insert` `INSERT` искусственно замедляется. +Eсли число кусков в партиции превышает значение `parts_to_delay_insert`, `INSERT` искусственно замедляется. Возможные значения: @@ -56,7 +56,7 @@ Eсли число кусков в партиции превышает знач Значение по умолчанию: 150. -ClickHouse искусственно выполняет `INSERT` дольше (добавляет 'sleep'), чтобы фоновый механизм слияния успевал слиять куски быстрее чем они добавляются. +ClickHouse искусственно выполняет `INSERT` дольше (добавляет 'sleep'), чтобы фоновый механизм слияния успевал слиять куски быстрее, чем они добавляются. ## max_delay_to_insert {#max-delay-to-insert} @@ -90,11 +90,11 @@ delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k) Значение по умолчанию: 480. После слияния нескольких кусков в новый кусок, ClickHouse помечает исходные куски как неактивные и удаляет их после `old_parts_lifetime` секунд. -Неактивные куски удаляются если они не используются в текущих запросах, т.е. если счетчик ссылок куска -- `refcount` равен нулю. +Неактивные куски удаляются, если они не используются в текущих запросах, т.е. если счетчик ссылок куска -- `refcount` равен нулю. -Неактивные куски удаляются не сразу, потому что при записи нового куска не вызывается `fsync`, т.е. некоторое время новый кусок находится только в оперативной памяти сервера (кеше О.С.). Т.о. при спонтанной перезагрузке сервера, новый (смерженный) кусок может быть потерян или испорчен. В этом случае ClickHouse при загрузке при проверке целостности кусков обнаружит это и вернет неактивные куски в список активных и позже заново их смержит. Сломанный кусок в этом случае переименовывается (добавляется префикс broken_) и перемещается в папку detached. +Неактивные куски удаляются не сразу, потому что при записи нового куска не вызывается `fsync`, т.е. некоторое время новый кусок находится только в оперативной памяти сервера (кеше О.С.). Т.о. при спонтанной перезагрузке сервера новый (смерженный) кусок может быть потерян или испорчен. В этом случае ClickHouse в процессе старта при проверке целостности кусков обнаружит проблему, вернет неактивные куски в список активных и позже заново их смержит. Сломанный кусок в этом случае переименовывается (добавляется префикс broken_) и перемещается в папку detached. Если проверка целостности не обнаруживает проблем в смерженном куске, то исходные неактивные куски переименовываются (добавляется префикс ignored_) и перемещаются в папку detached. -Стандартное значение Linux dirty_expire_centisecs - 30 секунд (максимальное время, которое записанные данные хранятся только в оперативной памяти), но при больших нагрузках на дисковую систему, данные могут быть записаны намного позже (30 сек.), экспериментально было найдено время - 480 секунд, за которое почти гарантировано новый кусок будет записан на диск и безопасно удалять неактивные куски. +Стандартное значение Linux dirty_expire_centisecs - 30 секунд (максимальное время, которое записанные данные хранятся только в оперативной памяти), но при больших нагрузках на дисковую систему, данные могут быть записаны намного позже. 
Экспериментально было найдено время - 480 секунд, за которое гарантированно новый кусок будет записан на диск. [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) From c39e3a51adf41e429560e5e06118d7faffea2431 Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Fri, 10 Apr 2020 12:30:54 +0900 Subject: [PATCH 183/752] Fix Set::insertFromBlockImplCase() --- src/Interpreters/Set.cpp | 7 +++++-- .../queries/0_stateless/01231_operator_null_in.sql | 14 +++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 0504f9d9e6d..54992eeff2c 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -87,7 +87,10 @@ void NO_INLINE Set::insertFromBlockImplCase( { if ((*null_map)[i]) { - has_null = true; + if (transform_null_in) + { + has_null = true; + } if constexpr (build_filter) { @@ -180,7 +183,7 @@ bool Set::insertFromBlock(const Block & block) /// We will insert to the Set only keys, where all components are not NULL. ConstNullMapPtr null_map{}; - ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map, transform_null_in); /// Filter to extract distinct values from the block. ColumnUInt8::MutablePtr filter; diff --git a/tests/queries/0_stateless/01231_operator_null_in.sql b/tests/queries/0_stateless/01231_operator_null_in.sql index 12361373001..3c4333c8ea6 100644 --- a/tests/queries/0_stateless/01231_operator_null_in.sql +++ b/tests/queries/0_stateless/01231_operator_null_in.sql @@ -90,4 +90,16 @@ SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3')] FRO SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1'), (NULL, NULL)] FROM null_in_tuple WHERE t global in ((1, '1'), (NULL, NULL)); SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3')] FROM null_in_tuple WHERE t global not in ((1, '1'), (NULL, NULL)); -DROP TABLE IF EXISTS null_in_subquery; +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1')] FROM null_in_tuple WHERE t in ((1, '1'), (1, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1')] FROM null_in_tuple WHERE t in ((1, '1'), (NULL, '1')); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1'), (2, NULL)] FROM null_in_tuple WHERE t in ((1, '1'), (NULL, '1'), (2, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1'), (NULL, '3')] FROM null_in_tuple WHERE t in ((1, '1'), (1, NULL), (NULL, '3')); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(1, '1'), (2, NULL), (NULL, '3'), (NULL, NULL)] FROM null_in_tuple WHERE t in ((1, '1'), (1, NULL), (2, NULL), (NULL, '3'), (NULL, NULL)); + +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3'), (NULL, NULL)] FROM null_in_tuple WHERE t not in ((1, '1'), (1, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, '3'), (NULL, NULL)] FROM null_in_tuple WHERE t not in ((1, '1'), (NULL, '1')); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(NULL, '3'), (NULL, NULL)] FROM null_in_tuple WHERE t not in ((1, '1'), (NULL, '1'), (2, NULL)); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [(2, NULL), (NULL, NULL)] FROM null_in_tuple WHERE t not in ((1, '1'), (1, NULL), (NULL, '3')); +SELECT arraySort(x -> (x.1, x.2), groupArray(t)) == [] FROM null_in_tuple WHERE t not in ((1, '1'), (1, NULL), (2, NULL), (NULL, '3'), (NULL, NULL)); + +DROP TABLE IF EXISTS 
null_in_tuple; From c6bf39d7a9a8606fa00caff764cb16133a3620f0 Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Fri, 10 Apr 2020 14:02:55 +0900 Subject: [PATCH 184/752] Fix 01231_operator_null_in.reference --- .../0_stateless/01231_operator_null_in.reference | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/queries/0_stateless/01231_operator_null_in.reference b/tests/queries/0_stateless/01231_operator_null_in.reference index 7432b657191..5cd5e5ee5fb 100644 --- a/tests/queries/0_stateless/01231_operator_null_in.reference +++ b/tests/queries/0_stateless/01231_operator_null_in.reference @@ -52,3 +52,13 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 From 7fa5afecb44484e04f710ee7876ee590fbf2d1cd Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Fri, 10 Apr 2020 14:42:36 +0900 Subject: [PATCH 185/752] Support transform_null_in option for StorageSet --- src/Interpreters/Set.cpp | 7 +--- src/Storages/StorageSet.cpp | 6 +-- .../01231_operator_null_in.reference | 16 ++++++++ .../0_stateless/01231_operator_null_in.sql | 38 +++++++++++++++++++ 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 54992eeff2c..a4fea5dd705 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -87,10 +87,7 @@ void NO_INLINE Set::insertFromBlockImplCase( { if ((*null_map)[i]) { - if (transform_null_in) - { - has_null = true; - } + has_null = true; if constexpr (build_filter) { @@ -397,7 +394,7 @@ void NO_INLINE Set::executeImplCase( { if (has_null_map && (*null_map)[i]) { - if (has_null) + if (transform_null_in && has_null) vec_res[i] = !negative; else vec_res[i] = negative; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 7d2a7ee128f..79f5198b304 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -112,7 +112,7 @@ StorageSet::StorageSet( const ConstraintsDescription & constraints_, const Context & context_) : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_}, - set(std::make_shared(SizeLimits(), false, context_.getSettingsRef().transform_null_in)) + set(std::make_shared(SizeLimits(), false, true)) { Block header = getSampleBlock(); header = header.sortColumns(); @@ -127,7 +127,7 @@ void StorageSet::finishInsert() { set->finishInsert(); } size_t StorageSet::getSize() const { return set->getTotalRowCount(); } -void StorageSet::truncate(const ASTPtr &, const Context & context, TableStructureWriteLockHolder &) +void StorageSet::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) { Poco::File(path).remove(true); Poco::File(path).createDirectories(); @@ -137,7 +137,7 @@ void StorageSet::truncate(const ASTPtr &, const Context & context, TableStructur header = header.sortColumns(); increment = 0; - set = std::make_shared(SizeLimits(), false, context.getSettingsRef().transform_null_in); + set = std::make_shared(SizeLimits(), false, true); set->setHeader(header); } diff --git a/tests/queries/0_stateless/01231_operator_null_in.reference b/tests/queries/0_stateless/01231_operator_null_in.reference index 5cd5e5ee5fb..b76f42e9af4 100644 --- a/tests/queries/0_stateless/01231_operator_null_in.reference +++ b/tests/queries/0_stateless/01231_operator_null_in.reference @@ -62,3 +62,19 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01231_operator_null_in.sql b/tests/queries/0_stateless/01231_operator_null_in.sql index 3c4333c8ea6..ddebaf23900 100644 --- 
a/tests/queries/0_stateless/01231_operator_null_in.sql +++ b/tests/queries/0_stateless/01231_operator_null_in.sql @@ -40,8 +40,46 @@ SELECT count() == 3 FROM null_in WHERE i global not in (1, 3); SELECT count() == 3 FROM null_in WHERE i global not in range(4); SELECT count() == 3 FROM null_in WHERE s global not in ('1', '3'); +DROP TABLE IF EXISTS test_set; +CREATE TABLE test_set (i Nullable(int)) ENGINE = Set(); +INSERT INTO test_set VALUES (1), (NULL); + +SET transform_null_in = 0; + +SELECT count() == 1 FROM null_in WHERE i in test_set; +SELECT count() == 2 FROM null_in WHERE i not in test_set; +SELECT count() == 1 FROM null_in WHERE i global in test_set; +SELECT count() == 2 FROM null_in WHERE i global not in test_set; + +SET transform_null_in = 1; + +SELECT count() == 3 FROM null_in WHERE i in test_set; +SELECT count() == 2 FROM null_in WHERE i not in test_set; +SELECT count() == 3 FROM null_in WHERE i global in test_set; +SELECT count() == 2 FROM null_in WHERE i global not in test_set; + +-- Create with transform_null_in +CREATE TABLE test_set2 (i Nullable(int)) ENGINE = Set(); +INSERT INTO test_set2 VALUES (1), (NULL); + +SET transform_null_in = 0; + +SELECT count() == 1 FROM null_in WHERE i in test_set2; +SELECT count() == 2 FROM null_in WHERE i not in test_set2; +SELECT count() == 1 FROM null_in WHERE i global in test_set2; +SELECT count() == 2 FROM null_in WHERE i global not in test_set2; + +SET transform_null_in = 1; + +SELECT count() == 3 FROM null_in WHERE i in test_set2; +SELECT count() == 2 FROM null_in WHERE i not in test_set2; +SELECT count() == 3 FROM null_in WHERE i global in test_set2; +SELECT count() == 2 FROM null_in WHERE i global not in test_set2; + +DROP TABLE IF EXISTS test_set; DROP TABLE IF EXISTS null_in; + DROP TABLE IF EXISTS null_in_subquery; CREATE TABLE null_in_subquery (dt DateTime, idx int, i Nullable(UInt64)) ENGINE = MergeTree() PARTITION BY dt ORDER BY idx; INSERT INTO null_in_subquery SELECT number % 3, number, number FROM system.numbers LIMIT 99999; From b8bfbad85777287f523cbda6d8fea34f2681763a Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 09:35:07 +0300 Subject: [PATCH 186/752] Update sitemap_static.xml --- website/sitemap_static.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/website/sitemap_static.xml b/website/sitemap_static.xml index 751ad4e8ce2..33d258674f6 100644 --- a/website/sitemap_static.xml +++ b/website/sitemap_static.xml @@ -12,4 +12,8 @@ https://clickhouse.tech/benchmark_hardware.html weekly + + https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html + daily + From 888baad56574822348f744d2ff85a845fd3c70b5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Apr 2020 10:35:13 +0300 Subject: [PATCH 187/752] tests/queries/0_stateless/01056_create_table_as: drop dictionary at start --- tests/queries/0_stateless/01056_create_table_as.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01056_create_table_as.sql b/tests/queries/0_stateless/01056_create_table_as.sql index 868e1f082dd..f95df9b7906 100644 --- a/tests/queries/0_stateless/01056_create_table_as.sql +++ b/tests/queries/0_stateless/01056_create_table_as.sql @@ -15,6 +15,7 @@ CREATE TABLE t3 AS v; -- { serverError 80; } DROP TABLE v; -- dictionary +DROP DICTIONARY IF EXISTS dict; DROP DATABASE if exists test_01056_dict_data; CREATE DATABASE test_01056_dict_data; CREATE TABLE test_01056_dict_data.dict_data (key Int, value UInt16) Engine=Memory(); From 25f9a1a2490845153f39a34f2b41b16911669b5a Mon Sep 17 
00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2020 07:41:44 +0000 Subject: [PATCH 188/752] Bump mkdocs-macros-plugin from 0.4.4 to 0.4.6 in /docs/tools Bumps [mkdocs-macros-plugin](https://github.com/fralau/mkdocs_macros_plugin) from 0.4.4 to 0.4.6. - [Release notes](https://github.com/fralau/mkdocs_macros_plugin/releases) - [Commits](https://github.com/fralau/mkdocs_macros_plugin/commits) Signed-off-by: dependabot-preview[bot] --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 587bcabb8fb..0e3e3c24b5f 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -19,7 +19,7 @@ Markdown==3.2.1 MarkupSafe==1.1.1 mkdocs==1.1 mkdocs-htmlproofer-plugin==0.0.3 -mkdocs-macros-plugin==0.4.4 +mkdocs-macros-plugin==0.4.6 nltk==3.4.5 nose==1.3.7 protobuf==3.11.3 From 569b85eda4b73110fe26f39e26fed94645ba4cb2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Apr 2020 10:46:23 +0300 Subject: [PATCH 189/752] Add ability to use unbundled msgpack Actually it works before but by accidentally and with warnings. --- cmake/find/msgpack.cmake | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/cmake/find/msgpack.cmake b/cmake/find/msgpack.cmake index a1f18bb1eb0..0b56bbc1a0d 100644 --- a/cmake/find/msgpack.cmake +++ b/cmake/find/msgpack.cmake @@ -1,2 +1,17 @@ -set(MSGPACK_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include) +option (USE_INTERNAL_MSGPACK_LIBRARY "Set to FALSE to use system msgpack library instead of bundled" ${NOT_UNBUNDLED}) + +if (USE_INTERNAL_MSGPACK_LIBRARY) + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include/msgpack.hpp") + message(WARNING "submodule contrib/msgpack-c is missing. to fix try run: \n git submodule update --init --recursive") + set(USE_INTERNAL_MSGPACK_LIBRARY 0) + set(MISSING_INTERNAL_MSGPACK_LIBRARY 1) + endif() +endif() + +if (USE_INTERNAL_MSGPACK_LIBRARY) + set(MSGPACK_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include) +else() + find_path(MSGPACK_INCLUDE_DIR NAMES msgpack.hpp PATHS ${MSGPACK_INCLUDE_PATHS}) +endif() + message(STATUS "Using msgpack: ${MSGPACK_INCLUDE_DIR}") From 66d443df14da259b49c95481f4d21a48376f072b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Apr 2020 11:30:23 +0300 Subject: [PATCH 190/752] Add libmsgpack-dev into the image (for unbundled build) --- docker/packager/deb/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index bedde0a2013..6aa550aaf82 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -48,6 +48,7 @@ RUN apt-get --allow-unauthenticated update -y \ libltdl-dev \ libre2-dev \ libjemalloc-dev \ + libmsgpack-dev \ unixodbc-dev \ odbcinst \ tzdata \ From d3acce8afa18f5122ad4b49c2966d7c5f4f4ad2e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 5 Apr 2020 16:07:05 +0300 Subject: [PATCH 191/752] Fix build. 
--- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- .../Transforms/MergingSortedTransform.cpp | 331 ------------------ .../Transforms/MergingSortedTransform.h | 160 --------- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- 4 files changed, 2 insertions(+), 493 deletions(-) delete mode 100644 src/Processors/Transforms/MergingSortedTransform.cpp delete mode 100644 src/Processors/Transforms/MergingSortedTransform.h diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 514efb90a00..4b85b222949 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -72,6 +72,7 @@ #include #include +#include #include #include #include @@ -84,7 +85,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Processors/Transforms/MergingSortedTransform.cpp b/src/Processors/Transforms/MergingSortedTransform.cpp deleted file mode 100644 index b9e74277023..00000000000 --- a/src/Processors/Transforms/MergingSortedTransform.cpp +++ /dev/null @@ -1,331 +0,0 @@ -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -MergingSortedTransform::MergingSortedTransform( - const Block & header, - size_t num_inputs, - const SortDescription & description_, - size_t max_block_size_, - UInt64 limit_, - bool quiet_, - bool have_all_inputs_) - : IProcessor(InputPorts(num_inputs, header), {header}) - , description(description_), max_block_size(max_block_size_), limit(limit_), quiet(quiet_) - , have_all_inputs(have_all_inputs_) - , merged_data(header), source_chunks(num_inputs), cursors(num_inputs) -{ - auto & sample = outputs.front().getHeader(); - /// Replace column names in description to positions. - for (auto & column_description : description) - { - has_collation |= column_description.collator != nullptr; - if (!column_description.column_name.empty()) - { - column_description.column_number = sample.getPositionByName(column_description.column_name); - column_description.column_name.clear(); - } - } -} - -void MergingSortedTransform::addInput() -{ - if (have_all_inputs) - throw Exception("MergingSortedTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); - - inputs.emplace_back(outputs.front().getHeader(), this); - source_chunks.emplace_back(); - cursors.emplace_back(); -} - -void MergingSortedTransform::setHaveAllInputs() -{ - if (have_all_inputs) - throw Exception("MergingSortedTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); - - have_all_inputs = true; -} - -IProcessor::Status MergingSortedTransform::prepare() -{ - if (!have_all_inputs) - return Status::NeedData; - - auto & output = outputs.front(); - - /// Special case for no inputs. - if (inputs.empty()) - { - output.finish(); - return Status::Finished; - } - - /// Check can output. - - if (output.isFinished()) - { - for (auto & in : inputs) - in.close(); - - return Status::Finished; - } - - /// Do not disable inputs, so it will work in the same way as with AsynchronousBlockInputStream, like before. - bool is_port_full = !output.canPush(); - - /// Special case for single input. - if (inputs.size() == 1) - { - auto & input = inputs.front(); - if (input.isFinished()) - { - output.finish(); - return Status::Finished; - } - - input.setNeeded(); - - if (input.hasData()) - { - if (!is_port_full) - output.push(input.pull()); - - return Status::PortFull; - } - - return Status::NeedData; - } - - /// Push if has data. 
- if (merged_data.mergedRows() && !is_port_full) - output.push(merged_data.pull()); - - if (!is_initialized) - { - /// Check for inputs we need. - bool all_inputs_has_data = true; - auto it = inputs.begin(); - for (size_t i = 0; it != inputs.end(); ++i, ++it) - { - auto & input = *it; - if (input.isFinished()) - continue; - - if (!cursors[i].empty()) - { - // input.setNotNeeded(); - continue; - } - - input.setNeeded(); - - if (!input.hasData()) - { - all_inputs_has_data = false; - continue; - } - - auto chunk = input.pull(); - if (!chunk.hasRows()) - { - - if (!input.isFinished()) - all_inputs_has_data = false; - - continue; - } - - updateCursor(std::move(chunk), i); - } - - if (!all_inputs_has_data) - return Status::NeedData; - - if (has_collation) - queue_with_collation = SortingHeap(cursors); - else - queue_without_collation = SortingHeap(cursors); - - is_initialized = true; - return Status::Ready; - } - else - { - if (is_finished) - { - - if (is_port_full) - return Status::PortFull; - - for (auto & input : inputs) - input.close(); - - outputs.front().finish(); - - return Status::Finished; - } - - if (need_data) - { - auto & input = *std::next(inputs.begin(), next_input_to_read); - if (!input.isFinished()) - { - input.setNeeded(); - - if (!input.hasData()) - return Status::NeedData; - - auto chunk = input.pull(); - if (!chunk.hasRows() && !input.isFinished()) - return Status::NeedData; - - updateCursor(std::move(chunk), next_input_to_read); - - if (has_collation) - queue_with_collation.push(cursors[next_input_to_read]); - else - queue_without_collation.push(cursors[next_input_to_read]); - } - - need_data = false; - } - - if (is_port_full) - return Status::PortFull; - - return Status::Ready; - } -} - -void MergingSortedTransform::work() -{ - if (has_collation) - merge(queue_with_collation); - else - merge(queue_without_collation); -} - -template -void MergingSortedTransform::merge(TSortingHeap & queue) -{ - /// Returns MergeStatus which we should return if we are going to finish now. - auto can_read_another_row = [&, this]() - { - if (limit && merged_data.totalMergedRows() >= limit) - { - //std::cerr << "Limit reached\n"; - is_finished = true; - return false; - } - - return merged_data.mergedRows() < max_block_size; - }; - - /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` - while (queue.isValid()) - { - /// Shouldn't happen at first iteration, but check just in case. - if (!can_read_another_row()) - return; - - auto current = queue.current(); - - /** And what if the block is totally less or equal than the rest for the current cursor? - * Or is there only one data source left in the queue? Then you can take the entire block on current cursor. - */ - if (current.impl->isFirst() - && (queue.size() == 1 - || (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild())))) - { - //std::cerr << "current block is totally less or equals\n"; - - /// If there are already data in the current block, we first return it. We'll get here again the next time we call the merge function. - if (merged_data.mergedRows() != 0) - { - //std::cerr << "merged rows is non-zero\n"; - return; - } - - /// Actually, current.impl->order stores source number (i.e. 
cursors[current.impl->order] == current.impl) - size_t source_num = current.impl->order; - insertFromChunk(source_num); - queue.removeTop(); - return; - } - - //std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n"; - //std::cerr << "Inserting row\n"; - merged_data.insertRow(current->all_columns, current->pos); - - if (out_row_sources_buf) - { - /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl) - RowSourcePart row_source(current.impl->order); - out_row_sources_buf->write(row_source.data); - } - - if (!current->isLast()) - { -// std::cerr << "moving to next row\n"; - queue.next(); - } - else - { - /// We will get the next block from the corresponding source, if there is one. - queue.removeTop(); - -// std::cerr << "It was last row, fetching next block\n"; - need_data = true; - next_input_to_read = current.impl->order; - - if (limit && merged_data.totalMergedRows() >= limit) - is_finished = true; - - return; - } - } - is_finished = true; -} - -void MergingSortedTransform::insertFromChunk(size_t source_num) -{ - if (source_num >= cursors.size()) - throw Exception("Logical error in MergingSortedTrandform", ErrorCodes::LOGICAL_ERROR); - - //std::cerr << "copied columns\n"; - - auto num_rows = source_chunks[source_num].getNumRows(); - - UInt64 total_merged_rows_after_insertion = merged_data.mergedRows() + num_rows; - if (limit && total_merged_rows_after_insertion > limit) - { - num_rows = total_merged_rows_after_insertion - limit; - merged_data.insertFromChunk(std::move(source_chunks[source_num]), num_rows); - is_finished = true; - } - else - { - merged_data.insertFromChunk(std::move(source_chunks[source_num]), 0); - need_data = true; - next_input_to_read = source_num; - } - source_chunks[source_num] = Chunk(); - - if (out_row_sources_buf) - { - RowSourcePart row_source(source_num); - for (size_t i = 0; i < num_rows; ++i) - out_row_sources_buf->write(row_source.data); - } -} - - -} diff --git a/src/Processors/Transforms/MergingSortedTransform.h b/src/Processors/Transforms/MergingSortedTransform.h deleted file mode 100644 index 914ad543c75..00000000000 --- a/src/Processors/Transforms/MergingSortedTransform.h +++ /dev/null @@ -1,160 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -class MergingSortedTransform : public IProcessor -{ -public: - MergingSortedTransform( - const Block & header, - size_t num_inputs, - const SortDescription & description_, - size_t max_block_size, - UInt64 limit = 0, - bool quiet = false, - bool have_all_inputs = true); - - String getName() const override { return "MergingSortedTransform"; } - Status prepare() override; - void work() override; - - void addInput(); - void setHaveAllInputs(); - -protected: - - class MergedData - { - public: - explicit MergedData(const Block & header) - { - columns.reserve(header.columns()); - for (const auto & column : header) - columns.emplace_back(column.type->createColumn()); - } - - void insertRow(const ColumnRawPtrs & raw_columns, size_t row) - { - size_t num_columns = raw_columns.size(); - for (size_t i = 0; i < num_columns; ++i) - columns[i]->insertFrom(*raw_columns[i], row); - - ++total_merged_rows; - ++merged_rows; - } - - void insertFromChunk(Chunk && chunk, size_t limit_rows) - { - if (merged_rows) - throw Exception("Cannot insert to MergedData from Chunk because MergedData is not empty.", - ErrorCodes::LOGICAL_ERROR); - - auto num_rows = 
chunk.getNumRows(); - columns = chunk.mutateColumns(); - if (limit_rows && num_rows > limit_rows) - { - num_rows = limit_rows; - for (auto & column : columns) - column = (*column->cut(0, num_rows)).mutate(); - } - - total_merged_rows += num_rows; - merged_rows = num_rows; - } - - Chunk pull() - { - MutableColumns empty_columns; - empty_columns.reserve(columns.size()); - - for (const auto & column : columns) - empty_columns.emplace_back(column->cloneEmpty()); - - empty_columns.swap(columns); - Chunk chunk(std::move(empty_columns), merged_rows); - merged_rows = 0; - - return chunk; - } - - UInt64 totalMergedRows() const { return total_merged_rows; } - UInt64 mergedRows() const { return merged_rows; } - - private: - UInt64 total_merged_rows = 0; - UInt64 merged_rows = 0; - MutableColumns columns; - }; - - /// Settings - SortDescription description; - const size_t max_block_size; - UInt64 limit; - bool has_collation = false; - bool quiet = false; - - std::atomic have_all_inputs; - - MergedData merged_data; - - /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) - /// If it is not nullptr then it should be populated during execution - WriteBuffer * out_row_sources_buf = nullptr; - - /// Chunks currently being merged. - std::vector source_chunks; - - SortCursorImpls cursors; - - SortingHeap queue_without_collation; - SortingHeap queue_with_collation; - -private: - - /// Processor state. - bool is_initialized = false; - bool is_finished = false; - bool need_data = false; - size_t next_input_to_read = 0; - - template - void merge(TSortingHeap & queue); - - void insertFromChunk(size_t source_num); - - void updateCursor(Chunk chunk, size_t source_num) - { - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - chunk.setColumns(std::move(columns), num_rows); - - auto & source_chunk = source_chunks[source_num]; - - if (source_chunk.empty()) - { - source_chunk = std::move(chunk); - cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); - has_collation |= cursors[source_num].has_collation; - } - else - { - source_chunk = std::move(chunk); - cursors[source_num].reset(source_chunk.getColumns(), {}); - } - } -}; - -} diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 370286117ae..4a7a411f45a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -58,7 +58,7 @@ namespace std #include #include #include -#include +#include #include #include #include From 8726ad9e3b51b55b36bf072df93215b40a5a7e3e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 16:50:51 +0300 Subject: [PATCH 192/752] Try fix collapsing. 
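[Editor's note] The two-line patch below adds `++current_pos;` to CollapsingSortedTransform::merge so that the running row counter advances for every row taken from the queue, not only for rows that survive the sign check. The snippet underneath is a toy model of that counting invariant only, not the real transform: queue handling, RowRef tracking and `current_row_sources` bookkeeping are all omitted, and the names are borrowed from the diff purely for readability.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

// Toy model of the collapsing loop's counting invariant: current_pos must be
// advanced once per examined input row, even for rows later collapsed away.
int main()
{
    std::vector<int8_t> signs = {1, -1, 1, 1, -1};
    size_t current_pos = 0, count_positive = 0, count_negative = 0;

    for (int8_t sign : signs)
    {
        if (sign == 1)
            ++count_positive;
        else if (sign == -1)
            ++count_negative;
        else
            throw std::runtime_error("Incorrect data: Sign must be 1 or -1.");

        ++current_pos; // the fix: advance unconditionally, once per input row
    }

    std::cout << current_pos << ' ' << count_positive << ' ' << count_negative << '\n';
}
```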
--- src/Processors/Merges/CollapsingSortedTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/Merges/CollapsingSortedTransform.cpp b/src/Processors/Merges/CollapsingSortedTransform.cpp index 009aed0983f..82787ec5e18 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -219,6 +219,8 @@ void CollapsingSortedTransform::merge() throw Exception("Incorrect data: Sign = " + toString(sign) + " (must be 1 or -1).", ErrorCodes::INCORRECT_DATA); + ++current_pos; + if (!current->isLast()) { queue.next(); From fe442d8c9ae42caf15fe0f201b5f682823d1ecf4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 17:21:49 +0300 Subject: [PATCH 193/752] Remove special case for simple input in IMergingTransform. --- src/Processors/Merges/IMergingTransform.cpp | 29 --------------------- src/Processors/Merges/IMergingTransform.h | 1 - 2 files changed, 30 deletions(-) diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 2c5a7affd40..23985e286ff 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -60,31 +60,6 @@ void IMergingTransform::prepareOutputChunk(MergedData & merged_data) output_chunk = merged_data.pull(); } -IProcessor::Status IMergingTransform::prepareSingleInput() -{ - auto & input = inputs.front(); - auto & output = outputs.front(); - - if (input.isFinished()) - { - output.finish(); - onFinish(); - return Status::Finished; - } - - input.setNeeded(); - - if (input.hasData()) - { - if (output.canPush()) - output.push(input.pull()); - - return Status::PortFull; - } - - return Status::NeedData; -} - IProcessor::Status IMergingTransform::prepareInitializeInputs() { /// Add information about inputs. @@ -168,10 +143,6 @@ IProcessor::Status IMergingTransform::prepare() return Status::Finished; } - /// Special case for single input. - if (inputs.size() == 1) - return prepareSingleInput(); - /// Do not disable inputs, so it will work in the same way as with AsynchronousBlockInputStream, like before. bool is_port_full = !output.canPush(); diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index a1046d207ad..72a9d043046 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -62,7 +62,6 @@ private: std::vector input_states; - Status prepareSingleInput(); Status prepareInitializeInputs(); }; From 71fab516f252acd72285bf040db2d2d087d57edb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 18:03:38 +0300 Subject: [PATCH 194/752] Fix AggregatingSorted for simple aggregate functions. 
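[Editor's note] The patch below fixes how AggregatingSortedTransform handles SimpleAggregateFunction columns wrapped in LowCardinality: input columns are unwrapped to their inner type before states are merged, the accumulator columns are now created with that same inner type (the new getMergedColumns helper), and the result is converted back to the header's type on output; the from/to direction of that final conversion was previously reversed, and `type_to_convert` is renamed to `inner_type` to match. The snippet is a stand-alone analogue of the unwrap-merge-rewrap idea over a toy dictionary-encoded column; the real code operates on IColumn/DataType via recursiveRemoveLowCardinality and recursiveTypeConversion.

```cpp
#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

// Toy stand-ins: a plain column and a dictionary-encoded ("LowCardinality") one.
struct PlainColumn { std::vector<std::string> values; };

struct LowCardinalityColumn
{
    std::vector<std::string> dictionary;
    std::vector<size_t> indexes;
};

// "Remove" the encoding before merging, so aggregate states see plain values.
PlainColumn removeLowCardinality(const LowCardinalityColumn & col)
{
    PlainColumn plain;
    plain.values.reserve(col.indexes.size());
    for (size_t idx : col.indexes)
        plain.values.push_back(col.dictionary[idx]);
    return plain;
}

// Convert back to the header's type after the merged block is formed.
LowCardinalityColumn toLowCardinality(const PlainColumn & col)
{
    LowCardinalityColumn lc;
    std::unordered_map<std::string, size_t> positions;
    for (const auto & v : col.values)
    {
        auto [it, inserted] = positions.try_emplace(v, lc.dictionary.size());
        if (inserted)
            lc.dictionary.push_back(v);
        lc.indexes.push_back(it->second);
    }
    return lc;
}

int main()
{
    LowCardinalityColumn lc{{"a", "b"}, {0, 1, 0, 0}};
    PlainColumn plain = removeLowCardinality(lc);        // unwrap before merging
    /* ... merging of simple-aggregate states happens on plain values ... */
    LowCardinalityColumn back = toLowCardinality(plain); // rewrap to header type
    return back.indexes.size() == lc.indexes.size() ? 0 : 1;
}
```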
--- .../Merges/AggregatingSortedTransform.cpp | 28 ++++++++++++++--- .../Merges/AggregatingSortedTransform.h | 4 +-- .../Merges/CollapsingSortedTransform.h | 5 +-- .../Merges/GraphiteRollupSortedTransform.h | 5 +-- .../Merges/ReplacingSortedTransform.h | 5 +-- src/Processors/Merges/RowRef.h | 31 ++++++++++++++++++- .../Merges/VersionedCollapsingTransform.h | 5 +-- 7 files changed, 67 insertions(+), 16 deletions(-) diff --git a/src/Processors/Merges/AggregatingSortedTransform.cpp b/src/Processors/Merges/AggregatingSortedTransform.cpp index 68f60aed8a2..fe86d01b390 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/src/Processors/Merges/AggregatingSortedTransform.cpp @@ -68,6 +68,24 @@ namespace return def; } + + MutableColumns getMergedColumns(const Block & header, const AggregatingSortedTransform::ColumnsDefinition & def) + { + MutableColumns columns; + columns.resize(header.columns()); + + for (auto & desc : def.columns_to_simple_aggregate) + { + auto & type = header.getByPosition(desc.column_number).type; + columns[desc.column_number] = recursiveRemoveLowCardinality(type)->createColumn(); + } + + for (size_t i = 0; i < columns.size(); ++i) + if (!columns[i]) + columns[i] = header.getByPosition(i).type->createColumn(); + + return columns; + } } AggregatingSortedTransform::AggregatingSortedTransform( @@ -75,7 +93,7 @@ AggregatingSortedTransform::AggregatingSortedTransform( SortDescription description_, size_t max_block_size) : IMergingTransform(num_inputs, header, header, true) , columns_definition(defineColumns(header, description_)) - , merged_data(header.cloneEmptyColumns(), false, max_block_size) + , merged_data(getMergedColumns(header, columns_definition), false, max_block_size) , description(std::move(description_)) , source_chunks(num_inputs) , cursors(num_inputs) @@ -106,7 +124,7 @@ void AggregatingSortedTransform::updateCursor(Chunk chunk, size_t source_num) column = column->convertToFullColumnIfConst(); for (auto & desc : columns_definition.columns_to_simple_aggregate) - if (desc.type_to_convert) + if (desc.inner_type) columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); chunk.setColumns(std::move(columns), num_rows); @@ -145,10 +163,10 @@ void AggregatingSortedTransform::work() for (auto & desc : columns_definition.columns_to_simple_aggregate) { - if (desc.type_to_convert) + if (desc.inner_type) { - auto & from_type = header.getByPosition(desc.column_number).type; - auto & to_type = desc.type_to_convert; + auto & from_type = desc.inner_type; + auto & to_type = header.getByPosition(desc.column_number).type; columns[desc.column_number] = recursiveTypeConversion(columns[desc.column_number], from_type, to_type); } } diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index bb950a68257..247d92d99e9 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -129,13 +129,13 @@ public: size_t column_number = 0; IColumn * column = nullptr; - const DataTypePtr type_to_convert; + const DataTypePtr inner_type; AlignedBuffer state; bool created = false; SimpleAggregateDescription(AggregateFunctionPtr function_, const size_t column_number_, DataTypePtr type) - : function(std::move(function_)), column_number(column_number_), type_to_convert(std::move(type)) + : function(std::move(function_)), column_number(column_number_), inner_type(std::move(type)) { add_function = function->getAddressOfAddFunction(); 
state.reset(function->sizeOfData(), function->alignOfData()); diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 46e3fb2e693..7e64d3253fc 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -64,14 +64,15 @@ private: SortingHeap queue; bool is_queue_initialized = false; + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 4; /// first_negative, last_positive, last, current. RowRef first_negative_row; RowRef last_positive_row; RowRef last_row; - detail::SharedChunkAllocator chunk_allocator; - size_t count_positive = 0; /// The number of positive rows for the current primary key. size_t count_negative = 0; /// The number of negative rows for the current primary key. bool last_is_positive = false; /// true if the last row for the current primary key is positive. diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index 4dd394198ad..ce9fb6e019c 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -237,12 +237,13 @@ private: /// Path name of current bucket StringRef current_group_path; + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + static constexpr size_t max_row_refs = 2; /// current_subgroup_newest_row, current_row. /// Last row with maximum version for current primary key (time bucket). RowRef current_subgroup_newest_row; - detail::SharedChunkAllocator chunk_allocator; - /// Time of last read row time_t current_time = 0; time_t current_time_rounded = 0; diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index ff447ee8da6..ffbfe92be96 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -50,6 +50,9 @@ private: SortingHeap queue; bool is_queue_initialized = false; + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 3; /// last, current, selected. RowRef last_row; @@ -57,8 +60,6 @@ private: RowRef selected_row; /// Last row with maximum version for current primary key. size_t max_pos = 0; /// The position (into current_row_sources) of the row with the highest version. - detail::SharedChunkAllocator chunk_allocator; - /// Sources of rows with the current primary key. PODArray current_row_sources; diff --git a/src/Processors/Merges/RowRef.h b/src/Processors/Merges/RowRef.h index 48ce92dbdbd..5aeae952067 100644 --- a/src/Processors/Merges/RowRef.h +++ b/src/Processors/Merges/RowRef.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include @@ -51,6 +53,9 @@ class SharedChunkAllocator public: explicit SharedChunkAllocator(size_t max_chunks) { + if (max_chunks == 0) + max_chunks = 1; + chunks.resize(max_chunks); free_chunks.reserve(max_chunks); @@ -74,12 +79,36 @@ public: return SharedChunkPtr(&chunks[pos]); } + ~SharedChunkAllocator() + { + if (free_chunks.size() != chunks.size()) + { + LOG_ERROR(&Logger::get("SharedChunkAllocator"), + "SharedChunkAllocator was destroyed before RowRef was released. 
StackTrace: " + << StackTrace().toString()); + + return; + } + } + private: std::vector chunks; std::vector free_chunks; - void release(SharedChunk * ptr) + void release(SharedChunk * ptr) noexcept { + if (chunks.empty()) + { + /// This may happen if allocator was removed before chunks. + /// Log message and exit, because we don't want to throw exception in destructor. + + LOG_ERROR(&Logger::get("SharedChunkAllocator"), + "SharedChunkAllocator was destroyed before RowRef was released. StackTrace: " + << StackTrace().toString()); + + return; + } + /// Release memory. It is not obligatory. ptr->clear(); ptr->all_columns.clear(); diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index 04a18142072..214fb3b2f0d 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -53,14 +53,15 @@ private: SortingHeap queue; bool is_queue_initialized = false; + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + using RowRef = detail::RowRefWithOwnedChunk; const size_t max_rows_in_queue; /// Rows with the same primary key and sign. FixedSizeDequeWithGaps current_keys; Int8 sign_in_queue = 0; - detail::SharedChunkAllocator chunk_allocator; - std::queue current_row_sources; /// Sources of rows with the current primary key void insertGap(size_t gap_size); From f96a5fe923f6eb7beb5d2e07909cb80758ccba15 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 19:57:49 +0300 Subject: [PATCH 195/752] Fix build, --- src/Processors/Merges/CollapsingSortedTransform.cpp | 2 +- src/Processors/Merges/CollapsingSortedTransform.h | 6 +++--- .../Merges/GraphiteRollupSortedTransform.cpp | 2 +- src/Processors/Merges/GraphiteRollupSortedTransform.h | 6 +++--- src/Processors/Merges/ReplacingSortedTransform.cpp | 2 +- src/Processors/Merges/ReplacingSortedTransform.h | 6 +++--- .../Merges/VersionedCollapsingTransform.cpp | 4 ++-- src/Processors/Merges/VersionedCollapsingTransform.h | 11 ++++++----- 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/Processors/Merges/CollapsingSortedTransform.cpp b/src/Processors/Merges/CollapsingSortedTransform.cpp index 82787ec5e18..d1099184952 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/src/Processors/Merges/CollapsingSortedTransform.cpp @@ -29,9 +29,9 @@ CollapsingSortedTransform::CollapsingSortedTransform( , description(std::move(description_)) , sign_column_number(header.getPositionByName(sign_column)) , out_row_sources_buf(out_row_sources_buf_) + , chunk_allocator(num_inputs + max_row_refs) , source_chunks(num_inputs) , cursors(num_inputs) - , chunk_allocator(num_inputs + max_row_refs) { } diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 7e64d3253fc..e35df8ac6d7 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -56,6 +56,9 @@ private: /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf = nullptr; + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + /// Chunks currently being merged. using SourceChunks = std::vector; SourceChunks source_chunks; @@ -64,9 +67,6 @@ private: SortingHeap queue; bool is_queue_initialized = false; - /// Allocator must be destroyed after all RowRefs. 
- detail::SharedChunkAllocator chunk_allocator; - using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 4; /// first_negative, last_positive, last, current. RowRef first_negative_row; diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.cpp b/src/Processors/Merges/GraphiteRollupSortedTransform.cpp index 5ada52790a3..564b697139b 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.cpp +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.cpp @@ -33,10 +33,10 @@ GraphiteRollupSortedTransform::GraphiteRollupSortedTransform( : IMergingTransform(num_inputs, header, header, true) , merged_data(header.cloneEmptyColumns(), false, max_block_size) , description(std::move(description_)) + , chunk_allocator(num_inputs + max_row_refs) , source_chunks(num_inputs) , cursors(num_inputs) , params(std::move(params_)), time_of_merge(time_of_merge_) - , chunk_allocator(num_inputs + max_row_refs) { size_t max_size_of_aggregate_state = 0; size_t max_alignment_of_aggregate_state = 1; diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index ce9fb6e019c..70112dd853a 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -204,6 +204,9 @@ private: GraphiteRollupMergedData merged_data; SortDescription description; + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + /// Chunks currently being merged. using SourceChunks = std::vector; SourceChunks source_chunks; @@ -237,9 +240,6 @@ private: /// Path name of current bucket StringRef current_group_path; - /// Allocator must be destroyed after all RowRefs. - detail::SharedChunkAllocator chunk_allocator; - static constexpr size_t max_row_refs = 2; /// current_subgroup_newest_row, current_row. /// Last row with maximum version for current primary key (time bucket). RowRef current_subgroup_newest_row; diff --git a/src/Processors/Merges/ReplacingSortedTransform.cpp b/src/Processors/Merges/ReplacingSortedTransform.cpp index d45e6f04aa6..1b29b4842ed 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.cpp +++ b/src/Processors/Merges/ReplacingSortedTransform.cpp @@ -19,9 +19,9 @@ ReplacingSortedTransform::ReplacingSortedTransform( , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) + , chunk_allocator(num_inputs + max_row_refs) , source_chunks(num_inputs) , cursors(num_inputs) - , chunk_allocator(num_inputs + max_row_refs) { if (!version_column.empty()) version_column_number = header.getPositionByName(version_column); diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index ffbfe92be96..0b4c2b2feac 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -42,6 +42,9 @@ private: /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf = nullptr; + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + /// Chunks currently being merged. using SourceChunks = std::vector; SourceChunks source_chunks; @@ -50,9 +53,6 @@ private: SortingHeap queue; bool is_queue_initialized = false; - /// Allocator must be destroyed after all RowRefs. 
- detail::SharedChunkAllocator chunk_allocator; - using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 3; /// last, current, selected. RowRef last_row; diff --git a/src/Processors/Merges/VersionedCollapsingTransform.cpp b/src/Processors/Merges/VersionedCollapsingTransform.cpp index c11cf46a6ca..cb0e085c8d1 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.cpp +++ b/src/Processors/Merges/VersionedCollapsingTransform.cpp @@ -22,11 +22,11 @@ VersionedCollapsingTransform::VersionedCollapsingTransform( , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) , description(std::move(description_)) , out_row_sources_buf(out_row_sources_buf_) + , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 1) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer + , chunk_allocator(num_inputs + max_rows_in_queue + 1) /// +1 just in case (for current_row) , source_chunks(num_inputs) , cursors(num_inputs) - , max_rows_in_queue(MAX_ROWS_IN_MULTIVERSION_QUEUE - 1) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer , current_keys(max_rows_in_queue) - , chunk_allocator(num_inputs + max_rows_in_queue + 1) /// +1 just in case (for current_row) { sign_column_number = header.getPositionByName(sign_column_); } diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index 214fb3b2f0d..85993bde134 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -45,6 +45,12 @@ private: /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf = nullptr; + using RowRef = detail::RowRefWithOwnedChunk; + const size_t max_rows_in_queue; + + /// Allocator must be destroyed after all RowRefs. + detail::SharedChunkAllocator chunk_allocator; + /// Chunks currently being merged. using SourceChunks = std::vector; SourceChunks source_chunks; @@ -53,11 +59,6 @@ private: SortingHeap queue; bool is_queue_initialized = false; - /// Allocator must be destroyed after all RowRefs. - detail::SharedChunkAllocator chunk_allocator; - - using RowRef = detail::RowRefWithOwnedChunk; - const size_t max_rows_in_queue; /// Rows with the same primary key and sign. FixedSizeDequeWithGaps current_keys; Int8 sign_in_queue = 0; From cbd1f6a524c0332f679da3551cdfce19e623a5fc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 22:27:50 +0300 Subject: [PATCH 196/752] Added comments. --- src/Processors/Merges/AggregatingSortedTransform.h | 6 ++++++ src/Processors/Merges/CollapsingSortedTransform.h | 2 +- src/Processors/Merges/ReplacingSortedTransform.h | 4 ++++ src/Processors/Merges/SummingSortedTransform.h | 5 +++++ src/Processors/Merges/VersionedCollapsingTransform.h | 5 +++++ 5 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index 247d92d99e9..da26527d435 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -15,6 +15,12 @@ namespace DB class ColumnAggregateFunction; +/** Merges several sorted ports to one. + * During this for each group of consecutive identical values of the primary key (the columns by which the data is sorted), + * merges them into one row. When merging, the data is pre-aggregated - merge of states of aggregate functions, + * corresponding to a one value of the primary key. 
For columns that are not part of the primary key and which do not have the AggregateFunction type, + * when merged, the first value is selected. + */ class AggregatingSortedTransform : public IMergingTransform { public: diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index e35df8ac6d7..3a3c1540c06 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -12,7 +12,7 @@ namespace DB { -/** Merges several sorted inputs to one. +/** Merges several sorted ports to one. * For each group of consecutive identical values of the primary key (the columns by which the data is sorted), * keeps no more than one row with the value of the column `sign_column = -1` ("negative row") * and no more than a row with the value of the column `sign_column = 1` ("positive row"). diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 0b4c2b2feac..194e81ab6db 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -13,6 +13,10 @@ namespace DB { +/** Merges several sorted ports into one. + * For each group of consecutive identical values of the primary key (the columns by which the data is sorted), + * keeps row with max `version` value. + */ class ReplacingSortedTransform final : public IMergingTransform { public: diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index 2e07ae12115..cc651197647 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -14,6 +14,11 @@ namespace DB { +/** Merges several sorted ports into one. + * For each group of consecutive identical values of the primary key (the columns by which the data is sorted), + * collapses them into one row, summing all the numeric columns except the primary key. + * If in all numeric columns, except for the primary key, the result is zero, it deletes the row. + */ class SummingSortedTransform final : public IMergingTransform { public: diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index 85993bde134..875377a544b 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -15,6 +15,11 @@ namespace DB { +/** Merges several sorted ports to one. + * For each group of consecutive identical values of the sorting key + * (the columns by which the data is sorted, including specially specified version column), + * merges any pair of consecutive rows with opposite sign. + */ class VersionedCollapsingTransform final : public IMergingTransform { public: From 2ce753a3165b1f5136bfdb5e5b517b793f59b2df Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Apr 2020 16:54:15 +0300 Subject: [PATCH 197/752] Fix tests. 
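[Editor's note] The small patch below (three inserted lines) guards IMergingTransform::prepareOutputChunk with the need_data flag: while the transform is still waiting to refill an exhausted source, the partially merged block must not be flushed downstream. Assembled from the hunk, the patched function behaves roughly as in this minimal model, with Chunk and MergedData reduced to counters:

```cpp
#include <cstddef>
#include <optional>

// Minimal model of the patched prepareOutputChunk: when the merge has paused
// to refill a source (need_data == true), don't flush a partial output chunk.
struct MergingTransformModel
{
    bool need_data = false;      // a source must be re-read before merging resumes
    bool is_finished = false;
    std::size_t merged_rows = 0; // rows accumulated in MergedData
    std::size_t max_block_size = 8192;
    std::optional<std::size_t> output_chunk; // stands in for Chunk

    void prepareOutputChunk()
    {
        if (need_data)           // the fix: wait for the pending source first
            return;

        bool has_enough_rows = merged_rows >= max_block_size;
        if ((is_finished && merged_rows) || has_enough_rows)
        {
            output_chunk = merged_rows; // "pull" the accumulated rows
            merged_rows = 0;
        }
    }
};
```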
--- src/Processors/Merges/IMergingTransform.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 23985e286ff..2f815d69d9c 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -55,6 +55,9 @@ void IMergingTransform::requestDataForInput(size_t input_number) void IMergingTransform::prepareOutputChunk(MergedData & merged_data) { + if (need_data) + return; + has_output_chunk = (is_finished && merged_data.mergedRows()) || merged_data.hasEnoughRows(); if (has_output_chunk) output_chunk = merged_data.pull(); From fc605d411d0eb534d6d8c3883d423d33a534172f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Apr 2020 20:08:25 +0300 Subject: [PATCH 198/752] Added IMergingAlgorithm. --- src/Processors/Merges/IMergingAlgorithm.h | 33 +++ src/Processors/Merges/IMergingTransform.cpp | 198 +++++++++++++++++ src/Processors/Merges/IMergingTransform.h | 56 +++++ .../Merges/MergingSortedAlgorithm.cpp | 198 +++++++++++++++++ .../Merges/MergingSortedAlgorithm.h | 59 +++++ .../Merges/MergingSortedTransform.cpp | 201 +----------------- .../Merges/MergingSortedTransform.h | 34 +-- 7 files changed, 553 insertions(+), 226 deletions(-) create mode 100644 src/Processors/Merges/IMergingAlgorithm.h create mode 100644 src/Processors/Merges/MergingSortedAlgorithm.cpp create mode 100644 src/Processors/Merges/MergingSortedAlgorithm.h diff --git a/src/Processors/Merges/IMergingAlgorithm.h b/src/Processors/Merges/IMergingAlgorithm.h new file mode 100644 index 00000000000..1dbe88e1370 --- /dev/null +++ b/src/Processors/Merges/IMergingAlgorithm.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class IMergingAlgorithm +{ +public: + struct Status + { + Chunk chunk; + bool is_finished = false; + ssize_t required_source = -1; + + explicit Status(Chunk chunk_) : chunk(std::move(chunk_)) {} + explicit Status(Chunk chunk_, bool is_finished_) : chunk(std::move(chunk_)), is_finished(is_finished_) {} + explicit Status(size_t source) : required_source(source) {} + }; + + virtual void initialize(Chunks chunks) = 0; + virtual void consume(Chunk chunk, size_t source_num) = 0; + virtual Status merge() = 0; + + virtual ~IMergingAlgorithm() = default; +}; + +template +concept MergingAlgorithm = std::is_base_of::value && std::is_move_constructible::value; + +} diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 2f815d69d9c..295d7361d19 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -200,4 +200,202 @@ IProcessor::Status IMergingTransform::prepare() return Status::Ready; } + +template +IMergingTransform2::IMergingTransform2( + Algorithm algorithm, + size_t num_inputs, + const Block & input_header, + const Block & output_header, + bool have_all_inputs_) + : IProcessor(InputPorts(num_inputs, input_header), {output_header}) + , algorithm(std::move(algorithm)) + , have_all_inputs(have_all_inputs_) +{ +} + +template +void IMergingTransform2::onNewInput() +{ + throw Exception("onNewInput is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); +} + +template +void IMergingTransform2::addInput() +{ + if (have_all_inputs) + throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + + inputs.emplace_back(outputs.front().getHeader(), this); + onNewInput(); +} + +template +void 
IMergingTransform2::setHaveAllInputs() +{ + if (have_all_inputs) + throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + + have_all_inputs = true; +} + +template +IProcessor::Status IMergingTransform2::prepareInitializeInputs() +{ + /// Add information about inputs. + if (input_states.empty()) + { + input_states.reserve(inputs.size()); + for (auto & input : inputs) + input_states.emplace_back(input); + + init_chunks.resize(inputs.size()); + } + + /// Check for inputs we need. + bool all_inputs_has_data = true; + auto it = inputs.begin(); + for (size_t i = 0; it != inputs.end(); ++i, ++it) + { + auto & input = *it; + if (input.isFinished()) + continue; + + if (input_states[i].is_initialized) + { + // input.setNotNeeded(); + continue; + } + + input.setNeeded(); + + if (!input.hasData()) + { + all_inputs_has_data = false; + continue; + } + + auto chunk = input.pull(); + if (!chunk.hasRows()) + { + + if (!input.isFinished()) + all_inputs_has_data = false; + + continue; + } + + init_chunks[i] = std::move(chunk); + input_states[i].is_initialized = true; + } + + if (!all_inputs_has_data) + return Status::NeedData; + + is_initialized = true; + return Status::Ready; +} + +template +IProcessor::Status IMergingTransform2::prepare() +{ + if (!have_all_inputs) + return Status::NeedData; + + auto & output = outputs.front(); + + /// Special case for no inputs. + if (inputs.empty()) + { + output.finish(); + onFinish(); + return Status::Finished; + } + + /// Check can output. + + if (output.isFinished()) + { + for (auto & in : inputs) + in.close(); + + onFinish(); + return Status::Finished; + } + + /// Do not disable inputs, so it will work in the same way as with AsynchronousBlockInputStream, like before. + bool is_port_full = !output.canPush(); + + /// Push if has data. + if (has_output_chunk && !is_port_full) + { + output.push(std::move(output_chunk)); + has_output_chunk = false; + } + + if (!is_initialized) + return prepareInitializeInputs(); + + if (is_finished) + { + + if (is_port_full) + return Status::PortFull; + + for (auto & input : inputs) + input.close(); + + outputs.front().finish(); + + onFinish(); + return Status::Finished; + } + + if (need_data) + { + auto & input = input_states[next_input_to_read].port; + if (!input.isFinished()) + { + input.setNeeded(); + + if (!input.hasData()) + return Status::NeedData; + + auto chunk = input.pull(); + if (!chunk.hasRows() && !input.isFinished()) + return Status::NeedData; + + algorithm.consume(std::move(chunk), next_input_to_read); + } + + need_data = false; + } + + if (is_port_full) + return Status::PortFull; + + return Status::Ready; +} + +template +void IMergingTransform2::work() +{ + if (!init_chunks.empty()) + algorithm.initialize(std::move(init_chunks)); + + IMergingAlgorithm::Status status = algorithm.merge(); + + if (status.chunk && status.chunk.hasRows()) + { + has_output_chunk = true; + output_chunk = std::move(status.chunk); + } + + if (status.required_source >= 0) + next_input_to_read = status.required_source; + + if (status.is_finished) + is_finished = true; +} + } diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 72a9d043046..9122e28a913 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -65,4 +66,59 @@ private: Status prepareInitializeInputs(); }; +/// Base class for merging transforms. 
+template +class IMergingTransform2 : public IProcessor +{ +public: + IMergingTransform2( + Algorithm algorithm, + size_t num_inputs, + const Block & input_header, + const Block & output_header, + //size_t max_block_size, + //bool use_average_block_size, /// For adaptive granularity. Return chunks with the same avg size as inputs. + bool have_all_inputs_); + + /// Methods to add additional input port. It is possible to do only before the first call of `prepare`. + void addInput(); + /// Need to be called after all inputs are added. (only if have_all_inputs was not specified). + void setHaveAllInputs(); + + Status prepare() override; + void work() override; + +protected: + virtual void onNewInput(); /// Is called when new input is added. Only if have_all_inputs = false. + virtual void onFinish() {} /// Is called when all data is processed. + + /// Profile info. + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + Algorithm algorithm; + +private: + /// Processor state. + Chunk output_chunk; + bool has_output_chunk = false; + bool is_finished = false; + bool is_initialized = false; + bool need_data = false; + size_t next_input_to_read = 0; + + std::atomic have_all_inputs; + + struct InputState + { + explicit InputState(InputPort & port_) : port(port_) {} + + InputPort & port; + bool is_initialized = false; + }; + + std::vector input_states; + Chunks init_chunks; + + Status prepareInitializeInputs(); +}; + } diff --git a/src/Processors/Merges/MergingSortedAlgorithm.cpp b/src/Processors/Merges/MergingSortedAlgorithm.cpp new file mode 100644 index 00000000000..556bde169d3 --- /dev/null +++ b/src/Processors/Merges/MergingSortedAlgorithm.cpp @@ -0,0 +1,198 @@ +#include +#include +#include + +namespace DB +{ + +MergingSortedAlgorithm::MergingSortedAlgorithm( + const Block & header, + size_t num_inputs, + SortDescription description_, + size_t max_block_size, + UInt64 limit_, + WriteBuffer * out_row_sources_buf_, + bool use_average_block_sizes) + : merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) + , description(std::move(description_)) + , limit(limit_) + , out_row_sources_buf(out_row_sources_buf_) + , source_chunks(num_inputs) + , cursors(num_inputs) +{ + /// Replace column names in description to positions. 
+ for (auto & column_description : description) + { + has_collation |= column_description.collator != nullptr; + if (!column_description.column_name.empty()) + { + column_description.column_number = header.getPositionByName(column_description.column_name); + column_description.column_name.clear(); + } + } +} + +void MergingSortedAlgorithm::addInput() +{ + source_chunks.emplace_back(); + cursors.emplace_back(); +} + +static void prepareChunk(Chunk & chunk) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); +} + +void MergingSortedAlgorithm::updateCursor(size_t source_num) +{ + auto & source_chunk = source_chunks[source_num]; + cursors[source_num].reset(source_chunk.getColumns(), {}); +} + +void MergingSortedAlgorithm::initialize(Chunks chunks) +{ + source_chunks = std::move(chunks); + + for (size_t source_num = 0; source_num < source_chunks.size(); ++source_num) + { + auto & chunk = source_chunks[source_num]; + + if (!chunk) + continue; + + prepareChunk(chunk); + cursors[source_num] = SortCursorImpl(chunk.getColumns(), description, source_num); + } + + if (has_collation) + queue_with_collation = SortingHeap(cursors); + else + queue_without_collation = SortingHeap(cursors); +} + +void MergingSortedAlgorithm::consume(Chunk chunk, size_t source_num) +{ + prepareChunk(chunk); + source_chunks[source_num] = std::move(chunk); + cursors[source_num].reset(source_chunks[source_num].getColumns(), {}); +} + +IMergingAlgorithm::Status MergingSortedAlgorithm::merge() +{ + if (has_collation) + return mergeImpl(queue_with_collation); + else + return mergeImpl(queue_without_collation); +} + +template +IMergingAlgorithm::Status MergingSortedAlgorithm::mergeImpl(TSortingHeap & queue) +{ + /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` + while (queue.isValid()) + { + if (merged_data.hasEnoughRows()) + return Status(merged_data.pull()); + + auto current = queue.current(); + + /** And what if the block is totally less or equal than the rest for the current cursor? + * Or is there only one data source left in the queue? Then you can take the entire block on current cursor. + */ + if (current.impl->isFirst() + && (queue.size() == 1 + || (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild())))) + { + //std::cerr << "current block is totally less or equals\n"; + + /// If there are already data in the current block, we first return it. + /// We'll get here again the next time we call the merge function. + if (merged_data.mergedRows() != 0) + { + //std::cerr << "merged rows is non-zero\n"; + // merged_data.flush(); + return Status(merged_data.pull()); + } + + /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl) + size_t source_num = current.impl->order; + queue.removeTop(); + return insertFromChunk(source_num); + } + + //std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n"; + //std::cerr << "Inserting row\n"; + merged_data.insertRow(current->all_columns, current->pos, current->rows); + + if (out_row_sources_buf) + { + /// Actually, current.impl->order stores source number (i.e. 
cursors[current.impl->order] == current.impl) + RowSourcePart row_source(current.impl->order); + out_row_sources_buf->write(row_source.data); + } + + if (limit && merged_data.totalMergedRows() >= limit) + return Status(merged_data.pull(), true); + + if (!current->isLast()) + { + //std::cerr << "moving to next row\n"; + queue.next(); + } + else + { + /// We will get the next block from the corresponding source, if there is one. + queue.removeTop(); + //std::cerr << "It was last row, fetching next block\n"; + return Status(current.impl->order); + } + } + + return Status(merged_data.pull(), true); +} + +IMergingAlgorithm::Status MergingSortedAlgorithm::insertFromChunk(size_t source_num) +{ + if (source_num >= cursors.size()) + throw Exception("Logical error in MergingSortedTransform", ErrorCodes::LOGICAL_ERROR); + + //std::cerr << "copied columns\n"; + + auto num_rows = source_chunks[source_num].getNumRows(); + + UInt64 total_merged_rows_after_insertion = merged_data.mergedRows() + num_rows; + bool is_finished = limit && total_merged_rows_after_insertion > limit; + + if (is_finished) + { + num_rows = total_merged_rows_after_insertion - limit; + merged_data.insertFromChunk(std::move(source_chunks[source_num]), num_rows); + } + else + merged_data.insertFromChunk(std::move(source_chunks[source_num]), 0); + + source_chunks[source_num] = Chunk(); + + /// Write order of rows for other columns + /// this data will be used in gather stream + if (out_row_sources_buf) + { + RowSourcePart row_source(source_num); + for (size_t i = 0; i < num_rows; ++i) + out_row_sources_buf->write(row_source.data); + } + + auto status = Status(merged_data.pull(), is_finished); + + if (!is_finished) + status.required_source = source_num; + + return status; +} + +} diff --git a/src/Processors/Merges/MergingSortedAlgorithm.h b/src/Processors/Merges/MergingSortedAlgorithm.h new file mode 100644 index 00000000000..5c4087ed0bd --- /dev/null +++ b/src/Processors/Merges/MergingSortedAlgorithm.h @@ -0,0 +1,59 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + +class MergingSortedAlgorithm final : public IMergingAlgorithm +{ +public: + MergingSortedAlgorithm( + const Block & header, + size_t num_inputs, + SortDescription description_, + size_t max_block_size, + UInt64 limit_, + WriteBuffer * out_row_sources_buf_, + bool use_average_block_sizes); + + MergingSortedAlgorithm(MergingSortedAlgorithm && other) = default; + + void addInput(); + + void initialize(Chunks chunks) override; + void consume(Chunk chunk, size_t source_num) override; + Status merge() override; + + const MergedData & getMergedData() const { return merged_data; } + +private: + MergedData merged_data; + + /// Settings + SortDescription description; + UInt64 limit; + bool has_collation = false; + + /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) + /// If it is not nullptr then it should be populated during execution + WriteBuffer * out_row_sources_buf = nullptr; + + /// Chunks currently being merged. 
+ std::vector source_chunks; + + SortCursorImpls cursors; + + SortingHeap queue_without_collation; + SortingHeap queue_with_collation; + + void updateCursor(size_t source_num); + Status insertFromChunk(size_t source_num); + + template + Status mergeImpl(TSortingHeap & queue); +}; + +} diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 7b7e4fcf62c..2657077c143 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -23,206 +23,17 @@ MergingSortedTransform::MergingSortedTransform( bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) - : IMergingTransform(num_inputs, header, header, have_all_inputs_) - , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) - , description(std::move(description_)) - , limit(limit_) + : IMergingTransform2( + MergingSortedAlgorithm(header, num_inputs, std::move(description_), max_block_size, + limit_, out_row_sources_buf_, use_average_block_sizes), + num_inputs, header, header, have_all_inputs_) , quiet(quiet_) - , out_row_sources_buf(out_row_sources_buf_) - , source_chunks(num_inputs) - , cursors(num_inputs) { - auto & sample = outputs.front().getHeader(); - /// Replace column names in description to positions. - for (auto & column_description : description) - { - has_collation |= column_description.collator != nullptr; - if (!column_description.column_name.empty()) - { - column_description.column_number = sample.getPositionByName(column_description.column_name); - column_description.column_name.clear(); - } - } } void MergingSortedTransform::onNewInput() { - source_chunks.emplace_back(); - cursors.emplace_back(); -} - -void MergingSortedTransform::initializeInputs() -{ - if (has_collation) - queue_with_collation = SortingHeap(cursors); - else - queue_without_collation = SortingHeap(cursors); - - is_queue_initialized = true; -} - -void MergingSortedTransform::consume(Chunk chunk, size_t input_number) -{ - updateCursor(std::move(chunk), input_number); - - if (is_queue_initialized) - { - if (has_collation) - queue_with_collation.push(cursors[input_number]); - else - queue_without_collation.push(cursors[input_number]); - } -} - -void MergingSortedTransform::updateCursor(Chunk chunk, size_t source_num) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - chunk.setColumns(std::move(columns), num_rows); - - auto & source_chunk = source_chunks[source_num]; - - if (source_chunk.empty()) - { - source_chunk = std::move(chunk); - cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); - has_collation |= cursors[source_num].has_collation; - } - else - { - source_chunk = std::move(chunk); - cursors[source_num].reset(source_chunk.getColumns(), {}); - } -} - -void MergingSortedTransform::work() -{ - if (has_collation) - merge(queue_with_collation); - else - merge(queue_without_collation); - - prepareOutputChunk(merged_data); -} - -template -void MergingSortedTransform::merge(TSortingHeap & queue) -{ - /// Returns MergeStatus which we should return if we are going to finish now. 
- auto can_read_another_row = [&, this]() - { - if (limit && merged_data.totalMergedRows() >= limit) - { - //std::cerr << "Limit reached\n"; - is_finished = true; - return false; - } - - return !merged_data.hasEnoughRows(); - }; - - /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` - while (queue.isValid()) - { - if (!can_read_another_row()) - return; - - auto current = queue.current(); - - /** And what if the block is totally less or equal than the rest for the current cursor? - * Or is there only one data source left in the queue? Then you can take the entire block on current cursor. - */ - if (current.impl->isFirst() - && (queue.size() == 1 - || (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild())))) - { - //std::cerr << "current block is totally less or equals\n"; - - /// If there are already data in the current block, we first return it. - /// We'll get here again the next time we call the merge function. - if (merged_data.mergedRows() != 0) - { - //std::cerr << "merged rows is non-zero\n"; - merged_data.flush(); - return; - } - - /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl) - size_t source_num = current.impl->order; - insertFromChunk(source_num); - queue.removeTop(); - return; - } - - //std::cerr << "total_merged_rows: " << total_merged_rows << ", merged_rows: " << merged_rows << "\n"; - //std::cerr << "Inserting row\n"; - merged_data.insertRow(current->all_columns, current->pos, current->rows); - - if (out_row_sources_buf) - { - /// Actually, current.impl->order stores source number (i.e. cursors[current.impl->order] == current.impl) - RowSourcePart row_source(current.impl->order); - out_row_sources_buf->write(row_source.data); - } - - if (!current->isLast()) - { - //std::cerr << "moving to next row\n"; - queue.next(); - } - else - { - /// We will get the next block from the corresponding source, if there is one. 
- queue.removeTop(); - - //std::cerr << "It was last row, fetching next block\n"; - requestDataForInput(current.impl->order); - - if (limit && merged_data.totalMergedRows() >= limit) - is_finished = true; - - return; - } - } - - is_finished = true; -} - -void MergingSortedTransform::insertFromChunk(size_t source_num) -{ - if (source_num >= cursors.size()) - throw Exception("Logical error in MergingSortedTransform", ErrorCodes::LOGICAL_ERROR); - - //std::cerr << "copied columns\n"; - - auto num_rows = source_chunks[source_num].getNumRows(); - - UInt64 total_merged_rows_after_insertion = merged_data.mergedRows() + num_rows; - if (limit && total_merged_rows_after_insertion > limit) - { - num_rows = total_merged_rows_after_insertion - limit; - merged_data.insertFromChunk(std::move(source_chunks[source_num]), num_rows); - is_finished = true; - } - else - { - merged_data.insertFromChunk(std::move(source_chunks[source_num]), 0); - requestDataForInput(source_num); - } - - source_chunks[source_num] = Chunk(); - - /// Write order of rows for other columns - /// this data will be used in gather stream - if (out_row_sources_buf) - { - RowSourcePart row_source(source_num); - for (size_t i = 0; i < num_rows; ++i) - out_row_sources_buf->write(row_source.data); - } + algorithm.addInput(); } void MergingSortedTransform::onFinish() @@ -230,6 +41,8 @@ void MergingSortedTransform::onFinish() if (quiet) return; + auto & merged_data = algorithm.getMergedData(); + auto * log = &Logger::get("MergingSortedTransform"); double seconds = total_stopwatch.elapsedSeconds(); diff --git a/src/Processors/Merges/MergingSortedTransform.h b/src/Processors/Merges/MergingSortedTransform.h index 49bfe228106..8763fbe1aa2 100644 --- a/src/Processors/Merges/MergingSortedTransform.h +++ b/src/Processors/Merges/MergingSortedTransform.h @@ -1,16 +1,14 @@ #pragma once #include -#include -#include -#include +#include namespace DB { /// Merges several sorted inputs into one sorted output. -class MergingSortedTransform final : public IMergingTransform +class MergingSortedTransform final : public IMergingTransform2 { public: MergingSortedTransform( @@ -25,41 +23,13 @@ public: bool have_all_inputs_ = true); String getName() const override { return "MergingSortedTransform"; } - void work() override; protected: void onNewInput() override; - void initializeInputs() override; - void consume(Chunk chunk, size_t input_number) override; void onFinish() override; private: - MergedData merged_data; - - /// Settings - SortDescription description; - UInt64 limit; - bool has_collation = false; bool quiet = false; - - /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) - /// If it is not nullptr then it should be populated during execution - WriteBuffer * out_row_sources_buf = nullptr; - - /// Chunks currently being merged. - std::vector source_chunks; - - SortCursorImpls cursors; - - SortingHeap queue_without_collation; - SortingHeap queue_with_collation; - bool is_queue_initialized = false; - - template - void merge(TSortingHeap & queue); - - void insertFromChunk(size_t source_num); - void updateCursor(Chunk chunk, size_t source_num); }; } From f7fdfe4ed2b32d975a72d2948bc8d9024816bf9b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Apr 2020 20:25:44 +0300 Subject: [PATCH 199/752] Fix build. 
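[Editor's note] The build fix below drops the C++20 `concept` behind a TODO (the compiler in use cannot handle it yet) and splits the templated IMergingTransform2 into a non-template IMergingTransformBase that owns the port state machine, leaving only work() in the template. A condensed sketch of that split, with ports and chunks reduced to plain fields and a Status shaped like IMergingAlgorithm::Status assumed:

```cpp
#include <cstddef>
#include <utility>

struct Chunk { bool has_rows = false; };

// Non-template base: everything the port state machine needs, no Algorithm.
// (In the patch this is IMergingTransformBase, and prepare() lives here.)
class MergingTransformBase
{
protected:
    Chunk output_chunk;
    Chunk input_chunk;
    bool is_finished = false;
    bool need_data = false;
    std::size_t next_input_to_read = 0;
};

// Thin template wrapper: only work() depends on the concrete algorithm, so
// only work() is instantiated per algorithm type; the rest compiles once.
template <typename Algorithm>
class MergingTransform : public MergingTransformBase
{
public:
    explicit MergingTransform(Algorithm algorithm_) : algorithm(std::move(algorithm_)) {}

    void work()
    {
        if (input_chunk.has_rows)
            algorithm.consume(std::move(input_chunk), next_input_to_read);

        auto status = algorithm.merge(); // {chunk, is_finished, required_source}

        if (status.chunk.has_rows)
            output_chunk = std::move(status.chunk);

        if (status.required_source >= 0)
        {
            next_input_to_read = static_cast<std::size_t>(status.required_source);
            need_data = true;
        }

        if (status.is_finished)
            is_finished = true;
    }

private:
    Algorithm algorithm;
};
```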
--- src/Processors/Merges/IMergingAlgorithm.h | 6 +- src/Processors/Merges/IMergingTransform.cpp | 61 +++---------- src/Processors/Merges/IMergingTransform.h | 86 ++++++++++++++----- .../Merges/MergingSortedAlgorithm.cpp | 9 +- 4 files changed, 90 insertions(+), 72 deletions(-) diff --git a/src/Processors/Merges/IMergingAlgorithm.h b/src/Processors/Merges/IMergingAlgorithm.h index 1dbe88e1370..51ee58cedd2 100644 --- a/src/Processors/Merges/IMergingAlgorithm.h +++ b/src/Processors/Merges/IMergingAlgorithm.h @@ -24,10 +24,12 @@ public: virtual void consume(Chunk chunk, size_t source_num) = 0; virtual Status merge() = 0; + IMergingAlgorithm(IMergingAlgorithm &&) = default; virtual ~IMergingAlgorithm() = default; }; -template -concept MergingAlgorithm = std::is_base_of::value && std::is_move_constructible::value; +// TODO: use when compile with clang which could support it +// template +// concept MergingAlgorithm = std::is_base_of::value && std::is_move_constructible::value; } diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 295d7361d19..11ab063dce7 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -200,28 +200,22 @@ IProcessor::Status IMergingTransform::prepare() return Status::Ready; } - -template -IMergingTransform2::IMergingTransform2( - Algorithm algorithm, - size_t num_inputs, - const Block & input_header, - const Block & output_header, - bool have_all_inputs_) - : IProcessor(InputPorts(num_inputs, input_header), {output_header}) - , algorithm(std::move(algorithm)) - , have_all_inputs(have_all_inputs_) +IMergingTransformBase::IMergingTransformBase( + size_t num_inputs, + const Block & input_header, + const Block & output_header, + bool have_all_inputs_) + : IProcessor(InputPorts(num_inputs, input_header), {output_header}) + , have_all_inputs(have_all_inputs_) { } -template -void IMergingTransform2::onNewInput() +void IMergingTransformBase::onNewInput() { throw Exception("onNewInput is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } -template -void IMergingTransform2::addInput() +void IMergingTransformBase::addInput() { if (have_all_inputs) throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); @@ -230,8 +224,7 @@ void IMergingTransform2::addInput() onNewInput(); } -template -void IMergingTransform2::setHaveAllInputs() +void IMergingTransformBase::setHaveAllInputs() { if (have_all_inputs) throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); @@ -239,8 +232,7 @@ void IMergingTransform2::setHaveAllInputs() have_all_inputs = true; } -template -IProcessor::Status IMergingTransform2::prepareInitializeInputs() +IProcessor::Status IMergingTransformBase::prepareInitializeInputs() { /// Add information about inputs. if (input_states.empty()) @@ -296,8 +288,7 @@ IProcessor::Status IMergingTransform2::prepareInitializeInputs() return Status::Ready; } -template -IProcessor::Status IMergingTransform2::prepare() +IProcessor::Status IMergingTransformBase::prepare() { if (!have_all_inputs) return Status::NeedData; @@ -327,11 +318,8 @@ IProcessor::Status IMergingTransform2::prepare() bool is_port_full = !output.canPush(); /// Push if has data. 
- if (has_output_chunk && !is_port_full) - { + if (output_chunk && !is_port_full) output.push(std::move(output_chunk)); - has_output_chunk = false; - } if (!is_initialized) return prepareInitializeInputs(); @@ -365,7 +353,7 @@ IProcessor::Status IMergingTransform2::prepare() if (!chunk.hasRows() && !input.isFinished()) return Status::NeedData; - algorithm.consume(std::move(chunk), next_input_to_read); + input_chunk = std::move(chunk); } need_data = false; @@ -377,25 +365,4 @@ IProcessor::Status IMergingTransform2::prepare() return Status::Ready; } -template -void IMergingTransform2::work() -{ - if (!init_chunks.empty()) - algorithm.initialize(std::move(init_chunks)); - - IMergingAlgorithm::Status status = algorithm.merge(); - - if (status.chunk && status.chunk.hasRows()) - { - has_output_chunk = true; - output_chunk = std::move(status.chunk); - } - - if (status.required_source >= 0) - next_input_to_read = status.required_source; - - if (status.is_finished) - is_finished = true; -} - } diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 9122e28a913..3493ea46af1 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -67,18 +67,14 @@ private: }; /// Base class for merging transforms. -template -class IMergingTransform2 : public IProcessor +class IMergingTransformBase : public IProcessor { public: - IMergingTransform2( - Algorithm algorithm, - size_t num_inputs, - const Block & input_header, - const Block & output_header, - //size_t max_block_size, - //bool use_average_block_size, /// For adaptive granularity. Return chunks with the same avg size as inputs. - bool have_all_inputs_); + IMergingTransformBase( + size_t num_inputs, + const Block & input_header, + const Block & output_header, + bool have_all_inputs_); /// Methods to add additional input port. It is possible to do only before the first call of `prepare`. void addInput(); @@ -86,27 +82,21 @@ public: void setHaveAllInputs(); Status prepare() override; - void work() override; protected: virtual void onNewInput(); /// Is called when new input is added. Only if have_all_inputs = false. virtual void onFinish() {} /// Is called when all data is processed. - /// Profile info. - Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; - Algorithm algorithm; - -private: /// Processor state. 
Chunk output_chunk; - bool has_output_chunk = false; + Chunk input_chunk; bool is_finished = false; - bool is_initialized = false; bool need_data = false; size_t next_input_to_read = 0; - std::atomic have_all_inputs; + Chunks init_chunks; +private: struct InputState { explicit InputState(InputPort & port_) : port(port_) {} @@ -116,9 +106,63 @@ private: }; std::vector input_states; - Chunks init_chunks; + std::atomic have_all_inputs; + bool is_initialized = false; - Status prepareInitializeInputs(); + IProcessor::Status prepareInitializeInputs(); +}; + +template +class IMergingTransform2 : public IMergingTransformBase +{ +public: + IMergingTransform2( + Algorithm algorithm_, + size_t num_inputs, + const Block & input_header, + const Block & output_header, + bool have_all_inputs_) + : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_) + , algorithm(std::move(algorithm_)) + { + } + + void work() override + { + if (!init_chunks.empty()) + algorithm.initialize(std::move(init_chunks)); + + if (input_chunk) + algorithm.consume(std::move(input_chunk), next_input_to_read); + + IMergingAlgorithm::Status status = algorithm.merge(); + + if (status.chunk && status.chunk.hasRows()) + output_chunk = std::move(status.chunk); + + if (status.required_source >= 0) + { + next_input_to_read = status.required_source; + need_data = true; + } + + if (status.is_finished) + is_finished = true; + } + +protected: + Algorithm algorithm; + + /// Profile info. + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + +private: + using IMergingTransformBase::output_chunk; + using IMergingTransformBase::input_chunk; + using IMergingTransformBase::is_finished; + using IMergingTransformBase::need_data; + using IMergingTransformBase::next_input_to_read; + using IMergingTransformBase::init_chunks; }; } diff --git a/src/Processors/Merges/MergingSortedAlgorithm.cpp b/src/Processors/Merges/MergingSortedAlgorithm.cpp index 556bde169d3..3be9f3f64ef 100644 --- a/src/Processors/Merges/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/MergingSortedAlgorithm.cpp @@ -80,6 +80,11 @@ void MergingSortedAlgorithm::consume(Chunk chunk, size_t source_num) prepareChunk(chunk); source_chunks[source_num] = std::move(chunk); cursors[source_num].reset(source_chunks[source_num].getColumns(), {}); + + if (has_collation) + queue_with_collation.push(cursors[source_num]); + else + queue_without_collation.push(cursors[source_num]); } IMergingAlgorithm::Status MergingSortedAlgorithm::merge() @@ -166,9 +171,9 @@ IMergingAlgorithm::Status MergingSortedAlgorithm::insertFromChunk(size_t source_ auto num_rows = source_chunks[source_num].getNumRows(); UInt64 total_merged_rows_after_insertion = merged_data.mergedRows() + num_rows; - bool is_finished = limit && total_merged_rows_after_insertion > limit; + bool is_finished = limit && total_merged_rows_after_insertion >= limit; - if (is_finished) + if (limit && total_merged_rows_after_insertion > limit) { num_rows = total_merged_rows_after_insertion - limit; merged_data.insertFromChunk(std::move(source_chunks[source_num]), num_rows); From 868f7e9aea34b01f5d7590aa8f9f313afcf92448 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 12:16:23 +0300 Subject: [PATCH 200/752] Move Graphite params to separate file. 
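Retention, Pattern and Params were previously declared inside
GraphiteRollupSortedTransform.h; they describe config-level rollup rules and
are needed by code (e.g. MergeTree) that should not depend on the transform
itself. As a rough illustration of how one <pattern> from the XML example in
the header comment maps onto these structs (hand-written values; the real
structs are filled by the Graphite config loader, which this patch does not
touch):

    /// <pattern> click_cost / any / (0 -> 5), (86400 -> 60)
    Graphite::Pattern pattern;
    pattern.regexp_str = "click_cost";
    /// pattern.regexp is the compiled form of regexp_str, and
    /// pattern.function is resolved from the <function> name ("any")
    /// via the aggregate function factory; both are omitted here.
    /// Retentions must be ordered by 'age' descending, so the two
    /// config entries are stored in reverse order:
    pattern.retentions = Graphite::Retentions{{86400, 60}, {0, 5}};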
--- src/Processors/Merges/Graphite.h | 126 ++++++++++++++++++ .../Merges/GraphiteRollupSortedTransform.h | 122 +---------------- src/Storages/MergeTree/MergeTreeData.h | 2 +- 3 files changed, 128 insertions(+), 122 deletions(-) create mode 100644 src/Processors/Merges/Graphite.h diff --git a/src/Processors/Merges/Graphite.h b/src/Processors/Merges/Graphite.h new file mode 100644 index 00000000000..03269de35d0 --- /dev/null +++ b/src/Processors/Merges/Graphite.h @@ -0,0 +1,126 @@ +#pragma once + +namespace DB +{ +/** Intended for implementation of "rollup" - aggregation (rounding) of older data + * for a table with Graphite data (Graphite is the system for time series monitoring). + * + * Table with graphite data has at least the following columns (accurate to the name): + * Path, Time, Value, Version + * + * Path - name of metric (sensor); + * Time - time of measurement; + * Value - value of measurement; + * Version - a number, that for equal pairs of Path and Time, need to leave only record with maximum version. + * + * Each row in a table correspond to one value of one sensor. + * + * Pattern should contain function, retention scheme, or both of them. The order of patterns does mean as well: + * * Aggregation OR retention patterns should be first + * * Then aggregation AND retention full patterns have to be placed + * * default pattern without regexp must be the last + * + * Rollup rules are specified in the following way: + * + * pattern + * regexp + * function + * pattern + * regexp + * age -> precision + * age -> precision + * ... + * pattern + * regexp + * function + * age -> precision + * age -> precision + * ... + * pattern + * ... + * default + * function + * age -> precision + * ... + * + * regexp - pattern for sensor name + * default - if no pattern has matched + * + * age - minimal data age (in seconds), to start rounding with specified precision. + * precision - rounding precision (in seconds) + * + * function - name of aggregate function to be applied for values, that time was rounded to same. + * + * Example: + * + * + * + * \.max$ + * max + * + * + * click_cost + * any + * + * 0 + * 5 + * + * + * 86400 + * 60 + * + * + * + * max + * + * 0 + * 60 + * + * + * 3600 + * 300 + * + * + * 86400 + * 3600 + * + * + * + */ +namespace Graphite +{ +struct Retention +{ + UInt32 age; + UInt32 precision; +}; + +using Retentions = std::vector; + +struct Pattern +{ + std::shared_ptr regexp; + std::string regexp_str; + AggregateFunctionPtr function; + Retentions retentions; /// Must be ordered by 'age' descending. 
+ enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically +}; + +using Patterns = std::vector; +using RetentionPattern = Pattern; +using AggregationPattern = Pattern; + +struct Params +{ + String config_name; + String path_column_name; + String time_column_name; + String value_column_name; + String version_column_name; + Graphite::Patterns patterns; +}; + +using RollupRule = std::pair; +} + +} diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index 70112dd853a..6b1cda1011f 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -15,127 +16,6 @@ namespace DB { -/** Intended for implementation of "rollup" - aggregation (rounding) of older data - * for a table with Graphite data (Graphite is the system for time series monitoring). - * - * Table with graphite data has at least the following columns (accurate to the name): - * Path, Time, Value, Version - * - * Path - name of metric (sensor); - * Time - time of measurement; - * Value - value of measurement; - * Version - a number, that for equal pairs of Path and Time, need to leave only record with maximum version. - * - * Each row in a table correspond to one value of one sensor. - * - * Pattern should contain function, retention scheme, or both of them. The order of patterns does mean as well: - * * Aggregation OR retention patterns should be first - * * Then aggregation AND retention full patterns have to be placed - * * default pattern without regexp must be the last - * - * Rollup rules are specified in the following way: - * - * pattern - * regexp - * function - * pattern - * regexp - * age -> precision - * age -> precision - * ... - * pattern - * regexp - * function - * age -> precision - * age -> precision - * ... - * pattern - * ... - * default - * function - * age -> precision - * ... - * - * regexp - pattern for sensor name - * default - if no pattern has matched - * - * age - minimal data age (in seconds), to start rounding with specified precision. - * precision - rounding precision (in seconds) - * - * function - name of aggregate function to be applied for values, that time was rounded to same. - * - * Example: - * - * - * - * \.max$ - * max - * - * - * click_cost - * any - * - * 0 - * 5 - * - * - * 86400 - * 60 - * - * - * - * max - * - * 0 - * 60 - * - * - * 3600 - * 300 - * - * - * 86400 - * 3600 - * - * - * - */ -namespace Graphite -{ - struct Retention - { - UInt32 age; - UInt32 precision; - }; - - using Retentions = std::vector; - - struct Pattern - { - std::shared_ptr regexp; - std::string regexp_str; - AggregateFunctionPtr function; - Retentions retentions; /// Must be ordered by 'age' descending. - enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically - }; - - using Patterns = std::vector; - using RetentionPattern = Pattern; - using AggregationPattern = Pattern; - - struct Params - { - String config_name; - String path_column_name; - String time_column_name; - String value_column_name; - String version_column_name; - Graphite::Patterns patterns; - }; - - using RollupRule = std::pair; -} - /** Merges several sorted ports into one. 
* * For each group of consecutive identical values of the `path` column, diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8f4ac7cc79a..30bfde17df3 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include From 27f3d2711ae25b0ade62f33fc8176e1786540ff9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 12:20:43 +0300 Subject: [PATCH 201/752] Move Graphite params to separate file. --- src/Processors/Merges/Graphite.h | 11 +++++++++-- src/Processors/Merges/GraphiteRollupSortedTransform.h | 1 - 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Processors/Merges/Graphite.h b/src/Processors/Merges/Graphite.h index 03269de35d0..3ff297987de 100644 --- a/src/Processors/Merges/Graphite.h +++ b/src/Processors/Merges/Graphite.h @@ -1,7 +1,14 @@ #pragma once +#include namespace DB { + +class IAggregateFunction; +using AggregateFunctionPtr = std::shared_ptr; + +} + /** Intended for implementation of "rollup" - aggregation (rounding) of older data * for a table with Graphite data (Graphite is the system for time series monitoring). * @@ -87,8 +94,9 @@ namespace DB * * */ -namespace Graphite +namespace DB::Graphite { + struct Retention { UInt32 age; @@ -121,6 +129,5 @@ struct Params }; using RollupRule = std::pair; -} } diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index 6b1cda1011f..ee6610935f3 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -7,7 +7,6 @@ #include #include -#include #include #include #include From 16137068b20e90378cae085d08bec9eb5d51f31d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 12:22:51 +0300 Subject: [PATCH 202/752] Move Graphite params to separate file. --- src/DataStreams/TTLBlockInputStream.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index 5279ebca948..28e650dd969 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { From 71c72a75d73fe0a7fc1dc199646549f48884f5e5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 12:24:16 +0300 Subject: [PATCH 203/752] Move Graphite params to separate file. --- src/Storages/StorageDistributed.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 2790d6e9bce..45e1aeff6f4 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -14,6 +14,8 @@ #include #include +#include + #include #include #include From 978576d0c51885ed956db410b5254f7096706d08 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 12:25:52 +0300 Subject: [PATCH 204/752] Move Graphite params to separate file. 
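Same follow-up as the previous commits in this series: after the structs moved
out of GraphiteRollupSortedTransform.h, each translation unit that uses
Graphite::Params has to include Processors/Merges/Graphite.h directly instead
of receiving it transitively. The new header stays cheap to pull in everywhere
because it only forward-declares the aggregate function type instead of
including its definition. A sketch of that part of Graphite.h (the stripped
include target above is presumably <memory>, needed for shared_ptr):

    #pragma once
    #include <memory>

    namespace DB
    {
    /// A forward declaration is enough: Pattern only stores the function
    /// behind a shared_ptr and never calls it from the header.
    class IAggregateFunction;
    using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
    }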
--- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c560583259c..9fca45725d9 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include From 97158f765663bc7bcc3d4564bd37e66ca1a3ef60 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 12:30:53 +0300 Subject: [PATCH 205/752] Fix build. --- src/Processors/Merges/IMergingAlgorithm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Merges/IMergingAlgorithm.h b/src/Processors/Merges/IMergingAlgorithm.h index 51ee58cedd2..c01152eb39f 100644 --- a/src/Processors/Merges/IMergingAlgorithm.h +++ b/src/Processors/Merges/IMergingAlgorithm.h @@ -24,6 +24,7 @@ public: virtual void consume(Chunk chunk, size_t source_num) = 0; virtual Status merge() = 0; + IMergingAlgorithm(const IMergingAlgorithm &) = default; IMergingAlgorithm(IMergingAlgorithm &&) = default; virtual ~IMergingAlgorithm() = default; }; From 2aba662d2cae063a0db8c6d58ec960918e040e64 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 12:32:34 +0300 Subject: [PATCH 206/752] Fix build. --- src/Processors/Merges/IMergingAlgorithm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Merges/IMergingAlgorithm.h b/src/Processors/Merges/IMergingAlgorithm.h index c01152eb39f..edd5b7d883c 100644 --- a/src/Processors/Merges/IMergingAlgorithm.h +++ b/src/Processors/Merges/IMergingAlgorithm.h @@ -24,7 +24,7 @@ public: virtual void consume(Chunk chunk, size_t source_num) = 0; virtual Status merge() = 0; - IMergingAlgorithm(const IMergingAlgorithm &) = default; + IMergingAlgorithm() = default; IMergingAlgorithm(IMergingAlgorithm &&) = default; virtual ~IMergingAlgorithm() = default; }; From a4f5280d80e6459dcaba5b34b542f41512207df6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 12:32:37 +0300 Subject: [PATCH 207/752] Remove excessive } --- website/templates/footer.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/footer.html b/website/templates/footer.html index 42f1e4263be..765ea63d528 100644 --- a/website/templates/footer.html +++ b/website/templates/footer.html @@ -8,7 +8,7 @@ {{ _('ClickHouse source code is published under the Apache 2.0 License.') }} {{ _('Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.') }}
-            © 2016–2020 {{ _('Yandex LLC') }}}
+            © 2016–2020 {{ _('Yandex LLC') }}
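A note ahead of the next commit ("workaround for function resolver"):
FunctionWrapper used to borrow the overload resolver as
`const IFunctionOverloadResolver &`, so every call site had to keep the owning
FunctionOverloadResolverPtr alive in a named local for the duration of the
call, which is presumably the lifetime hazard the workaround addresses.
Passing the shared_ptr by value ties the resolver's lifetime to the argument
and lets the factory lookup be written inline. The shapes of the two call
styles, condensed from the diff that follows (trailing arguments elided):

    /// Before: the caller must own the resolver across the call.
    FunctionOverloadResolverPtr dict_has = FunctionFactory::instance().get("dictHas", context);
    function_has = std::make_unique<FunctionWrapper>(*dict_has, /*...*/);

    /// After: ownership travels with the argument.
    function_has = std::make_unique<FunctionWrapper>(
        FunctionFactory::instance().get("dictHas", context), /*...*/);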
From ba50519af8bda35a7afc2a6047dde4eb40b29d36 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Fri, 10 Apr 2020 13:28:44 +0300 Subject: [PATCH 208/752] workaround for function resolver --- src/Interpreters/DictionaryReader.h | 22 +++++++++---------- .../00065_loyalty_with_storage_join.sql | 7 +++--- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/DictionaryReader.h b/src/Interpreters/DictionaryReader.h index bb13758f46c..823a3690669 100644 --- a/src/Interpreters/DictionaryReader.h +++ b/src/Interpreters/DictionaryReader.h @@ -29,21 +29,21 @@ public: ColumnNumbers arg_positions; size_t result_pos = 0; - FunctionWrapper(const IFunctionOverloadResolver & resolver, const ColumnsWithTypeAndName & arguments, Block & block, + FunctionWrapper(FunctionOverloadResolverPtr resolver, const ColumnsWithTypeAndName & arguments, Block & block, const ColumnNumbers & arg_positions_, const String & column_name, TypeIndex expected_type) : arg_positions(arg_positions_) + , result_pos(block.columns()) { - FunctionBasePtr prepare_function = resolver.build(arguments); - result_pos = block.columns(); + FunctionBasePtr prepared_function = resolver->build(arguments); ColumnWithTypeAndName result; result.name = "get_" + column_name; - result.type = prepare_function->getReturnType(); + result.type = prepared_function->getReturnType(); if (result.type->getTypeId() != expected_type) throw Exception("Type mismatch in dictionary reader for: " + column_name, ErrorCodes::TYPE_MISMATCH); block.insert(result); - function = prepare_function->prepare(block, arg_positions, result_pos); + function = prepared_function->prepare(block, arg_positions, result_pos); } void execute(Block & block, size_t rows) const @@ -60,9 +60,6 @@ public: if (src_column_names.size() != result_columns.size()) throw Exception("Columns number mismatch in dictionary reader", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - FunctionOverloadResolverPtr dict_has(FunctionFactory::instance().get("dictHas", context)); - FunctionOverloadResolverPtr dict_get(FunctionFactory::instance().get("dictGet", context)); - ColumnWithTypeAndName dict_name; ColumnWithTypeAndName key; ColumnWithTypeAndName column_name; @@ -106,8 +103,8 @@ public: sample_block.insert(key); ColumnNumbers positions_has{0, key_position}; - function_has = std::make_unique( - *dict_has, arguments_has, sample_block, positions_has, "has", DataTypeUInt8().getTypeId()); + function_has = std::make_unique(FunctionFactory::instance().get("dictHas", context), + arguments_has, sample_block, positions_has, "has", DataTypeUInt8().getTypeId()); functions_get.reserve(result_header.columns()); for (size_t i = 0; i < result_header.columns(); ++i) @@ -116,8 +113,9 @@ public: auto & column = result_header.getByPosition(i); arguments_get[1].column = DataTypeString().createColumnConst(1, src_column_names[i]); ColumnNumbers positions_get{0, column_name_pos, key_position}; - functions_get.emplace_back(FunctionWrapper( - *dict_get, arguments_get, sample_block, positions_get, column.name, column.type->getTypeId())); + functions_get.emplace_back( + FunctionWrapper(FunctionFactory::instance().get("dictGet", context), + arguments_get, sample_block, positions_get, column.name, column.type->getTypeId())); } } diff --git a/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql b/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql index d3e73faa7be..2fa337b6ebc 100644 --- a/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql +++ 
b/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql @@ -3,14 +3,15 @@ SET any_join_distinct_right_table_keys = 1; USE test; DROP TABLE IF EXISTS join; -CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID); +CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID) +SETTINGS any_join_distinct_right_table_keys = 1; INSERT INTO join SELECT UserID, toInt8(if((sum(SearchEngineID = 2) AS yandex) > (sum(SearchEngineID = 3) AS google), - yandex / (yandex + google), - -google / (yandex + google)) * 10) AS loyalty + yandex / (yandex + google), + -google / (yandex + google)) * 10) AS loyalty FROM hits WHERE (SearchEngineID = 2) OR (SearchEngineID = 3) GROUP BY UserID From 564bec176ead92e937efa689b4c3b139264da4e5 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 13:40:59 +0300 Subject: [PATCH 209/752] Update codebrowser links (#10166) * Put single-page content into a separate js file * move break comments * Update codebrowser links --- docs/en/development/browse_code.md | 2 +- docs/en/whats_new/changelog/index.md | 2 +- docs/es/development/browse_code.md | 2 +- docs/es/whats_new/changelog/index.md | 2 +- docs/fa/development/browse_code.md | 2 +- docs/fa/whats_new/changelog/index.md | 2 +- docs/fr/development/browse_code.md | 2 +- docs/fr/whats_new/changelog/index.md | 2 +- docs/ja/development/browse_code.md | 2 +- docs/ja/whats_new/changelog/index.md | 2 +- docs/ru/development/browse_code.md | 2 +- docs/ru/whats_new/changelog/index.md | 2 +- docs/zh/changelog/index.md | 2 +- docs/zh/development/browse_code.md | 2 +- docs/zh/whats_new/changelog/index.md | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/en/development/browse_code.md b/docs/en/development/browse_code.md index 69b15df3629..8e98e3f5f0f 100644 --- a/docs/en/development/browse_code.md +++ b/docs/en/development/browse_code.md @@ -5,7 +5,7 @@ toc_title: Browse ClickHouse Source Code # Browse ClickHouse Source Code {#browse-clickhouse-source-code} -You can use **Woboq** online code browser available [here](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. +You can use **Woboq** online code browser available [here](https://clickhouse.tech/codebrowser/html_report///ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. diff --git a/docs/en/whats_new/changelog/index.md b/docs/en/whats_new/changelog/index.md index bcfe62cbd0b..969e71fbf42 100644 --- a/docs/en/whats_new/changelog/index.md +++ b/docs/en/whats_new/changelog/index.md @@ -240,7 +240,7 @@ toc_title: '2020' * Updated checking for hung queries in clickhouse-test script [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz)) * Removed some useless files from repository. [#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Changed type of math perftests from `once` to `loop`. [#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Add docker image which allows to build interactive code browser HTML report for our codebase. 
[#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/src/index.html) +* Add docker image which allows to build interactive code browser HTML report for our codebase. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/src/index.html) * Suppress some test failures under MSan. [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) * Speedup "exception while insert" test. This test often time out in debug-with-coverage build. [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Updated `libcxx` and `libcxxabi` to master. In preparation to [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov)) diff --git a/docs/es/development/browse_code.md b/docs/es/development/browse_code.md index 6bbc9d57729..393577e8dca 100644 --- a/docs/es/development/browse_code.md +++ b/docs/es/development/browse_code.md @@ -7,7 +7,7 @@ toc_title: "Examinar el c\xF3digo fuente de ClickHouse" # Examinar el código fuente de ClickHouse {#browse-clickhouse-source-code} -Usted puede utilizar **Woboq** navegador de código en línea disponible [aqui](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/src/index.html). Proporciona navegación de código y resaltado semántico, búsqueda e indexación. La instantánea de código se actualiza diariamente. +Usted puede utilizar **Woboq** navegador de código en línea disponible [aqui](https://clickhouse.tech/codebrowser/html_report///ClickHouse/src/index.html). Proporciona navegación de código y resaltado semántico, búsqueda e indexación. La instantánea de código se actualiza diariamente. Además, puede navegar por las fuentes en [GitHub](https://github.com/ClickHouse/ClickHouse) como de costumbre. diff --git a/docs/es/whats_new/changelog/index.md b/docs/es/whats_new/changelog/index.md index 03c8ea00cbe..053f924099a 100644 --- a/docs/es/whats_new/changelog/index.md +++ b/docs/es/whats_new/changelog/index.md @@ -249,7 +249,7 @@ toc_title: '2020' - Comprobación actualizada de consultas colgadas en el script de prueba de clickhouse [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alejandro Kazakov](https://github.com/Akazz)) - Se eliminaron algunos archivos inútiles del repositorio. [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov)) - Tipo cambiado de perftests matemáticos de `once` a `loop`. [\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Agregue una imagen acoplable que permite construir un informe HTML del navegador de código interactivo para nuestra base de código. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) Ver [Navegador de código Woboq](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/src/index.html) +- Agregue una imagen acoplable que permite construir un informe HTML del navegador de código interactivo para nuestra base de código. 
[\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) Ver [Navegador de código Woboq](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/src/index.html) - Suprima algunas fallas de prueba bajo MSan. [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alejandro Kuzmenkov](https://github.com/akuzm)) - Aceleración “exception while insert” prueba. Esta prueba a menudo se agota en la compilación de depuración con cobertura. [\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov)) - Actualizar `libcxx` y `libcxxabi` dominar. En preparación para [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov)) diff --git a/docs/fa/development/browse_code.md b/docs/fa/development/browse_code.md index 1609e4b1d77..0338511c44c 100644 --- a/docs/fa/development/browse_code.md +++ b/docs/fa/development/browse_code.md @@ -8,7 +8,7 @@ toc_title: "\u0645\u0631\u0648\u0631 \u06A9\u062F \u0645\u0646\u0628\u0639 \u06A # فهرست clickhouse کد منبع {#browse-clickhouse-source-code} -شما می توانید استفاده کنید **ووبوک** آنلاین کد مرورگر موجود [اینجا](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html). این فراهم می کند ناوبری کد و برجسته معنایی, جستجو و نمایه سازی. عکس فوری کد روزانه به روز می شود. +شما می توانید استفاده کنید **ووبوک** آنلاین کد مرورگر موجود [اینجا](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/index.html). این فراهم می کند ناوبری کد و برجسته معنایی, جستجو و نمایه سازی. عکس فوری کد روزانه به روز می شود. همچنین شما می توانید فهرست منابع در [گیتهاب](https://github.com/ClickHouse/ClickHouse) به عنوان معمول است. diff --git a/docs/fa/whats_new/changelog/index.md b/docs/fa/whats_new/changelog/index.md index c7eb257e5a9..8dcc20e9409 100644 --- a/docs/fa/whats_new/changelog/index.md +++ b/docs/fa/whats_new/changelog/index.md @@ -249,7 +249,7 @@ toc_title: '2020' - به روز شده در چک کردن برای نمایش داده شد را قطع کرد در اسکریپت کلیک تست [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([الکساندر کازاکوف](https://github.com/Akazz)) - حذف برخی از فایل های بی فایده از مخزن. [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([الکسی میلویدو](https://github.com/alexey-milovidov)) - نوع تغییر کامل ریاضی از `once` به `loop`. [\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([نیکولای کوچتو](https://github.com/KochetovNicolai)) -- اضافه کردن تصویر کارگر بارانداز که اجازه می دهد تا برای ساخت کد تعاملی مرورگر گزارش متنی برای کدهای ما. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([الساپین](https://github.com/alesapin)) ببینید [مرورگر کد ووبوک](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/src/index.html) +- اضافه کردن تصویر کارگر بارانداز که اجازه می دهد تا برای ساخت کد تعاملی مرورگر گزارش متنی برای کدهای ما. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([الساپین](https://github.com/alesapin)) ببینید [مرورگر کد ووبوک](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/src/index.html) - سرکوب برخی از شکست تست تحت مسان. [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([الکساندر کوزمنکوف](https://github.com/akuzm)) - افزایش سرعت “exception while insert” امتحان این تست اغلب زمان در اشکال زدایی با پوشش ساخت. 
[\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([الکسی میلویدو](https://github.com/alexey-milovidov)) - به روز شده `libcxx` و `libcxxabi` به سلامتی استاد در تهیه به [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([الکسی میلویدو](https://github.com/alexey-milovidov)) diff --git a/docs/fr/development/browse_code.md b/docs/fr/development/browse_code.md index 62caa530b5d..0d708da5beb 100644 --- a/docs/fr/development/browse_code.md +++ b/docs/fr/development/browse_code.md @@ -7,7 +7,7 @@ toc_title: Parcourir Le Code Source De ClickHouse # Parcourir Le Code Source De ClickHouse {#browse-clickhouse-source-code} -Vous pouvez utiliser **Woboq** navigateur de code en ligne disponible [ici](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html). Il fournit la navigation de code et la mise en évidence sémantique, la recherche et l'indexation. L'instantané de code est mis à jour quotidiennement. +Vous pouvez utiliser **Woboq** navigateur de code en ligne disponible [ici](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/index.html). Il fournit la navigation de code et la mise en évidence sémantique, la recherche et l'indexation. L'instantané de code est mis à jour quotidiennement. Aussi, vous pouvez parcourir les sources sur [GitHub](https://github.com/ClickHouse/ClickHouse) comme à l'habitude. diff --git a/docs/fr/whats_new/changelog/index.md b/docs/fr/whats_new/changelog/index.md index d45e36b1d8f..fe49ce8244c 100644 --- a/docs/fr/whats_new/changelog/index.md +++ b/docs/fr/whats_new/changelog/index.md @@ -249,7 +249,7 @@ toc_title: '2020' - Mise à jour de la vérification des requêtes suspendues dans le script clickhouse-test [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz)) - Suppression de certains fichiers inutiles du référentiel. [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexeï-milovidov](https://github.com/alexey-milovidov)) - Changement de type de math perftests de `once` de `loop`. [\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Ajouter une image docker qui permet de créer un rapport HTML interactif du navigateur de code pour notre base de code. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alésapine](https://github.com/alesapin)) Voir [Navigateur De Code Woboq](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/src/src/index.html) +- Ajouter une image docker qui permet de créer un rapport HTML interactif du navigateur de code pour notre base de code. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alésapine](https://github.com/alesapin)) Voir [Navigateur De Code Woboq](https://clickhouse.tech/codebrowser/html_report///ClickHouse/src/src/index.html) - Supprimer certains échecs de test sous MSan. [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) - SpeedUp “exception while insert” test. Ce test expire souvent dans la construction debug-with-coverage. [\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexeï-milovidov](https://github.com/alexey-milovidov)) - Mettre `libcxx` et `libcxxabi` maîtriser. 
En préparation à [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexeï-milovidov](https://github.com/alexey-milovidov)) diff --git a/docs/ja/development/browse_code.md b/docs/ja/development/browse_code.md index f8357fcca27..d66b14e400f 100644 --- a/docs/ja/development/browse_code.md +++ b/docs/ja/development/browse_code.md @@ -7,7 +7,7 @@ toc_title: "ClickHouse\u306E\u30BD\u30FC\u30B9\u30B3\u30FC\u30C9\u3092\u53C2\u71 # ClickHouseのソースコードを参照 {#browse-clickhouse-source-code} -を使用することができ **Woboq** オンラインのコードブラウザをご利用 [ここに](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html). このコードナビゲーションや意味のハイライト表示、検索インデックス. コードのスナップショットは随時更新中です。 +を使用することができ **Woboq** オンラインのコードブラウザをご利用 [ここに](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/index.html). このコードナビゲーションや意味のハイライト表示、検索インデックス. コードのスナップショットは随時更新中です。 また、ソースを閲覧することもできます [GitHub](https://github.com/ClickHouse/ClickHouse) いつものように diff --git a/docs/ja/whats_new/changelog/index.md b/docs/ja/whats_new/changelog/index.md index dd382ebf5ce..0d90862ee11 100644 --- a/docs/ja/whats_new/changelog/index.md +++ b/docs/ja/whats_new/changelog/index.md @@ -249,7 +249,7 @@ toc_title: '2020' - Clickhouseテストスクリプトでハングクエリのチェックを更新 [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz)) - リポジトリか [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov)) - から数学perftestsの変更タイプ `once` に `loop`. [\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- 追加docker画像を構築ィコードのブラウザのhtmlレポート当社のコードベース. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin))見る [Woboqコードブラウザ](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/src/index.html) +- 追加docker画像を構築ィコードのブラウザのhtmlレポート当社のコードベース. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin))見る [Woboqコードブラウザ](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/src/index.html) - MSanの下でいくつかのテストの失敗を抑制. [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) - スピードアップ “exception while insert” テスト。 このテス [\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov)) - 更新 `libcxx` と `libcxxabi` マスターに。 準備のために [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov)) diff --git a/docs/ru/development/browse_code.md b/docs/ru/development/browse_code.md index f87e3be7f4b..814b213a6a7 100644 --- a/docs/ru/development/browse_code.md +++ b/docs/ru/development/browse_code.md @@ -1,6 +1,6 @@ # Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse} -Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. +Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.tech/codebrowser/html_report///ClickHouse/src/index.html). 
В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse). diff --git a/docs/ru/whats_new/changelog/index.md b/docs/ru/whats_new/changelog/index.md index bcfe62cbd0b..969e71fbf42 100644 --- a/docs/ru/whats_new/changelog/index.md +++ b/docs/ru/whats_new/changelog/index.md @@ -240,7 +240,7 @@ toc_title: '2020' * Updated checking for hung queries in clickhouse-test script [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz)) * Removed some useless files from repository. [#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Changed type of math perftests from `once` to `loop`. [#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Add docker image which allows to build interactive code browser HTML report for our codebase. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/src/index.html) +* Add docker image which allows to build interactive code browser HTML report for our codebase. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/src/index.html) * Suppress some test failures under MSan. [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) * Speedup "exception while insert" test. This test often time out in debug-with-coverage build. [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Updated `libcxx` and `libcxxabi` to master. In preparation to [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov)) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index 90bb7abe0b0..33bb7bfd5f1 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -246,7 +246,7 @@ machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 - 更新了clickhouse-test脚本中挂起查询的检查 [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) - 从存储库中删除了一些无用的文件。 [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更改类型的数学perftests从 `once` 到 `loop`. 
[\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) -- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html) +- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/index.html) - 抑制MSan下的一些测试失败。 [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) - 加速 “exception while insert” 测试 此测试通常在具有复盖率的调试版本中超时。 [\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更新 `libcxx` 和 `libcxxabi` 为了主人 在准备 [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) diff --git a/docs/zh/development/browse_code.md b/docs/zh/development/browse_code.md index 10d3ffecd15..d098675b6a0 100644 --- a/docs/zh/development/browse_code.md +++ b/docs/zh/development/browse_code.md @@ -7,7 +7,7 @@ toc_title: "\u6D4F\u89C8ClickHouse\u6E90\u4EE3\u7801" # 浏览ClickHouse源代码 {#browse-clickhouse-source-code} -您可以使用 **Woboq** 在线代码浏览器可用 [这里](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/src/index.html). 它提供了代码导航和语义突出显示,搜索和索引。 代码快照每天更新。 +您可以使用 **Woboq** 在线代码浏览器可用 [这里](https://clickhouse.tech/codebrowser/html_report///ClickHouse/src/index.html). 它提供了代码导航和语义突出显示,搜索和索引。 代码快照每天更新。 此外,您还可以浏览源 [GitHub](https://github.com/ClickHouse/ClickHouse) 像往常一样 diff --git a/docs/zh/whats_new/changelog/index.md b/docs/zh/whats_new/changelog/index.md index 90bb7abe0b0..33bb7bfd5f1 100644 --- a/docs/zh/whats_new/changelog/index.md +++ b/docs/zh/whats_new/changelog/index.md @@ -246,7 +246,7 @@ machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 - 更新了clickhouse-test脚本中挂起查询的检查 [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) - 从存储库中删除了一些无用的文件。 [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更改类型的数学perftests从 `once` 到 `loop`. 
[\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) -- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html) +- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse.tech/codebrowser/html_report///ClickHouse/dbms/index.html) - 抑制MSan下的一些测试失败。 [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) - 加速 “exception while insert” 测试 此测试通常在具有复盖率的调试版本中超时。 [\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更新 `libcxx` 和 `libcxxabi` 为了主人 在准备 [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) From 91a33e1eec27b31a262ac409ecd3f3dd976e1516 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 10 Apr 2020 12:45:25 +0200 Subject: [PATCH 210/752] Fix code wrapping for non-code part (#10129) * Fix code wrapping for non-code part * Fix links, fix formatting --- docs/en/sql_reference/statements/create.md | 24 ++++++++-------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/docs/en/sql_reference/statements/create.md b/docs/en/sql_reference/statements/create.md index fbfe3eb2cfb..36dd3aced8d 100644 --- a/docs/en/sql_reference/statements/create.md +++ b/docs/en/sql_reference/statements/create.md @@ -15,24 +15,18 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(.. ### Clauses {#clauses} -- `IF NOT EXISTS` +- `IF NOT EXISTS` + If the `db_name` database already exists, then ClickHouse doesn't create a new database and: + - Doesn't throw an exception if clause is specified. + - Throws an exception if clause isn't specified. - If the `db_name` database already exists, then ClickHouse doesn't create a new database and: - - - Doesn't throw an exception if clause is specified. - - Throws an exception if clause isn't specified. - -- `ON CLUSTER` - - ClickHouse creates the `db_name` database on all the servers of a specified cluster. +- `ON CLUSTER` + ClickHouse creates the `db_name` database on all the servers of a specified cluster. - `ENGINE` - - - [MySQL](../engines/database_engines/mysql.md) - - Allows you to retrieve data from the remote MySQL server. - - By default, ClickHouse uses its own [database engine](../engines/database_engines/index.md). + - [MySQL](../../engines/database_engines/mysql.md) + Allows you to retrieve data from the remote MySQL server. + By default, ClickHouse uses its own [database engine](../../engines/database_engines/index.md). 
## CREATE TABLE {#create-table-query} From 9901bf0f6e7c9dd517b0c1927569d59f4365af04 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Fri, 10 Apr 2020 14:03:08 +0300 Subject: [PATCH 211/752] fix column size in KeyGetterForDict --- src/Interpreters/HashJoin.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 16187f10fa1..1c0adb96f66 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -291,16 +291,15 @@ public: using Mapped = JoinStuff::MappedOne; using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl; - KeyGetterForDict(const ColumnRawPtrs & key_columns_, const Sizes & key_sizes_, void *) + KeyGetterForDict(const ColumnRawPtrs & key_columns_, const Sizes &, void *) : key_columns(key_columns_) - , key_sizes(key_sizes_) {} FindResult findKey(const DictionaryReader & reader, size_t i, const Arena &) { if (!read_result) { - reader.readKeys(*key_columns[0], key_sizes[0], read_result, found, positions); + reader.readKeys(*key_columns[0], key_columns[0]->size(), read_result, found, positions); result.block = &read_result; /// TODO: check types and correct nullability } @@ -311,7 +310,6 @@ public: private: const ColumnRawPtrs & key_columns; - const Sizes & key_sizes; Block read_result; Mapped result; ColumnVector::Container found; From 9b0640a3b53aff4de72c0bd85c45a8440d68193c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Apr 2020 14:29:56 +0300 Subject: [PATCH 212/752] Fix odbc round trip test --- .../0_stateless/01086_odbc_roundtrip.sh | 24 +++++++++++++++++++ .../0_stateless/01086_odbc_roundtrip.sql | 14 ----------- 2 files changed, 24 insertions(+), 14 deletions(-) create mode 100755 tests/queries/0_stateless/01086_odbc_roundtrip.sh delete mode 100644 tests/queries/0_stateless/01086_odbc_roundtrip.sql diff --git a/tests/queries/0_stateless/01086_odbc_roundtrip.sh b/tests/queries/0_stateless/01086_odbc_roundtrip.sh new file mode 100755 index 00000000000..827376395d1 --- /dev/null +++ b/tests/queries/0_stateless/01086_odbc_roundtrip.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CUR_DIR/../shell_config.sh + + +for i in $(seq 1 10); do + ${CLICKHOUSE_CLIENT} -q "select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (ANSI)}','system','tables'))" 2>/dev/null && break +done + +${CLICKHOUSE_CLIENT} --query "select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (Unicode)}','system','tables'))" + +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS test_01086" +${CLICKHOUSE_CLIENT} --query "CREATE DATABASE test_01086" + + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_01086.t (x UInt8, y Float32, z String) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query "INSERT INTO test_01086.t VALUES (1,0.1,'a я'),(2,0.2,'b ą'),(3,0.3,'c d')" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM odbc('DSN={ClickHouse DSN (ANSI)}','test_01086','t') ORDER BY x" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM odbc('DSN={ClickHouse DSN (Unicode)}','test_01086','t') ORDER BY x" + +${CLICKHOUSE_CLIENT} --query "DROP DATABASE test_01086;" diff --git a/tests/queries/0_stateless/01086_odbc_roundtrip.sql b/tests/queries/0_stateless/01086_odbc_roundtrip.sql deleted file mode 100644 index 2c31711d895..00000000000 --- a/tests/queries/0_stateless/01086_odbc_roundtrip.sql +++ /dev/null @@ -1,14 +0,0 @@ -select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (ANSI)}','system','tables')); -select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (Unicode)}','system','tables')); - -DROP DATABASE IF EXISTS test_01086; -CREATE DATABASE test_01086; -USE test_01086; - -CREATE TABLE t (x UInt8, y Float32, z String) ENGINE = Memory; -INSERT INTO t VALUES (1,0.1,'a я'),(2,0.2,'b ą'),(3,0.3,'c d'); - -select * from odbc('DSN={ClickHouse DSN (ANSI)}','test_01086','t') ORDER BY x; -select * from odbc('DSN={ClickHouse DSN (Unicode)}','test_01086','t') ORDER BY x; - -DROP DATABASE test_01086; From 2108f621ddb5497ba22b8feba2edff85b503c14e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 14:31:21 +0300 Subject: [PATCH 213/752] Disable webvisor on single-page docs --- website/js/base.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/website/js/base.js b/website/js/base.js index 2c43e435f48..ae8b3c01573 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -50,13 +50,14 @@ (function (d, w, c) { (w[c] = w[c] || []).push(function() { + var is_single_page = $('html').attr('data-single-page') === 'true'; try { w.yaCounter18343495 = new Ya.Metrika2({ - id:18343495, - clickmap:true, - trackLinks:true, - accurateTrackBounce:true, - webvisor:true + id: 18343495, + clickmap: !is_single_page, + trackLinks: !is_single_page, + accurateTrackBounce: !is_single_page, + webvisor: !is_single_page }); } catch(e) { } }); From 88657cfbe1ce48cbcedf469d5a3f16be89f335b9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Apr 2020 15:14:27 +0300 Subject: [PATCH 214/752] Add retries to test --- tests/queries/0_stateless/00646_url_engine.python | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python index 960048dbb8f..1b41216b198 100644 --- a/tests/queries/0_stateless/00646_url_engine.python +++ b/tests/queries/0_stateless/00646_url_engine.python @@ -180,7 +180,14 @@ def main(): if __name__ == "__main__": - try: - main() - except: + exception_text = '' + for i in range(1, 5): + try: + main() + break + except Exception as ex: + exception_text = str(ex) + + if exception_text: + print("Exception: 
{}".format(exception_text), file=sys.stderr) os._exit(1) From d4bc6662cd441321e9f9170ff83aebdf9cc6ef31 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Fri, 10 Apr 2020 15:29:25 +0300 Subject: [PATCH 215/752] Update extended_roadmap.md --- docs/ru/extended_roadmap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md index 193e2035b56..1637b54311a 100644 --- a/docs/ru/extended_roadmap.md +++ b/docs/ru/extended_roadmap.md @@ -761,7 +761,7 @@ ClickHouse предоставляет возможность обратитьс Вместо этого предлагается описывать необходимые данные в конфигурационном файле сервера или в отдельном сервисе и ссылаться на них по именам. ### 9.3. Поддержка TLS для ZooKeeper. {#podderzhka-tls-dlia-zookeeper} - +[#10174](https://github.com/ClickHouse/ClickHouse/issues/10174) ## 10. Внешние словари. {#vneshnie-slovari} ### 10.1. + Исправление зависания в библиотеке доступа к YT. {#ispravlenie-zavisaniia-v-biblioteke-dostupa-k-yt} From 7b85a36975fc76b7d52500877cb1f1ef9ae8fdc6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 15:40:16 +0300 Subject: [PATCH 216/752] Added CollapsingSortedAlgorithm. --- ...form.cpp => CollapsingSortedAlgorithm.cpp} | 81 ++++--------------- .../Merges/CollapsingSortedAlgorithm.h | 59 ++++++++++++++ .../Merges/CollapsingSortedTransform.h | 74 ++++------------- .../IMergingAlgorithmWithSharedChunks.cpp | 66 +++++++++++++++ .../IMergingAlgorithmWithSharedChunks.h | 43 ++++++++++ .../Merges/MergingSortedAlgorithm.cpp | 6 -- .../Merges/MergingSortedAlgorithm.h | 1 - 7 files changed, 201 insertions(+), 129 deletions(-) rename src/Processors/Merges/{CollapsingSortedTransform.cpp => CollapsingSortedAlgorithm.cpp} (69%) create mode 100644 src/Processors/Merges/CollapsingSortedAlgorithm.h create mode 100644 src/Processors/Merges/IMergingAlgorithmWithSharedChunks.cpp create mode 100644 src/Processors/Merges/IMergingAlgorithmWithSharedChunks.h diff --git a/src/Processors/Merges/CollapsingSortedTransform.cpp b/src/Processors/Merges/CollapsingSortedAlgorithm.cpp similarity index 69% rename from src/Processors/Merges/CollapsingSortedTransform.cpp rename to src/Processors/Merges/CollapsingSortedAlgorithm.cpp index d1099184952..a9ee070564c 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.cpp +++ b/src/Processors/Merges/CollapsingSortedAlgorithm.cpp @@ -1,4 +1,5 @@ -#include +#include + #include #include #include @@ -16,70 +17,27 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } -CollapsingSortedTransform::CollapsingSortedTransform( +CollapsingSortedAlgorithm::CollapsingSortedAlgorithm( const Block & header, size_t num_inputs, SortDescription description_, const String & sign_column, size_t max_block_size, WriteBuffer * out_row_sources_buf_, - bool use_average_block_sizes) - : IMergingTransform(num_inputs, header, header, true) + bool use_average_block_sizes, + Logger * log_) + : IMergingAlgorithmWithSharedChunks(num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) - , description(std::move(description_)) , sign_column_number(header.getPositionByName(sign_column)) - , out_row_sources_buf(out_row_sources_buf_) - , chunk_allocator(num_inputs + max_row_refs) - , source_chunks(num_inputs) - , cursors(num_inputs) + , log(log_) { } -void CollapsingSortedTransform::initializeInputs() +void CollapsingSortedAlgorithm::reportIncorrectData() { - 
queue = SortingHeap(cursors); - is_queue_initialized = true; -} + if (!log) + return; -void CollapsingSortedTransform::consume(Chunk chunk, size_t input_number) -{ - updateCursor(std::move(chunk), input_number); - - if (is_queue_initialized) - queue.push(cursors[input_number]); -} - -void CollapsingSortedTransform::updateCursor(Chunk chunk, size_t source_num) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - chunk.setColumns(std::move(columns), num_rows); - - auto & source_chunk = source_chunks[source_num]; - - if (source_chunk) - { - source_chunk = chunk_allocator.alloc(std::move(chunk)); - cursors[source_num].reset(source_chunk->getColumns(), {}); - } - else - { - if (cursors[source_num].has_collation) - throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); - - source_chunk = chunk_allocator.alloc(std::move(chunk)); - cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); - } - - source_chunk->all_columns = cursors[source_num].all_columns; - source_chunk->sort_columns = cursors[source_num].sort_columns; -} - -void CollapsingSortedTransform::reportIncorrectData() -{ std::stringstream s; s << "Incorrect data: number of rows with sign = 1 (" << count_positive << ") differs with number of rows with sign = -1 (" << count_negative @@ -102,12 +60,12 @@ void CollapsingSortedTransform::reportIncorrectData() LOG_WARNING(log, s.rdbuf()); } -void CollapsingSortedTransform::insertRow(RowRef & row) +void CollapsingSortedAlgorithm::insertRow(RowRef & row) { merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); } -void CollapsingSortedTransform::insertRows() +void CollapsingSortedAlgorithm::insertRows() { if (count_positive == 0 && count_negative == 0) { @@ -150,13 +108,7 @@ void CollapsingSortedTransform::insertRows() current_row_sources.size() * sizeof(RowSourcePart)); } -void CollapsingSortedTransform::work() -{ - merge(); - prepareOutputChunk(merged_data); -} - -void CollapsingSortedTransform::merge() +IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() { /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` while (queue.isValid()) @@ -174,7 +126,7 @@ void CollapsingSortedTransform::merge() /// if there are enough rows and the last one is calculated completely if (key_differs && merged_data.hasEnoughRows()) - return; + Status(merged_data.pull()); if (key_differs) { @@ -229,13 +181,12 @@ void CollapsingSortedTransform::merge() { /// We take next block from the corresponding source, if there is one. 
queue.removeTop(); - requestDataForInput(current.impl->order); - return; + return Status(current.impl->order); } } insertRows(); - is_finished = true; + return Status(merged_data.pull(), true); } } diff --git a/src/Processors/Merges/CollapsingSortedAlgorithm.h b/src/Processors/Merges/CollapsingSortedAlgorithm.h new file mode 100644 index 00000000000..8deacabb9d6 --- /dev/null +++ b/src/Processors/Merges/CollapsingSortedAlgorithm.h @@ -0,0 +1,59 @@ +#pragma once +#include +#include +#include + +namespace Poco +{ + class Logger; +} + +namespace DB +{ + +class CollapsingSortedAlgorithm : public IMergingAlgorithmWithSharedChunks +{ +public: + CollapsingSortedAlgorithm( + const Block & header, + size_t num_inputs, + SortDescription description_, + const String & sign_column, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_, + bool use_average_block_sizes, + Logger * log_); + + Status merge() override; + +private: + MergedData merged_data; + + const size_t sign_column_number; + + static constexpr size_t max_row_refs = 4; /// first_negative, last_positive, last, current. + RowRef first_negative_row; + RowRef last_positive_row; + RowRef last_row; + + size_t count_positive = 0; /// The number of positive rows for the current primary key. + size_t count_negative = 0; /// The number of negative rows for the current primary key. + bool last_is_positive = false; /// true if the last row for the current primary key is positive. + + /// Fields specific for VERTICAL merge algorithm. + /// Row numbers are relative to the start of current primary key. + size_t current_pos = 0; /// Current row number + size_t first_negative_pos = 0; /// Row number of first_negative + size_t last_positive_pos = 0; /// Row number of last_positive + PODArray current_row_sources; /// Sources of rows with the current primary key + + size_t count_incorrect_data = 0; /// To prevent too many error messages from writing to the log. + Logger * log; + + void reportIncorrectData(); + void insertRow(RowRef & row); + void insertRows(); +}; + +} + diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 3a3c1540c06..3dd052dfcb4 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -1,6 +1,8 @@ #pragma once #include +#include + #include #include #include @@ -24,7 +26,7 @@ namespace DB * If negative by 1 is greater than positive rows, then only the first negative row is written. * Otherwise, a logical error. 
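 * [Editor's note, an illustrative example rather than original patch text:
 *  if one key's rows arrive with sign = +1, -1, +1, -1, +1, then
 *  count_positive = 3 and count_negative = 2, so only the last positive
 *  row survives in the merged output.]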
*/ -class CollapsingSortedTransform final : public IMergingTransform +class CollapsingSortedTransform final : public IMergingTransform2 { public: CollapsingSortedTransform( @@ -34,64 +36,22 @@ public: const String & sign_column, size_t max_block_size, WriteBuffer * out_row_sources_buf_ = nullptr, - bool use_average_block_sizes = false); + bool use_average_block_sizes = false) + : IMergingTransform2( + CollapsingSortedAlgorithm( + header, + num_inputs, + std::move(description_), + sign_column, + max_block_size, + out_row_sources_buf_, + use_average_block_sizes, + &Logger::get("CollapsingSortedTransform")), + num_inputs, header, header, true) + { + } String getName() const override { return "CollapsingSortedTransform"; } - void work() override; - -protected: - void initializeInputs() override; - void consume(Chunk chunk, size_t input_number) override; - -private: - Logger * log = &Logger::get("CollapsingSortedTransform"); - - MergedData merged_data; - - /// Settings - SortDescription description; - const size_t sign_column_number; - - /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) - /// If it is not nullptr then it should be populated during execution - WriteBuffer * out_row_sources_buf = nullptr; - - /// Allocator must be destroyed after all RowRefs. - detail::SharedChunkAllocator chunk_allocator; - - /// Chunks currently being merged. - using SourceChunks = std::vector; - SourceChunks source_chunks; - SortCursorImpls cursors; - - SortingHeap queue; - bool is_queue_initialized = false; - - using RowRef = detail::RowRefWithOwnedChunk; - static constexpr size_t max_row_refs = 4; /// first_negative, last_positive, last, current. - RowRef first_negative_row; - RowRef last_positive_row; - RowRef last_row; - - size_t count_positive = 0; /// The number of positive rows for the current primary key. - size_t count_negative = 0; /// The number of negative rows for the current primary key. - bool last_is_positive = false; /// true if the last row for the current primary key is positive. - - /// Fields specific for VERTICAL merge algorithm. - /// Row numbers are relative to the start of current primary key. - size_t current_pos = 0; /// Current row number - size_t first_negative_pos = 0; /// Row number of first_negative - size_t last_positive_pos = 0; /// Row number of last_positive - PODArray current_row_sources; /// Sources of rows with the current primary key - - size_t count_incorrect_data = 0; /// To prevent too many error messages from writing to the log. 
- - void reportIncorrectData(); - void insertRow(RowRef & row); - void insertRows(); - void merge(); - void updateCursor(Chunk chunk, size_t source_num); - void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, source_chunks[cursor.impl->order]); } }; } diff --git a/src/Processors/Merges/IMergingAlgorithmWithSharedChunks.cpp b/src/Processors/Merges/IMergingAlgorithmWithSharedChunks.cpp new file mode 100644 index 00000000000..20e1fcffef3 --- /dev/null +++ b/src/Processors/Merges/IMergingAlgorithmWithSharedChunks.cpp @@ -0,0 +1,66 @@ +#include + +namespace DB +{ + +IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( + size_t num_inputs, + SortDescription description_, + WriteBuffer * out_row_sources_buf_, + size_t max_row_refs) + : description(std::move(description_)) + , chunk_allocator(num_inputs + max_row_refs) + , source_chunks(num_inputs) + , cursors(num_inputs) + , out_row_sources_buf(out_row_sources_buf_) +{ +} + +static void prepareChunk(Chunk & chunk) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); +} + +void IMergingAlgorithmWithSharedChunks::initialize(Chunks chunks) +{ + source_chunks.resize(chunks.size()); + + for (size_t source_num = 0; source_num < source_chunks.size(); ++source_num) + { + if (!chunks[source_num]) + continue; + + prepareChunk(chunks[source_num]); + + auto & source_chunk = source_chunks[source_num]; + + source_chunk = chunk_allocator.alloc(std::move(chunks[source_num])); + cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); + + source_chunk->all_columns = cursors[source_num].all_columns; + source_chunk->sort_columns = cursors[source_num].sort_columns; + } + + queue = SortingHeap(cursors); +} + +void IMergingAlgorithmWithSharedChunks::consume(Chunk chunk, size_t source_num) +{ + prepareChunk(chunk); + + auto & source_chunk = source_chunks[source_num]; + source_chunk = chunk_allocator.alloc(std::move(chunk)); + cursors[source_num].reset(source_chunk->getColumns(), {}); + + source_chunk->all_columns = cursors[source_num].all_columns; + source_chunk->sort_columns = cursors[source_num].sort_columns; + + queue.push(cursors[source_num]); +} + +} diff --git a/src/Processors/Merges/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/IMergingAlgorithmWithSharedChunks.h new file mode 100644 index 00000000000..19f78629ac4 --- /dev/null +++ b/src/Processors/Merges/IMergingAlgorithmWithSharedChunks.h @@ -0,0 +1,43 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IMergingAlgorithmWithSharedChunks : public IMergingAlgorithm +{ +public: + IMergingAlgorithmWithSharedChunks( + size_t num_inputs, + SortDescription description_, + WriteBuffer * out_row_sources_buf_, + size_t max_row_refs); + + void initialize(Chunks chunks) override; + void consume(Chunk chunk, size_t source_num) override; + +private: + SortDescription description; + + /// Allocator must be destroyed after source_chunks. + detail::SharedChunkAllocator chunk_allocator; + + /// Chunks currently being merged. 
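+    /// [Editor's note, not part of the original patch: chunks are shared so
+    /// that RowRefs held across merge() calls (for example first_negative_row
+    /// in the collapsing algorithm) keep their source chunk alive even after
+    /// the cursor has advanced to a newer chunk.]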
+ using SourceChunks = std::vector; + SourceChunks source_chunks; + SortCursorImpls cursors; + +protected: + SortingHeap queue; + + /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) + /// If it is not nullptr then it should be populated during execution + WriteBuffer * out_row_sources_buf = nullptr; + + using RowRef = detail::RowRefWithOwnedChunk; + void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, source_chunks[cursor.impl->order]); } +}; + +} diff --git a/src/Processors/Merges/MergingSortedAlgorithm.cpp b/src/Processors/Merges/MergingSortedAlgorithm.cpp index 3be9f3f64ef..7a74a20e9ee 100644 --- a/src/Processors/Merges/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/MergingSortedAlgorithm.cpp @@ -48,12 +48,6 @@ static void prepareChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } -void MergingSortedAlgorithm::updateCursor(size_t source_num) -{ - auto & source_chunk = source_chunks[source_num]; - cursors[source_num].reset(source_chunk.getColumns(), {}); -} - void MergingSortedAlgorithm::initialize(Chunks chunks) { source_chunks = std::move(chunks); diff --git a/src/Processors/Merges/MergingSortedAlgorithm.h b/src/Processors/Merges/MergingSortedAlgorithm.h index 5c4087ed0bd..17bcdca1ccf 100644 --- a/src/Processors/Merges/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/MergingSortedAlgorithm.h @@ -49,7 +49,6 @@ private: SortingHeap queue_without_collation; SortingHeap queue_with_collation; - void updateCursor(size_t source_num); Status insertFromChunk(size_t source_num); template From d46c228b9c2c9c6cead0923173c5fb44bced891a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 15:47:30 +0300 Subject: [PATCH 217/752] Fix build. --- src/Processors/Merges/CollapsingSortedAlgorithm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/Merges/CollapsingSortedAlgorithm.h b/src/Processors/Merges/CollapsingSortedAlgorithm.h index 8deacabb9d6..6d6f637c9af 100644 --- a/src/Processors/Merges/CollapsingSortedAlgorithm.h +++ b/src/Processors/Merges/CollapsingSortedAlgorithm.h @@ -24,6 +24,8 @@ public: bool use_average_block_sizes, Logger * log_); + CollapsingSortedAlgorithm(CollapsingSortedAlgorithm &&) = default; + Status merge() override; private: From e9587b8f4cd3066eb603ac29f763798658b86c3b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 15:53:51 +0300 Subject: [PATCH 218/752] Fix build. --- src/Processors/Merges/IMergingTransform.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 3493ea46af1..163188241b9 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -117,13 +117,13 @@ class IMergingTransform2 : public IMergingTransformBase { public: IMergingTransform2( - Algorithm algorithm_, + Algorithm && algorithm_, size_t num_inputs, const Block & input_header, const Block & output_header, bool have_all_inputs_) : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_) - , algorithm(std::move(algorithm_)) + , algorithm(std::forward(algorithm_)) { } From 51e5603044cf2300d2ea3a503f43c68bc249e8a7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 16:03:37 +0300 Subject: [PATCH 219/752] Fix build. 
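Editor's note (not part of the original commit message, an assumption-based
simplification): the build broke because the previous two patches handed a
fully constructed algorithm to IMergingTransform2, which requires the
Algorithm type to be movable; members such as SortingHeap and the cursor
vectors make that fragile, so the defaulted move constructors are removed
again and the transform now builds its algorithm in place from perfectly
forwarded constructor arguments. A hypothetical minimal illustration of the
pattern, with simplified names:

    template <typename Algorithm>
    class MergingTransform
    {
    public:
        template <typename... Args>
        explicit MergingTransform(Args &&... args)
            : algorithm(std::forward<Args>(args)...)  // built in place, never moved
        {
        }

    private:
        Algorithm algorithm;
    };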
--- .../Merges/CollapsingSortedAlgorithm.cpp | 2 ++ .../Merges/CollapsingSortedAlgorithm.h | 2 -- .../Merges/CollapsingSortedTransform.h | 27 +++++++------------ src/Processors/Merges/IMergingTransform.h | 7 ++--- .../Merges/MergingSortedAlgorithm.cpp | 5 ++++ .../Merges/MergingSortedAlgorithm.h | 2 -- .../Merges/MergingSortedTransform.cpp | 16 +++++------ 7 files changed, 28 insertions(+), 33 deletions(-) diff --git a/src/Processors/Merges/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/CollapsingSortedAlgorithm.cpp index a9ee070564c..b1e06cd4541 100644 --- a/src/Processors/Merges/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/CollapsingSortedAlgorithm.cpp @@ -5,6 +5,8 @@ #include #include +#include + /// Maximum number of messages about incorrect data in the log. #define MAX_ERROR_MESSAGES 10 diff --git a/src/Processors/Merges/CollapsingSortedAlgorithm.h b/src/Processors/Merges/CollapsingSortedAlgorithm.h index 6d6f637c9af..8deacabb9d6 100644 --- a/src/Processors/Merges/CollapsingSortedAlgorithm.h +++ b/src/Processors/Merges/CollapsingSortedAlgorithm.h @@ -24,8 +24,6 @@ public: bool use_average_block_sizes, Logger * log_); - CollapsingSortedAlgorithm(CollapsingSortedAlgorithm &&) = default; - Status merge() override; private: diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 3dd052dfcb4..5a9ed16ea2a 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -3,14 +3,6 @@ #include #include -#include -#include -#include -#include -#include - -#include - namespace DB { @@ -38,16 +30,15 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform2( - CollapsingSortedAlgorithm( - header, - num_inputs, - std::move(description_), - sign_column, - max_block_size, - out_row_sources_buf_, - use_average_block_sizes, - &Logger::get("CollapsingSortedTransform")), - num_inputs, header, header, true) + num_inputs, header, header, true, + header, + num_inputs, + std::move(description_), + sign_column, + max_block_size, + out_row_sources_buf_, + use_average_block_sizes, + &Logger::get("CollapsingSortedTransform")) { } diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 163188241b9..260f57806a7 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -116,14 +116,15 @@ template class IMergingTransform2 : public IMergingTransformBase { public: + template IMergingTransform2( - Algorithm && algorithm_, size_t num_inputs, const Block & input_header, const Block & output_header, - bool have_all_inputs_) + bool have_all_inputs_, + Args && ... args) : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_) - , algorithm(std::forward(algorithm_)) + , algorithm(std::forward(args) ...) 
{ } diff --git a/src/Processors/Merges/MergingSortedAlgorithm.cpp b/src/Processors/Merges/MergingSortedAlgorithm.cpp index 7a74a20e9ee..383d8fe90de 100644 --- a/src/Processors/Merges/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/MergingSortedAlgorithm.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + MergingSortedAlgorithm::MergingSortedAlgorithm( const Block & header, size_t num_inputs, diff --git a/src/Processors/Merges/MergingSortedAlgorithm.h b/src/Processors/Merges/MergingSortedAlgorithm.h index 17bcdca1ccf..734caec3424 100644 --- a/src/Processors/Merges/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/MergingSortedAlgorithm.h @@ -19,8 +19,6 @@ public: WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes); - MergingSortedAlgorithm(MergingSortedAlgorithm && other) = default; - void addInput(); void initialize(Chunks chunks) override; diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 2657077c143..68af48062ba 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -8,11 +8,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - MergingSortedTransform::MergingSortedTransform( const Block & header, size_t num_inputs, @@ -24,9 +19,14 @@ MergingSortedTransform::MergingSortedTransform( bool use_average_block_sizes, bool have_all_inputs_) : IMergingTransform2( - MergingSortedAlgorithm(header, num_inputs, std::move(description_), max_block_size, - limit_, out_row_sources_buf_, use_average_block_sizes), - num_inputs, header, header, have_all_inputs_) + num_inputs, header, header, have_all_inputs_, + header, + num_inputs, + std::move(description_), + max_block_size, + limit_, + out_row_sources_buf_, + use_average_block_sizes) , quiet(quiet_) { } From d018977f4b0de8c933a6d90995cb0c17ae9a744e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 10 Apr 2020 16:36:51 +0300 Subject: [PATCH 220/752] fix 'ALTER CLEAR INDEX/COLUMN' queries with compact parts --- .../MergeTree/IMergeTreeDataPartWriter.h | 5 ++-- src/Storages/MergeTree/MergeTreeData.h | 5 ---- .../MergeTree/MergeTreeDataMergerMutator.cpp | 24 +++++++++++++++++-- .../MergeTree/MergeTreeDataMergerMutator.h | 6 +++++ .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndices.h | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 6 +++-- .../MergeTree/MergedBlockOutputStream.h | 2 ++ 8 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 4eb842f9279..d18b31edc72 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -102,8 +102,7 @@ public: written_offset_columns = written_offset_columns_; } - using SkipIndices = std::vector; - const SkipIndices & getSkipIndices() { return skip_indices; } + const MergeTreeIndices & getSkipIndices() { return skip_indices; } void initSkipIndices(); void initPrimaryIndex(); @@ -126,7 +125,7 @@ protected: CompressionCodecPtr default_codec; - std::vector skip_indices; + MergeTreeIndices skip_indices; MergeTreeWriterSettings settings; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 125a90d26e0..d299d39726e 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -433,11 +433,6 @@ public: 
DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); - std::vector getSkipIndices() const - { - return std::vector(std::begin(skip_indices), std::end(skip_indices)); - } - /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 2b732d879b0..c10a6c6dd59 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -773,6 +773,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor MergedBlockOutputStream to{ new_data_part, merging_columns, + data.skip_indices, compression_codec, merged_column_to_size, data_settings->min_merge_bytes_to_use_direct_io, @@ -991,7 +992,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor splitMutationCommands(source_part, commands_for_part, for_interpreter, for_file_renames); - UInt64 watch_prev_elapsed = 0; MergeStageProgress stage_progress(1.0); @@ -1043,8 +1043,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor /// All columns from part are changed and may be some more that were missing before in part if (isCompactPart(source_part) || source_part->getColumns().isSubsetOf(updated_header.getNamesAndTypesList())) { + auto part_indices = getIndicesForNewDataPart(data.skip_indices, for_file_renames); mutateAllPartColumns( new_data_part, + part_indices, in, time_of_mutation, compression_codec, @@ -1260,6 +1262,7 @@ void MergeTreeDataMergerMutator::splitMutationCommands( else if (is_compact_part && command.type == MutationCommand::Type::DROP_COLUMN) { removed_columns_from_compact_part.emplace(command.column_name); + for_file_renames.push_back(command); } else if (command.type == MutationCommand::Type::RENAME_COLUMN) { @@ -1439,6 +1442,22 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart( return all_columns; } +MergeTreeIndices MergeTreeDataMergerMutator::getIndicesForNewDataPart( + const MergeTreeIndices & all_indices, + const MutationCommands & commands_for_removes) +{ + NameSet removed_indices; + for (const auto & command : commands_for_removes) + if (command.type == MutationCommand::DROP_INDEX) + removed_indices.insert(command.column_name); + + MergeTreeIndices new_indices; + for (const auto & index : all_indices) + if (!removed_indices.count(index->name)) + new_indices.push_back(index); + + return new_indices; +} std::set MergeTreeDataMergerMutator::getIndicesToRecalculate( BlockInputStreamPtr & input_stream, @@ -1503,6 +1522,7 @@ bool MergeTreeDataMergerMutator::shouldExecuteTTL(const Names & columns, const M void MergeTreeDataMergerMutator::mutateAllPartColumns( MergeTreeData::MutableDataPartPtr new_data_part, + const MergeTreeIndices & skip_indices, BlockInputStreamPtr mutating_stream, time_t time_of_mutation, const CompressionCodecPtr & compression_codec, @@ -1524,6 +1544,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( MergedBlockOutputStream out{ new_data_part, new_data_part->getColumns(), + skip_indices, compression_codec}; mutating_stream->readPrefix(); @@ -1560,7 +1581,6 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( if (mutating_stream == nullptr) throw Exception("Cannot mutate part columns with uninitialized mutations stream. 
It's a bug", ErrorCodes::LOGICAL_ERROR); - if (need_remove_expired_values) mutating_stream = std::make_shared(mutating_stream, data, new_data_part, time_of_mutation, true); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index b24b56a4780..6f4f8a03e9a 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -160,6 +160,11 @@ private: NamesAndTypesList all_columns, const MutationCommands & commands_for_removes); + /// Get skip indcies, that should exists in the resulting data part. + static MergeTreeIndices getIndicesForNewDataPart( + const MergeTreeIndices & all_indices, + const MutationCommands & commands_for_removes); + bool shouldExecuteTTL(const Names & columns, const MutationCommands & commands) const; /// Return set of indices which should be recalculated during mutation also @@ -173,6 +178,7 @@ private: /// Override all columns of new part using mutating_stream void mutateAllPartColumns( MergeTreeData::MutableDataPartPtr new_data_part, + const MergeTreeIndices & skip_indices, BlockInputStreamPtr mutating_stream, time_t time_of_mutation, const CompressionCodecPtr & codec, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c560583259c..34c615994f0 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -294,7 +294,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa /// either default lz4 or compression method with zero thresholds on absolute and relative part size. auto compression_codec = data.global_context.chooseCompressionCodec(0, 0); - MergedBlockOutputStream out(new_data_part, columns, compression_codec); + MergedBlockOutputStream out(new_data_part, columns, data.skip_indices, compression_codec); out.writePrefix(); out.writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 007851f2912..d871a522e6c 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -125,7 +125,7 @@ public: size_t granularity; }; -using MergeTreeIndices = std::vector; +using MergeTreeIndices = std::vector; class MergeTreeIndexFactory : private boost::noncopyable diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 221170b7a32..2b482ac7c29 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -15,10 +15,11 @@ namespace ErrorCodes MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, const NamesAndTypesList & columns_list_, + const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, bool blocks_are_granules_size) : MergedBlockOutputStream( - data_part, columns_list_, default_codec, {}, + data_part, columns_list_, skip_indices, default_codec, {}, data_part->storage.global_context.getSettings().min_bytes_to_use_direct_io, blocks_are_granules_size) { @@ -27,6 +28,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, const NamesAndTypesList & columns_list_, + const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, const MergeTreeData::DataPart::ColumnToSize & merged_column_to_size, size_t 
aio_threshold, @@ -49,7 +51,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( disk->createDirectories(part_path); - writer = data_part->getWriter(columns_list, data_part->storage.getSkipIndices(), default_codec, writer_settings); + writer = data_part->getWriter(columns_list, skip_indices, default_codec, writer_settings); writer->initPrimaryIndex(); writer->initSkipIndices(); } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index ee453f41a31..5a92977640e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -16,12 +16,14 @@ public: MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, const NamesAndTypesList & columns_list_, + const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, bool blocks_are_granules_size = false); MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, const NamesAndTypesList & columns_list_, + const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, const MergeTreeData::DataPart::ColumnToSize & merged_column_to_size, size_t aio_threshold, From 6cc1389356a77fb7ed0dffa3ea00e8b101698c31 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 10 Apr 2020 10:38:58 -0300 Subject: [PATCH 221/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index afd88694def..9707eee3964 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -1,17 +1,15 @@ # Настройки MergeTree таблиц {#merge-tree-settings} -Значения по умолчанию (для всех таблиц) задаются в config.xml в секции merge_tree. +Значения по умолчанию (для всех таблиц) можно посмотреть в таблице `system.merge_tree_settings`, их можно переопределить в config.xml в секции merge_tree, или задать в секции `SETTINGS` у каждой таблицы. -Пример: +Пример переопределения в `config.xml`: ```text 5 ``` -Эти значения можно задать (перекрыть) у таблиц в секции `SETTINGS` у команды `CREATE TABLE`. - -Пример: +Пример для определения в `SETTINGS` у конкретной таблицы: ```sql CREATE TABLE foo ( From 123c654db384c3676558dbd396c1425714553755 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 16:41:00 +0300 Subject: [PATCH 222/752] Fix build. 
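Editor's note (not part of the original commit message): the one-line change
below restores a missing `return`. As first merged, `Status(merged_data.pull())`
was constructed and immediately discarded when a block reached max_block_size,
so merge() kept looping instead of emitting the finished chunk; an unused-value
diagnostic under -Werror would also plausibly explain the build failure. For
reference, the Status forms used across this series are Status(chunk) for a
finished block, Status(source_num) to request more input, and
Status(chunk, true) for the final block.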
--- src/Processors/Merges/CollapsingSortedAlgorithm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Merges/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/CollapsingSortedAlgorithm.cpp index b1e06cd4541..3d0dab37c4e 100644 --- a/src/Processors/Merges/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/CollapsingSortedAlgorithm.cpp @@ -128,7 +128,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() /// if there are enough rows and the last one is calculated completely if (key_differs && merged_data.hasEnoughRows()) - Status(merged_data.pull()); + return Status(merged_data.pull()); if (key_differs) { From ad0c994b28e98a887c923a3dd51ebfe3aa39952c Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 10 Apr 2020 10:48:19 -0300 Subject: [PATCH 223/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index 9707eee3964..628104dda2b 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -1,6 +1,6 @@ # Настройки MergeTree таблиц {#merge-tree-settings} -Значения по умолчанию (для всех таблиц) можно посмотреть в таблице `system.merge_tree_settings`, их можно переопределить в config.xml в секции merge_tree, или задать в секции `SETTINGS` у каждой таблицы. +Значения настроек merge-tree для (для всех MergeTree таблиц) можно посмотреть в таблице `system.merge_tree_settings`, их можно переопределить в `config.xml` в секции `merge_tree`, или задать в секции `SETTINGS` у каждой таблицы. Пример переопределения в `config.xml`: ```text @@ -20,9 +20,7 @@ ORDER BY tuple() SETTINGS max_suspicious_broken_parts = 500; ``` -Или изменить с помощью команды `ALTER TABLE ... MODIFY SETTING`. - -Пример: +Пример изменения настроек у таблицы командой `ALTER TABLE ... MODIFY SETTING`: ```sql ALTER TABLE foo MODIFY SETTING max_suspicious_broken_parts = 100; From f32ae36f223d16f62e7bb9c3d9284dfdf3b0638b Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Fri, 10 Apr 2020 10:48:53 -0300 Subject: [PATCH 224/752] Update merge_tree_settings.md --- docs/ru/operations/settings/merge_tree_settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/settings/merge_tree_settings.md b/docs/ru/operations/settings/merge_tree_settings.md index 628104dda2b..5297e359547 100644 --- a/docs/ru/operations/settings/merge_tree_settings.md +++ b/docs/ru/operations/settings/merge_tree_settings.md @@ -1,6 +1,6 @@ # Настройки MergeTree таблиц {#merge-tree-settings} -Значения настроек merge-tree для (для всех MergeTree таблиц) можно посмотреть в таблице `system.merge_tree_settings`, их можно переопределить в `config.xml` в секции `merge_tree`, или задать в секции `SETTINGS` у каждой таблицы. +Значения настроек merge-tree (для всех MergeTree таблиц) можно посмотреть в таблице `system.merge_tree_settings`, их можно переопределить в `config.xml` в секции `merge_tree`, или задать в секции `SETTINGS` у каждой таблицы. Пример переопределения в `config.xml`: ```text @@ -20,7 +20,7 @@ ORDER BY tuple() SETTINGS max_suspicious_broken_parts = 500; ``` -Пример изменения настроек у таблицы командой `ALTER TABLE ... MODIFY SETTING`: +Пример изменения настроек у конкретной таблицы командой `ALTER TABLE ... 
MODIFY SETTING`: ```sql ALTER TABLE foo MODIFY SETTING max_suspicious_broken_parts = 100; From 2034f09b2af9697f90502acb0619749499fdfe5b Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 16:50:38 +0300 Subject: [PATCH 225/752] Grammar in syntax.md --- docs/en/sql_reference/syntax.md | 52 ++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/en/sql_reference/syntax.md b/docs/en/sql_reference/syntax.md index 12a4b9df7ef..10e8c421f6d 100644 --- a/docs/en/sql_reference/syntax.md +++ b/docs/en/sql_reference/syntax.md @@ -16,11 +16,11 @@ INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input\_format\_values\_interpret\_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). Data can have any format. When a query is received, the server calculates no more than [max\_query\_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. -This means the system doesn’t have problems with large `INSERT` queries, like MySQL does. +It allows for avoiding issues with large `INSERT` queries. When using the `Values` format in an `INSERT` query, it may seem that data is parsed the same as expressions in a `SELECT` query, but this is not true. The `Values` format is much more limited. -Next we will cover the full parser. For more information about format parsers, see the [Formats](../interfaces/formats.md) section. +The rest of this article covers the full parser. For more information about format parsers, see the [Formats](../interfaces/formats.md) section. ## Spaces {#spaces} @@ -28,33 +28,33 @@ There may be any number of space symbols between syntactical constructions (incl ## Comments {#comments} -SQL-style and C-style comments are supported. -SQL-style comments: from `--` to the end of the line. The space after `--` can be omitted. -Comments in C-style: from `/*` to `*/`. These comments can be multiline. Spaces are not required here, either. +ClickHouse supports either SQL-style and C-style comments. +SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted. +C-style are from `/*` to `*/`and can be multiline, spaces are not required either. ## Keywords {#syntax-keywords} Keywords are case-insensitive when they correspond to: - SQL standard. For example, `SELECT`, `select` and `SeLeCt` are all valid. -- Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is same as `datetime`. +- Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is the same as `datetime`. Whether data type name is case-sensitive can be checked in the `system.data_type_families` table. -In contrast to standard SQL all other keywords (including functions names) are **case-sensitive**. +In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**. 
-Keywords are not reserved (they are just parsed as keywords in the corresponding context). If you use [identifiers](#syntax-identifiers) the same as the keywords, enclose them into quotes. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`. +Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#syntax-identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`. ## Identifiers {#syntax-identifiers} Identifiers are: -- Cluster, database, table, partition and column names. +- Cluster, database, table, partition, and column names. - Functions. - Data types. - [Expression aliases](#syntax-expression_aliases). -Identifiers can be quoted or non-quoted. It is recommended to use non-quoted identifiers. +Identifiers can be quoted or non-quoted. The latter is preferred. Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.` @@ -62,34 +62,34 @@ If you want to use identifiers the same as keywords or you want to use other sym ## Literals {#literals} -There are: numeric, string, compound and `NULL` literals. +There are numeric, string, compound, and `NULL` literals. ### Numeric {#numeric} -A numeric literal tries to be parsed: +Numeric literal tries to be parsed: -- First as a 64-bit signed number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function. +- First, as a 64-bit signed number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function. - If unsuccessful, as a 64-bit unsigned number, using the [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) function. - If unsuccessful, as a floating-point number using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function. -- Otherwise, an error is returned. +- Otherwise, it returns an error. -The corresponding value will have the smallest type that the value fits in. +Literal value has the smallest type that the value fits in. For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more information, see [Data types](../sql_reference/data_types/index.md). Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. ### String {#syntax-string-literal} -Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. This means that you can use the sequences `\'`and`\\`. The value will have the [String](../sql_reference/data_types/string.md) type. +Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. It means that you can use the sequences `\'`and`\\`. The value will have the [String](../sql_reference/data_types/string.md) type. 
-The minimum set of characters that you need to escape in string literals: `'` and `\`. Single quote can be escaped with the single quote, literals `'It\'s'` and `'It''s'` are equal.
+In string literals, you need to escape at least `'` and `\`. Single quotes can be escaped with the single quote, literals `'It\'s'` and `'It''s'` are equal.
 
 ### Compound {#compound}
 
-Constructions are supported for arrays: `[1, 2, 3]` and tuples: `(1, 'Hello, world!', 2)`..
-Actually, these are not literals, but expressions with the array creation operator and the tuple creation operator, respectively.
+Arrays are constructed with square brackets `[1, 2, 3]`. Tuples are constructed with round brackets `(1, 'Hello, world!', 2)`.
+Technically these are not literals, but expressions with the array creation operator and the tuple creation operator, respectively.
 An array must consist of at least one item, and a tuple must have at least two items.
 
-Tuples have a special purpose for use in the `IN` clause of a `SELECT` query. Tuples can be obtained as the result of a query, but they can’t be saved to a database (with the exception of [Memory](../engines/table_engines/special/memory.md) tables).
+There's a separate case when tuples appear in the `IN` clause of a `SELECT` query. Query results can include tuples, but tuples can’t be saved to a database (except for tables with [Memory](../engines/table_engines/special/memory.md) engine).
 
 ### NULL {#null-literal}
 
@@ -99,13 +99,13 @@ In order to store `NULL` in a table field, it must be of the [Nullable](../sql_r
 
 Depending on the data format (input or output), `NULL` may have a different representation. For more information, see the documentation for [data formats](../interfaces/formats.md#formats).
 
-There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation will also be `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation.
+There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation.
 
 In queries, you can check `NULL` using the [IS NULL](operators.md#operator-is-null) and [IS NOT NULL](operators.md) operators and the related functions `isNull` and `isNotNull`.
 
 ## Functions {#functions}
 
-Functions are written like an identifier with a list of arguments (possibly empty) in brackets. In contrast to standard SQL, the brackets are required, even for an empty arguments list. Example: `now()`.
+Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`.
 
 There are regular and aggregate functions (see the section “Aggregate functions”). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions.
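Two short queries illustrating the points above (an editor's addition; `x` and `t` are hypothetical names):

``` sql
SELECT 'It\'s', 'It''s';          -- two spellings of the same string literal
SELECT quantile(0.9)(x) FROM t;   -- parametric aggregate: parameter list first, argument list second
```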
## Operators {#operators} @@ -115,11 +115,11 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult ## Data Types and Database Table Engines {#data_types-and-database-table-engines} -Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an arguments list in brackets. For more information, see the sections “Data types,” “Table engines,” and “CREATE”. +Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections “Data types,” “Table engines,” and “CREATE”. ## Expression Aliases {#syntax-expression_aliases} -An alias is a user-defined name for an expression in a query. +An alias is a user-defined name for expression in a query. ``` sql expr AS alias @@ -141,7 +141,7 @@ expr AS alias ### Notes on Usage {#notes-on-usage} -Aliases are global for a query or subquery and you can define an alias in any part of a query for any expression. For example, `SELECT (1 AS n) + 2, n`. +Aliases are global for a query or subquery, and you can define an alias in any part of a query for any expression. For example, `SELECT (1 AS n) + 2, n`. Aliases are not visible in subqueries and between subqueries. For example, while executing the query `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ClickHouse generates the exception `Unknown identifier: num`. @@ -182,4 +182,4 @@ An expression is a function, identifier, literal, application of an operator, ex A list of expressions is one or more expressions separated by commas. Functions and operators, in turn, can have expressions as arguments. -[Original article](https://clickhouse.tech/docs/en/query_language/syntax/) +[Original article](https://clickhouse.tech/docs/en/sql_reference/syntax/) From 9d81f896488d5f8496f598f1544d2a754617f906 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Apr 2020 16:51:09 +0300 Subject: [PATCH 226/752] Add sleep to test --- tests/queries/0_stateless/01086_odbc_roundtrip.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01086_odbc_roundtrip.sh b/tests/queries/0_stateless/01086_odbc_roundtrip.sh index 827376395d1..71ea517f4dd 100755 --- a/tests/queries/0_stateless/01086_odbc_roundtrip.sh +++ b/tests/queries/0_stateless/01086_odbc_roundtrip.sh @@ -6,6 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) for i in $(seq 1 10); do ${CLICKHOUSE_CLIENT} -q "select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (ANSI)}','system','tables'))" 2>/dev/null && break + sleep 0.1 done ${CLICKHOUSE_CLIENT} --query "select count() > 1 as ok from (select * from odbc('DSN={ClickHouse DSN (Unicode)}','system','tables'))" From 81a7b4b248ef77e5fb71502fbbeb93dcb135ed59 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 16:55:00 +0300 Subject: [PATCH 227/752] Grammar in operators.md --- docs/en/sql_reference/operators.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/en/sql_reference/operators.md b/docs/en/sql_reference/operators.md index 418a9e32771..8ae9e460d87 100644 --- a/docs/en/sql_reference/operators.md +++ b/docs/en/sql_reference/operators.md @@ -5,8 +5,7 @@ toc_title: Operators # Operators {#operators} -All operators are transformed to their corresponding functions at the query parsing stage in accordance with their precedence and associativity. 
-Groups of operators are listed in order of priority (the higher it is in the list, the earlier the operator is connected to its arguments). +ClickHouse transforms operators to their corresponding functions at the query parsing stage according to their priority, precedence, and associativity. ## Access Operators {#access-operators} @@ -78,7 +77,7 @@ Groups of operators are listed in order of priority (the higher it is in the lis EXTRACT(part FROM date); ``` -Extracts a part from a given date. For example, you can retrieve a month from a given date, or a second from a time. +Extract parts from a given date. For example, you can retrieve a month from a given date, or a second from a time. The `part` parameter specifies which part of the date to retrieve. The following values are available: @@ -151,7 +150,7 @@ Types of intervals: - `YEAR` !!! warning "Warning" - Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Express intervals in units that are smaller or equal the the smallest unit of the interval, for example `INTERVAL 25 HOUR`. You can use consequtive operations like in the example below. + Intervals with different types can’t be combined. You can’t use expressions like `INTERVAL 4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, `INTERVAL 25 HOUR`. You can use consecutive operations, like in the example below. Example: @@ -214,7 +213,7 @@ The `transform` function does not work with `NULL`. `x -> expr` – The `lambda(x, expr) function.` -The following operators do not have a priority, since they are brackets: +The following operators do not have a priority since they are brackets: ## Array Creation Operator {#array-creation-operator} @@ -229,7 +228,7 @@ The following operators do not have a priority, since they are brackets: All binary operators have left associativity. For example, `1 + 2 + 3` is transformed to `plus(plus(1, 2), 3)`. Sometimes this doesn’t work the way you expect. For example, `SELECT 4 > 2 > 3` will result in 0. -For efficiency, the `and` and `or` functions accept any number of arguments. The corresponding chains of `AND` and `OR` operators are transformed to a single call of these functions. +For efficiency, the `and` and `or` functions accept any number of arguments. The corresponding chains of `AND` and `OR` operators are transformed into a single call of these functions. 
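For example (an editor's addition):

``` sql
SELECT 1 AND 1 AND 0;  -- the chain is parsed as one variadic call, and(1, 1, 0), and returns 0
```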
## Checking for `NULL` {#checking-for-null} From 0b4019becb61d63e9d20efccc8b7899e9ff778b3 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 16:56:25 +0300 Subject: [PATCH 228/752] Website performance improvements (#10175) * workers moved to separate repo https://github.com/ClickHouse/clickhouse-website-worker * support prefetch tag * Prefetch docs from front page + async load of secondary images --- website/index.html | 4 ++ website/js/base.js | 6 +++ website/templates/common_meta.html | 4 ++ website/templates/index/community.html | 18 +++---- website/templates/index/efficient.html | 4 +- website/templates/index/scalable.html | 2 +- website/templates/index/why.html | 8 +-- website/workers/events.js | 34 ------------ website/workers/meet-form.js | 75 -------------------------- website/workers/play-api.js | 24 --------- website/workers/repo.js | 10 ---- 11 files changed, 30 insertions(+), 159 deletions(-) delete mode 100644 website/workers/events.js delete mode 100644 website/workers/meet-form.js delete mode 100644 website/workers/play-api.js delete mode 100644 website/workers/repo.js diff --git a/website/index.html b/website/index.html index b249fc31285..e2ac6e31441 100644 --- a/website/index.html +++ b/website/index.html @@ -1,3 +1,7 @@ +{% set prefetch_items = [ + ('/docs/en/', 'document') +] %} + {% extends "templates/base.html" %} {% block content %} diff --git a/website/js/base.js b/website/js/base.js index ae8b3c01573..4e43a44d63a 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -47,6 +47,12 @@ }, 70); } + $('img').each(function() { + var src = $(this).attr('data-src'); + if (src) { + $(this).attr('src', src); + } + }); (function (d, w, c) { (w[c] = w[c] || []).push(function() { diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 7ed5a8409ec..2aca17f93a2 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -19,3 +19,7 @@ content="{% if description %}{{ description }}{% else %}{{ _('ClickHouse is an open source distributed column-oriented database management system that allows generating analytical data reports in real time using SQL queries. Сreated by Yandex ClickHouse manages extremely large volumes of data in a stable and sustainable manner.') }}{% endif %}"/> + +{% for prefetch_item in prefetch_items %} + +{% endfor %} diff --git a/website/templates/index/community.html b/website/templates/index/community.html index e48edb311b6..47bcbd67218 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -9,7 +9,7 @@
[Image-markup residue: the hunks for community.html, efficient.html, scalable.html and why.html arrived with their HTML stripped, so the changed <img> lines are unrecoverable here. Judging from the base.js change above, each hunk rewrites an image tag from src to data-src for lazy loading. Surviving alt/heading texts, in order: "ClickHouse YouTube Channel", "ClickHouse Official Twitter Account", "ClickHouse at Telegram", "ClickHouse GitHub", "Blazing fast", "Fault tolerant", "Easy to use", "Highly reliable". Surviving description context from why.html: "...processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency." and "ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some DBMS."]
diff --git a/website/workers/events.js b/website/workers/events.js deleted file mode 100644 index 653139af9f9..00000000000 --- a/website/workers/events.js +++ /dev/null @@ -1,34 +0,0 @@ -addEventListener('fetch', event => { - event.respondWith(handleRequest(event.request)) -}) - -async function handleRequest(request) { - let raw = await fetch('https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/README.md'); - let text = await raw.text(); - let lines = text.split('\n'); - let skip = true; - let events = []; - for (let idx in lines) { - let line = lines[idx]; - if (skip) { - if (line.includes('Upcoming Events')) { - skip = false; - } - } else { - if (!line) { continue; }; - line = line.split(']('); - var tail = line[1].split(') '); - events.push({ - 'signup_link': tail[0], - 'event_name': line[0].replace('* [', ''), - 'event_date': tail[1].slice(0, -1).replace('on ', '') - }); - } - } - - let response = new Response(JSON.stringify({ - 'events': events - })); - response.headers.set('Content-Type', 'application/json'); - return response; -} diff --git a/website/workers/meet-form.js b/website/workers/meet-form.js deleted file mode 100644 index 6506d59522e..00000000000 --- a/website/workers/meet-form.js +++ /dev/null @@ -1,75 +0,0 @@ - -addEventListener('fetch', event => { - event.respondWith(handleRequest(event.request)) -}) - -async function handleRequest(request) { - if (request.method != 'POST') { - return new Response('Bad request', { - status: 400, - statusText: 'Bad request' - }); - } - let url = new URL('https://api.sendgrid.com/v3/mail/send'); - let newHdrs = new Headers(); - newHdrs.set('Authorization', 'Bearer ' + SENDGRID_TOKEN); - newHdrs.set('Content-Type', 'application/json'); - let args = await request.json(); - let subject = args['name'] + ' wants to meet'; - let content = ''; - let argsKeys = Object.keys(args); - if (['name', 'email', 'city', 'company'].filter(n=>!argsKeys.includes(n)).length) { - return new Response('Bad request', { - status: 400, - statusText: 'Bad request' - }); - } - for (let key in args) { - content += key.charAt(0).toUpperCase() + key.slice(1); - content += ':\r\n' + args[key] + '\r\n\r\n'; - } - let body = { - "personalizations": [ - { - "to": [ - { - "email": "clickhouse-feedback@yandex-team.ru", - "name": "ClickHouse Core Team" - } - ], - "subject": subject - } - ], "content": [ - { - "type": "text/plain", - "value": content - } - ], "from": { - "email": "no-reply@clickhouse.tech", - "name": "ClickHouse Website" - }, "reply_to": - { - "email": "no-reply@clickhouse.tech", - "name": "ClickHouse Website" - } - }; - const init = { - body: JSON.stringify(body), - headers: newHdrs, - method: 'POST' - } - - let response = await fetch(url, init); - let status = 200; - if (response.status != 202) { - status = 200; - } - - return new Response('{}', { - status: status, - statusText: response.statusText.replace('Accepted', 'OK'), - headers: new Headers({ - 'Content-Type': 'application/json' - }) - }) -} diff --git a/website/workers/play-api.js b/website/workers/play-api.js deleted file mode 100644 index 62792d37a4d..00000000000 --- a/website/workers/play-api.js +++ /dev/null @@ -1,24 +0,0 @@ -addEventListener('fetch', event => { - event.respondWith(handleRequest(event.request)) -}) - -async function handleRequest(request) { - let url = new URL(request.url); - url.hostname = 'play-api.clickhouse.tech'; - url.port = 8443; - url.pathname = url.pathname.replace('/api/', '/'); - let newHdrs = new Headers() - - const init = { - body: request.body, - headers: 
request.headers, - method: request.method - } - - let response = await fetch(url, init); - - return new Response(response.body, { - status: response.status, - statusText: response.statusText - }) -} diff --git a/website/workers/repo.js b/website/workers/repo.js deleted file mode 100644 index 470391cf225..00000000000 --- a/website/workers/repo.js +++ /dev/null @@ -1,10 +0,0 @@ -addEventListener('fetch', event => { - event.respondWith(handleRequest(event.request)) -}) - -async function handleRequest(request) { - let url = new URL(request.url); - url.hostname = 'repo.yandex.ru'; - url.pathname = '/clickhouse' + url.pathname; - return fetch(url) -} From 994f95ddb0704d94ea190ac1d05588c87ba33a03 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Apr 2020 17:03:36 +0300 Subject: [PATCH 229/752] Added ReplacingSortedAlgorithm. --- ...sform.cpp => ReplacingSortedAlgorithm.cpp} | 85 ++++--------------- .../Merges/ReplacingSortedAlgorithm.h | 43 ++++++++++ .../Merges/ReplacingSortedTransform.h | 66 +++----------- 3 files changed, 72 insertions(+), 122 deletions(-) rename src/Processors/Merges/{ReplacingSortedTransform.cpp => ReplacingSortedAlgorithm.cpp} (51%) create mode 100644 src/Processors/Merges/ReplacingSortedAlgorithm.h diff --git a/src/Processors/Merges/ReplacingSortedTransform.cpp b/src/Processors/Merges/ReplacingSortedAlgorithm.cpp similarity index 51% rename from src/Processors/Merges/ReplacingSortedTransform.cpp rename to src/Processors/Merges/ReplacingSortedAlgorithm.cpp index 1b29b4842ed..d0b0947aa70 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.cpp +++ b/src/Processors/Merges/ReplacingSortedAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB @@ -6,71 +6,23 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; +extern const int LOGICAL_ERROR; } -ReplacingSortedTransform::ReplacingSortedTransform( - const Block & header, size_t num_inputs, - SortDescription description_, const String & version_column, - size_t max_block_size, - WriteBuffer * out_row_sources_buf_, - bool use_average_block_sizes) - : IMergingTransform(num_inputs, header, header, true) - , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) - , description(std::move(description_)) - , out_row_sources_buf(out_row_sources_buf_) - , chunk_allocator(num_inputs + max_row_refs) - , source_chunks(num_inputs) - , cursors(num_inputs) +ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( + const Block & header, size_t num_inputs, + SortDescription description_, const String & version_column, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_, + bool use_average_block_sizes) + : IMergingAlgorithmWithSharedChunks(num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) + , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) { if (!version_column.empty()) version_column_number = header.getPositionByName(version_column); } -void ReplacingSortedTransform::initializeInputs() -{ - queue = SortingHeap(cursors); - is_queue_initialized = true; -} - -void ReplacingSortedTransform::consume(Chunk chunk, size_t input_number) -{ - updateCursor(std::move(chunk), input_number); - - if (is_queue_initialized) - queue.push(cursors[input_number]); -} - -void ReplacingSortedTransform::updateCursor(Chunk chunk, size_t source_num) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - 
chunk.setColumns(std::move(columns), num_rows); - - auto & source_chunk = source_chunks[source_num]; - - if (source_chunk) - { - source_chunk = chunk_allocator.alloc(std::move(chunk)); - cursors[source_num].reset(source_chunk->getColumns(), {}); - } - else - { - if (cursors[source_num].has_collation) - throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); - - source_chunk = chunk_allocator.alloc(std::move(chunk)); - cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); - } - - source_chunk->all_columns = cursors[source_num].all_columns; - source_chunk->sort_columns = cursors[source_num].sort_columns; -} - -void ReplacingSortedTransform::insertRow() +void ReplacingSortedAlgorithm::insertRow() { if (out_row_sources_buf) { @@ -86,13 +38,7 @@ void ReplacingSortedTransform::insertRow() selected_row.clear(); } -void ReplacingSortedTransform::work() -{ - merge(); - prepareOutputChunk(merged_data); -} - -void ReplacingSortedTransform::merge() +IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() { /// Take the rows in needed order and put them into `merged_columns` until rows no more than `max_block_size` while (queue.isValid()) @@ -109,7 +55,7 @@ void ReplacingSortedTransform::merge() /// if there are enough rows and the last one is calculated completely if (key_differs && merged_data.hasEnoughRows()) - return; + return Status(merged_data.pull()); if (key_differs) { @@ -143,8 +89,7 @@ void ReplacingSortedTransform::merge() { /// We get the next block from the corresponding source, if there is one. queue.removeTop(); - requestDataForInput(current.impl->order); - return; + return Status(current.impl->order); } } @@ -152,7 +97,7 @@ void ReplacingSortedTransform::merge() if (!selected_row.empty()) insertRow(); - is_finished = true; + return Status(merged_data.pull(), true); } } diff --git a/src/Processors/Merges/ReplacingSortedAlgorithm.h b/src/Processors/Merges/ReplacingSortedAlgorithm.h new file mode 100644 index 00000000000..9f3362e2e08 --- /dev/null +++ b/src/Processors/Merges/ReplacingSortedAlgorithm.h @@ -0,0 +1,43 @@ +#pragma once +#include +#include +#include + +namespace Poco +{ +class Logger; +} + +namespace DB +{ + +class ReplacingSortedAlgorithm : public IMergingAlgorithmWithSharedChunks +{ +public: + ReplacingSortedAlgorithm( + const Block & header, size_t num_inputs, + SortDescription description_, const String & version_column, + size_t max_block_size, + WriteBuffer * out_row_sources_buf_ = nullptr, + bool use_average_block_sizes = false); + + Status merge() override; + +private: + MergedData merged_data; + + ssize_t version_column_number = -1; + + using RowRef = detail::RowRefWithOwnedChunk; + static constexpr size_t max_row_refs = 3; /// last, current, selected. + RowRef last_row; + RowRef selected_row; /// Last row with maximum version for current primary key. + size_t max_pos = 0; /// The position (into current_row_sources) of the row with the highest version. + + /// Sources of rows with the current primary key. 
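+    /// They are flushed into out_row_sources_buf (when it is set) as insertRow() picks the winning row;
+    /// the Vertical merge algorithm uses that data to gather non-PK/non-index columns on the next step.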
+ PODArray current_row_sources; + + void insertRow(); +}; + +} diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 194e81ab6db..a6c36cedb71 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -1,13 +1,7 @@ #pragma once #include -#include -#include -#include -#include -#include - -#include +#include namespace DB @@ -17,7 +11,7 @@ namespace DB * For each group of consecutive identical values of the primary key (the columns by which the data is sorted), * keeps row with max `version` value. */ -class ReplacingSortedTransform final : public IMergingTransform +class ReplacingSortedTransform final : public IMergingTransform2 { public: ReplacingSortedTransform( @@ -25,52 +19,20 @@ public: SortDescription description_, const String & version_column, size_t max_block_size, WriteBuffer * out_row_sources_buf_ = nullptr, - bool use_average_block_sizes = false); + bool use_average_block_sizes = false) + : IMergingTransform2( + num_inputs, header, header, true, + header, + num_inputs, + std::move(description_), + version_column, + max_block_size, + out_row_sources_buf_, + use_average_block_sizes) + { + } String getName() const override { return "ReplacingSorted"; } - void work() override; - -protected: - void initializeInputs() override; - void consume(Chunk chunk, size_t input_number) override; - -private: - Logger * log = &Logger::get("ReplacingSortedTransform"); - - MergedData merged_data; - - SortDescription description; - ssize_t version_column_number = -1; - - /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) - /// If it is not nullptr then it should be populated during execution - WriteBuffer * out_row_sources_buf = nullptr; - - /// Allocator must be destroyed after all RowRefs. - detail::SharedChunkAllocator chunk_allocator; - - /// Chunks currently being merged. - using SourceChunks = std::vector; - SourceChunks source_chunks; - SortCursorImpls cursors; - - SortingHeap queue; - bool is_queue_initialized = false; - - using RowRef = detail::RowRefWithOwnedChunk; - static constexpr size_t max_row_refs = 3; /// last, current, selected. - RowRef last_row; - /// RowRef next_key; /// Primary key of next row. - RowRef selected_row; /// Last row with maximum version for current primary key. - size_t max_pos = 0; /// The position (into current_row_sources) of the row with the highest version. - - /// Sources of rows with the current primary key. 
- PODArray current_row_sources; - - void insertRow(); - void merge(); - void updateCursor(Chunk chunk, size_t source_num); - void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, source_chunks[cursor.impl->order]); } }; } From 27eaea184a54d27f3724b533b77e8a9180560add Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Apr 2020 17:03:42 +0300 Subject: [PATCH 230/752] Add small timeout --- tests/queries/0_stateless/00646_url_engine.python | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python index 1b41216b198..494eb12b0ef 100644 --- a/tests/queries/0_stateless/00646_url_engine.python +++ b/tests/queries/0_stateless/00646_url_engine.python @@ -2,6 +2,7 @@ from __future__ import print_function import csv import sys +import time import tempfile import threading import os, urllib @@ -187,6 +188,7 @@ if __name__ == "__main__": break except Exception as ex: exception_text = str(ex) + time.sleep(0.1) if exception_text: print("Exception: {}".format(exception_text), file=sys.stderr) From 5e860ddb0482deb97147934190bb7cd4e4ecb884 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 10 Apr 2020 17:09:47 +0300 Subject: [PATCH 231/752] fix 01098_temporary_and_external_tables --- .../0_stateless/01098_temporary_and_external_tables.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh index f8b9862c1c1..c984f363c31 100755 --- a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh +++ b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh @@ -5,12 +5,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) url="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/?session_id=test_01098" -${CLICKHOUSE_CURL} -sSk "$url" --data "CREATE TEMPORARY TABLE tmp_table AS SELECT number AS n FROM numbers(42)" > /dev/null; +${CLICKHOUSE_CURL} -m 30 -sSk "$url" --data "CREATE TEMPORARY TABLE tmp_table AS SELECT number AS n FROM numbers(42)" > /dev/null; name_expr="'\`' || database || '\`.\`' || name || '\`'" -full_tmp_name=`echo "SELECT $name_expr FROM system.tables WHERE database='_temporary_and_external_tables' AND create_table_query LIKE '%tmp_table%'" | ${CLICKHOUSE_CURL} -sSgk $url -d @-` +full_tmp_name=`echo "SELECT $name_expr FROM system.tables WHERE database='_temporary_and_external_tables' AND create_table_query LIKE '%tmp_table%'" | ${CLICKHOUSE_CURL} -m 30 -sSgk $url -d @-` -echo "SELECT * FROM $full_tmp_name" | ${CLICKHOUSE_CURL} -sSgk $url -d @- | grep -F "Code: 291" > /dev/null && echo "OK" +echo "SELECT * FROM $full_tmp_name" | ${CLICKHOUSE_CURL} -m 60 -sSgk $url -d @- | grep -F "Code: 291" > /dev/null && echo "OK" -echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+sum((number+GLOBAL+IN+(SELECT+number+AS+n+FROM+remote('127.0.0.2',+numbers(5))+WHERE+n+GLOBAL+IN+(SELECT+*+FROM+tmp_table)+AND+n+GLOBAL+NOT+IN+(SELECT+*+FROM+file)+))+AS+res),+sum(number*res)+FROM+remote('127.0.0.2',+numbers(10))"; +echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+sum((number+GLOBAL+IN+(SELECT+number+AS+n+FROM+remote('127.0.0.2',+numbers(5))+WHERE+n+GLOBAL+IN+(SELECT+*+FROM+tmp_table)+AND+n+GLOBAL+NOT+IN+(SELECT+*+FROM+file)+))+AS+res),+sum(number*res)+FROM+remote('127.0.0.2',+numbers(10))"; From 
d2237c3ab850c4aeb924cf7ed7c061acac7d5bfa Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Apr 2020 17:17:32 +0300 Subject: [PATCH 232/752] Update msgpack.cmake --- cmake/find/msgpack.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find/msgpack.cmake b/cmake/find/msgpack.cmake index 0b56bbc1a0d..093555bebc0 100644 --- a/cmake/find/msgpack.cmake +++ b/cmake/find/msgpack.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_MSGPACK_LIBRARY "Set to FALSE to use system msgpack library if (USE_INTERNAL_MSGPACK_LIBRARY) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include/msgpack.hpp") - message(WARNING "submodule contrib/msgpack-c is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/msgpack-c is missing. To fix try run: \n git submodule update --init --recursive") set(USE_INTERNAL_MSGPACK_LIBRARY 0) set(MISSING_INTERNAL_MSGPACK_LIBRARY 1) endif() From 7177bc4cbf10524f8d9fa630db4ca113e6232ad2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 17:40:55 +0300 Subject: [PATCH 233/752] fix to_url with version_prefix --- docs/tools/build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index 1719fe051d3..fb3dba8a529 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -307,7 +307,7 @@ def write_redirect_html(out_path, to_url): Page Redirection @@ -320,7 +320,7 @@ def write_redirect_html(out_path, to_url): def build_redirect_html(args, from_path, to_path): for lang in ['en', 'es', 'fr', 'ja', 'fa']: # TODO: args.lang.split(','): out_path = os.path.join(args.docs_output_dir, lang, from_path.replace('.md', '/index.html')) - version_prefix = args.version_prefix + '/' if args.version_prefix else '/' + version_prefix = f'/{args.version_prefix}/' if args.version_prefix else '/' target_path = to_path.replace('.md', '/') to_url = f'/docs{version_prefix}{lang}/{target_path}' to_url = to_url.strip() From 770bc149df7f85107020aad11f2ed59c65ae6859 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 17:54:20 +0300 Subject: [PATCH 234/752] Extra mark for redirect template page --- docs/tools/build.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index fb3dba8a529..7508a072acb 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -301,7 +301,8 @@ def write_redirect_html(out_path, to_url): except OSError: pass with open(out_path, 'w') as f: - f.write(f''' + f.write(f''' + From ca1aba62b3706c78f819ef0583318b6ec7003582 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 17:56:09 +0300 Subject: [PATCH 235/752] Do not minify redirects --- docs/tools/website.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tools/website.py b/docs/tools/website.py index 9704cf7d5a4..83eef270fc5 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -155,7 +155,8 @@ def minify_website(args): with open(path, 'rb') as f: content = f.read().decode('utf-8') if filename.endswith('.html'): - content = htmlmin.minify(content, remove_empty_space=False) + if not content.startswith(' From 0ecfede695638a9565eb6815bf1772d7ea5445bc Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 21:36:55 +0300 Subject: [PATCH 255/752] Update index.md --- docs/en/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/index.md b/docs/en/index.md index 5d40ae9177c..e38a89abd49 100644 --- a/docs/en/index.md 
+++ b/docs/en/index.md @@ -1,5 +1,5 @@ --- -toc_priority: 3 +toc_priority: 0 toc_title: Overview --- From 344be40068f5aa19b5b49bdbcf16bcdf964fe7ce Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 10 Apr 2020 22:22:15 +0300 Subject: [PATCH 256/752] Re-attach index.md to better place in nav + hide empty articles (#10182) --- docs/en/introduction/distinctive_features.md | 6 ++++++ .../features_considered_disadvantages.md | 12 ------------ docs/en/sql_reference/statements/index.md | 1 - docs/es/introduction/distinctive_features.md | 6 ++++++ .../features_considered_disadvantages.md | 14 -------------- docs/fa/introduction/distinctive_features.md | 6 ++++++ .../features_considered_disadvantages.md | 11 ----------- docs/fr/introduction/distinctive_features.md | 6 ++++++ .../features_considered_disadvantages.md | 15 --------------- docs/ja/introduction/distinctive_features.md | 6 ++++++ .../features_considered_disadvantages.md | 12 ------------ docs/redirects.txt | 1 + docs/ru/introduction/distinctive_features.md | 7 +++++++ .../features_considered_disadvantages.md | 8 -------- docs/tools/nav.py | 10 ++++++++++ docs/zh/introduction/distinctive_features.md | 6 ++++++ .../features_considered_disadvantages.md | 8 -------- 17 files changed, 54 insertions(+), 81 deletions(-) delete mode 100644 docs/en/introduction/features_considered_disadvantages.md delete mode 100644 docs/es/introduction/features_considered_disadvantages.md delete mode 100644 docs/fa/introduction/features_considered_disadvantages.md delete mode 100644 docs/fr/introduction/features_considered_disadvantages.md delete mode 100644 docs/ja/introduction/features_considered_disadvantages.md delete mode 100644 docs/ru/introduction/features_considered_disadvantages.md delete mode 100644 docs/zh/introduction/features_considered_disadvantages.md diff --git a/docs/en/introduction/distinctive_features.md b/docs/en/introduction/distinctive_features.md index 31770b0bf79..1244e80a253 100644 --- a/docs/en/introduction/distinctive_features.md +++ b/docs/en/introduction/distinctive_features.md @@ -66,4 +66,10 @@ ClickHouse uses asynchronous multi-master replication. After being written to an For more information, see the section [Data replication](../engines/table_engines/mergetree_family/replication.md). +## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} + +1. No full-fledged transactions. +2. Lack of ability to modify or delete already inserted data with high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu). +3. The sparse index makes ClickHouse not so suitable for point queries retrieving single rows by their keys. + [Original article](https://clickhouse.tech/docs/en/introduction/distinctive_features/) diff --git a/docs/en/introduction/features_considered_disadvantages.md b/docs/en/introduction/features_considered_disadvantages.md deleted file mode 100644 index e295b5570ab..00000000000 --- a/docs/en/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -toc_priority: 5 -toc_title: ClickHouse Features that Can Be Considered Disadvantages ---- - -# ClickHouse Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} - -1. No full-fledged transactions. -2. Lack of ability to modify or delete already inserted data with high rate and low latency. 
There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu). -3. The sparse index makes ClickHouse not so suitable for point queries retrieving single rows by their keys. - -[Original article](https://clickhouse.tech/docs/en/introduction/features_considered_disadvantages/) diff --git a/docs/en/sql_reference/statements/index.md b/docs/en/sql_reference/statements/index.md index 1adf93a153e..507d858c14a 100644 --- a/docs/en/sql_reference/statements/index.md +++ b/docs/en/sql_reference/statements/index.md @@ -2,4 +2,3 @@ toc_folder_title: Statements toc_priority: 31 --- - diff --git a/docs/es/introduction/distinctive_features.md b/docs/es/introduction/distinctive_features.md index 5117fcf9324..82b757341be 100644 --- a/docs/es/introduction/distinctive_features.md +++ b/docs/es/introduction/distinctive_features.md @@ -68,4 +68,10 @@ ClickHouse utiliza la replicación multi-maestro asincrónica. Después de escri Para obtener más información, consulte la sección [Replicación de datos](../engines/table_engines/mergetree_family/replication.md). +## Características que pueden considerarse desventajas {#clickhouse-features-that-can-be-considered-disadvantages} + +1. No hay transacciones completas. +2. Falta de capacidad para modificar o eliminar datos ya insertados con alta tasa y baja latencia. Hay eliminaciones y actualizaciones por lotes disponibles para limpiar o modificar datos, por ejemplo, para cumplir con [GDPR](https://gdpr-info.eu). +3. El índice disperso hace que ClickHouse no sea tan adecuado para consultas de puntos que recuperan filas individuales por sus claves. + [Artículo Original](https://clickhouse.tech/docs/en/introduction/distinctive_features/) diff --git a/docs/es/introduction/features_considered_disadvantages.md b/docs/es/introduction/features_considered_disadvantages.md deleted file mode 100644 index 60eabad3102..00000000000 --- a/docs/es/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 3e185d24c9fe772c7cf03d5475247fb829a21dfa -toc_priority: 5 -toc_title: "Caracter\xEDsticas de ClickHouse que pueden considerarse desventajas" ---- - -# Características de ClickHouse que pueden considerarse desventajas {#clickhouse-features-that-can-be-considered-disadvantages} - -1. No hay transacciones completas. -2. Falta de capacidad para modificar o eliminar datos ya insertados con alta tasa y baja latencia. Hay eliminaciones y actualizaciones por lotes disponibles para limpiar o modificar datos, por ejemplo, para cumplir con [GDPR](https://gdpr-info.eu). -3. El índice disperso hace que ClickHouse no sea tan adecuado para consultas de puntos que recuperan filas individuales por sus claves. - -[Artículo Original](https://clickhouse.tech/docs/en/introduction/features_considered_disadvantages/) diff --git a/docs/fa/introduction/distinctive_features.md b/docs/fa/introduction/distinctive_features.md index a4313168796..71a8f3eb543 100644 --- a/docs/fa/introduction/distinctive_features.md +++ b/docs/fa/introduction/distinctive_features.md @@ -62,6 +62,12 @@ ClickHouse از روش asynchronous multimaster replication استفاده می برای اطلاعات بیشتر، به بخش [replication داده ها](../engines/table_engines/mergetree_family/replication.md) مراجعه کنید. +## ویژگی های از ClickHouse که می تواند معایبی باشد. {#wyjgy-hy-z-clickhouse-khh-my-twnd-m-yby-bshd} + +1. بدون پشتیبانی کامل از تراکنش +2. 
عدم توانایی برای تغییر و یا حذف داده های در حال حاضر وارد شده با سرعت بالا و تاخیر کم. برای پاک کردن و یا اصلاح داده ها، به عنوان مثال برای پیروی از [GDPR](https://gdpr-info.eu)، دسته ای پاک و به روزرسانی وجود دارد.حال توسعه می باشد. +3. Sparse index باعث می شود ClickHouse چندان مناسب اجرای پرسمان های point query برای دریافت یک ردیف از داده ها با استفاده از کلید آنها نباشد. +
[مقاله اصلی](https://clickhouse.tech/docs/fa/introduction/distinctive_features/) diff --git a/docs/fa/introduction/features_considered_disadvantages.md b/docs/fa/introduction/features_considered_disadvantages.md deleted file mode 100644 index 5a8ea156da5..00000000000 --- a/docs/fa/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,11 +0,0 @@ -
- -# ویژگی های از ClickHouse که می تواند معایبی باشد. {#wyjgy-hy-z-clickhouse-khh-my-twnd-m-yby-bshd} - -1. بدون پشتیبانی کامل از تراکنش -2. عدم توانایی برای تغییر و یا حذف داده های در حال حاضر وارد شده با سرعت بالا و تاخیر کم. برای پاک کردن و یا اصلاح داده ها، به عنوان مثال برای پیروی از [GDPR](https://gdpr-info.eu)، دسته ای پاک و به روزرسانی وجود دارد.حال توسعه می باشد. -3. Sparse index باعث می شود ClickHouse چندان مناسب اجرای پرسمان های point query برای دریافت یک ردیف از داده ها با استفاده از کلید آنها نباشد. - -
- -[مقاله اصلی](https://clickhouse.tech/docs/fa/introduction/features_considered_disadvantages/) diff --git a/docs/fr/introduction/distinctive_features.md b/docs/fr/introduction/distinctive_features.md index dcea4046fcd..2c825cac85a 100644 --- a/docs/fr/introduction/distinctive_features.md +++ b/docs/fr/introduction/distinctive_features.md @@ -68,4 +68,10 @@ ClickHouse utilise la réplication multi-maître asynchrone. Après avoir été Pour plus d'informations, consultez la section [Réplication des données](../engines/table_engines/mergetree_family/replication.md). +## Caractéristiques de ClickHouse qui peuvent être considérées comme des inconvénients {#clickhouse-features-that-can-be-considered-disadvantages} + +1. Pas de transactions à part entière. +2. Manque de capacité à modifier ou supprimer des données déjà insérées avec un taux élevé et une faible latence. Des suppressions et des mises à jour par lots sont disponibles pour nettoyer ou modifier les données, par exemple pour [GDPR](https://gdpr-info.eu). +3. L'index clairsemé rend ClickHouse pas si approprié pour les requêtes ponctuelles récupérant des lignes simples par leurs clés. + [Article Original](https://clickhouse.tech/docs/en/introduction/distinctive_features/) diff --git a/docs/fr/introduction/features_considered_disadvantages.md b/docs/fr/introduction/features_considered_disadvantages.md deleted file mode 100644 index dc9fe708fef..00000000000 --- a/docs/fr/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: f865c9653f9df092694258e0ccdd733c339112f5 -toc_priority: 5 -toc_title: "Caract\xE9ristiques de ClickHouse qui peuvent \xEAtre consid\xE9r\xE9\ - es comme des inconv\xE9nients" ---- - -# Caractéristiques de ClickHouse qui peuvent être considérées comme des inconvénients {#clickhouse-features-that-can-be-considered-disadvantages} - -1. Pas de transactions à part entière. -2. Manque de capacité à modifier ou supprimer des données déjà insérées avec un taux élevé et une faible latence. Des suppressions et des mises à jour par lots sont disponibles pour nettoyer ou modifier les données, par exemple pour [GDPR](https://gdpr-info.eu). -3. L'index clairsemé rend ClickHouse pas si approprié pour les requêtes ponctuelles récupérant des lignes simples par leurs clés. - -[Article Original](https://clickhouse.tech/docs/en/introduction/features_considered_disadvantages/) diff --git a/docs/ja/introduction/distinctive_features.md b/docs/ja/introduction/distinctive_features.md index 6cd0834708c..5c4b91759dc 100644 --- a/docs/ja/introduction/distinctive_features.md +++ b/docs/ja/introduction/distinctive_features.md @@ -63,4 +63,10 @@ ClickHouseには、精度を犠牲にしてパフォーマンスを得るため 詳細については、[データ複製](../engines/table_engines/mergetree_family/replication.md) セクションを参照してください。 +## 欠点と考えられるClickHouseの機能 {#qian-dian-tokao-erareruclickhousenoji-neng} + +1. 本格的なトランザクションはありません。 +2. 既に挿入されたデータの変更または削除を、高頻度かつ低遅延に行う機能はありません。 [GDPR](https://gdpr-info.eu)に準拠するなど、データをクリーンアップまたは変更するために、バッチ削除およびバッチ更新が利用可能です。 +3. 
インデックスが疎であるため、ClickHouseは、キーで単一行を取得するようなクエリにはあまり適していません。 + [Original article](https://clickhouse.yandex/docs/en/introduction/distinctive_features/) diff --git a/docs/ja/introduction/features_considered_disadvantages.md b/docs/ja/introduction/features_considered_disadvantages.md deleted file mode 100644 index 8c766e06fe8..00000000000 --- a/docs/ja/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -toc_priority: 5 -toc_title: 欠点と見なすことができるClickHouseの機能 ---- - -# 欠点と考えられるClickHouseの機能 {#qian-dian-tokao-erareruclickhousenoji-neng} - -1. 本格的なトランザクションはありません。 -2. 既に挿入されたデータの変更または削除を、高頻度かつ低遅延に行う機能はありません。 [GDPR](https://gdpr-info.eu)に準拠するなど、データをクリーンアップまたは変更するために、バッチ削除およびバッチ更新が利用可能です。 -3. インデックスが疎であるため、ClickHouseは、キーで単一行を取得するようなクエリにはあまり適していません。 - -[Original article](https://clickhouse.yandex/docs/en/introduction/features_considered_disadvantages/) diff --git a/docs/redirects.txt b/docs/redirects.txt index c0f3c81813f..95df6773359 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -98,6 +98,7 @@ functions/ym_dict_functions.md query_language/functions/ym_dict_functions.md interfaces/http_interface.md interfaces/http.md interfaces/third-party_client_libraries.md interfaces/third-party/client_libraries.md interfaces/third-party_gui.md interfaces/third-party/gui.md +introduction/features_considered_disadvantages.md introduction/distinctive_features.md introduction/possible_silly_questions.md faq/general.md introduction/ya_metrika_task.md introduction/history.md operations/performance/sampling_query_profiler.md operations/optimizing_performance/sampling_query_profiler.md diff --git a/docs/ru/introduction/distinctive_features.md b/docs/ru/introduction/distinctive_features.md index 0cc40e4e162..079a0667070 100644 --- a/docs/ru/introduction/distinctive_features.md +++ b/docs/ru/introduction/distinctive_features.md @@ -61,4 +61,11 @@ ClickHouse предоставляет различные способы разм Подробнее смотрите раздел [Репликация данных](../engines/table_engines/mergetree_family/replication.md). +## Особенности, которые могут считаться недостатками {#osobennosti-clickhouse-kotorye-mogut-schitatsia-nedostatkami} + +1. Отсутствие полноценных транзакций. +2. Возможность изменять или удалять ранее записанные данные с низкими задержками и высокой частотой запросов не предоставляется. Есть массовое удаление и изменение данных для очистки более не нужного или соответствия [GDPR](https://gdpr-info.eu). +3. Разреженный индекс делает ClickHouse плохо пригодным для точечных чтений одиночных строк по своим + ключам. + [Оригинальная статья](https://clickhouse.tech/docs/ru/introduction/distinctive_features/) diff --git a/docs/ru/introduction/features_considered_disadvantages.md b/docs/ru/introduction/features_considered_disadvantages.md deleted file mode 100644 index 0cd4838d908..00000000000 --- a/docs/ru/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,8 +0,0 @@ -# Особенности ClickHouse, которые могут считаться недостатками {#osobennosti-clickhouse-kotorye-mogut-schitatsia-nedostatkami} - -1. Отсутствие полноценных транзакций. -2. Возможность изменять или удалять ранее записанные данные с низкими задержками и высокой частотой запросов не предоставляется. Есть массовое удаление и изменение данных для очистки более не нужного или соответствия [GDPR](https://gdpr-info.eu). -3. Разреженный индекс делает ClickHouse плохо пригодным для точечных чтений одиночных строк по своим - ключам. 
- -[Оригинальная статья](https://clickhouse.tech/docs/ru/introduction/features_considered_disadvantages/) diff --git a/docs/tools/nav.py b/docs/tools/nav.py index 56d47d58d07..fe11b21d8e7 100644 --- a/docs/tools/nav.py +++ b/docs/tools/nav.py @@ -35,6 +35,8 @@ def build_nav_entry(root): title = meta.get('toc_folder_title', 'hidden') prio = meta.get('toc_priority', 9999) logging.debug(f'Nav entry: {prio}, {title}, {path}') + if not content.strip(): + title = 'hidden' result_items.append((prio, title, path)) result_items = sorted(result_items, key=lambda x: (x[0], x[1])) result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) @@ -45,8 +47,16 @@ def build_nav(lang, args): docs_dir = os.path.join(args.docs_dir, lang) _, _, nav = build_nav_entry(docs_dir) result = [] + index_key = None for key, value in nav.items(): if key and value: + if value == 'index.md': + index_key = key + continue result.append({key: value}) + if index_key: + key = list(result[0].keys())[0] + result[0][key][index_key] = 'index.md' + result[0][key].move_to_end(index_key, last=False) print('result', result) return result diff --git a/docs/zh/introduction/distinctive_features.md b/docs/zh/introduction/distinctive_features.md index 3b1e7a8c716..a267a49bf8a 100644 --- a/docs/zh/introduction/distinctive_features.md +++ b/docs/zh/introduction/distinctive_features.md @@ -62,4 +62,10 @@ ClickHouse使用异步的多主复制技术。当数据被写入任何一个可 更多信息,参见 [数据复制](../engines/table_engines/mergetree_family/replication.md)。 +# 的限制 {#clickhouseke-yi-ren-wei-shi-que-dian-de-gong-neng} + +1. 没有完整的事务支持。 +2. 缺少高频率,低延迟的修改或删除已存在数据的能力。仅能用于批量删除或修改数据,但这符合 [GDPR](https://gdpr-info.eu)。 +3. 稀疏索引使得ClickHouse不适合通过其键检索单行的点查询。 + [来源文章](https://clickhouse.tech/docs/en/introduction/distinctive_features/) diff --git a/docs/zh/introduction/features_considered_disadvantages.md b/docs/zh/introduction/features_considered_disadvantages.md deleted file mode 100644 index efc967e90ac..00000000000 --- a/docs/zh/introduction/features_considered_disadvantages.md +++ /dev/null @@ -1,8 +0,0 @@ - -# ClickHouse的限制 {#clickhouseke-yi-ren-wei-shi-que-dian-de-gong-neng} - -1. 没有完整的事务支持。 -2. 缺少高频率,低延迟的修改或删除已存在数据的能力。仅能用于批量删除或修改数据,但这符合 [GDPR](https://gdpr-info.eu)。 -3. 稀疏索引使得ClickHouse不适合通过其键检索单行的点查询。 - -[来源文章](https://clickhouse.tech/docs/zh/introduction/features_considered_disadvantages/) From 2f5b4b0f9b3d3738291f190da11c5d43d5c36b21 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 10 Apr 2020 22:01:10 +0200 Subject: [PATCH 257/752] Added ability to relax the restriction on non-deterministic functions usage in mutations with allow_nondeterministic_mutations setting. --- src/Core/Settings.h | 2 +- src/Interpreters/MutationsInterpreter.cpp | 4 ++- ...eterministic_functions_zookeeper.reference | 3 ++ ...th_nondeterministic_functions_zookeeper.sh | 28 +++++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 753231603b2..31b8bd6ab02 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -418,7 +418,7 @@ struct Settings : public SettingsCollection M(SettingBool, merge_tree_uniform_read_distribution, true, "Obsolete setting, does nothing. Will be removed after 2020-05-20", 0) \ M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \ M(SettingBool, partial_merge_join, false, "Obsolete. 
Use join_algorithm='prefer_partial_merge' instead.", 0) \ - + M(SettingBool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 669b72c6317..df0267b9450 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -661,9 +661,11 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(const std::ve void MutationsInterpreter::validate(TableStructureReadLockHolder &) { + const Settings & settings = context.getSettingsRef(); + /// For Replicated* storages mutations cannot employ non-deterministic functions /// because that produces inconsistencies between replicas - if (startsWith(storage->getName(), "Replicated")) + if (startsWith(storage->getName(), "Replicated") && !settings.allow_nondeterministic_mutations) { for (const auto & command : commands) { diff --git a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference index b462a5a7baa..f799e8ed8f0 100644 --- a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference +++ b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference @@ -2,3 +2,6 @@ OK OK OK OK +OK +OK +OK diff --git a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh index ac66dbc352a..9b190855adf 100755 --- a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh +++ b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh @@ -11,6 +11,22 @@ T1=table_1017_merge ${CLICKHOUSE_CLIENT} -n -q " DROP TABLE IF EXISTS $R1; DROP TABLE IF EXISTS $R2; + DROP TABLE IF EXISTS $T1; + + DROP TABLE IF EXISTS lookup_table; + DROP TABLE IF EXISTS table_for_dict; + DROP DICTIONARY IF EXISTS dict1; + + CREATE TABLE table_for_dict (y UInt64, y_new UInt32) ENGINE = Log; + INSERT INTO table_for_dict VALUES (3, 3003),(4,4004); + + CREATE DICTIONARY dict1( y UInt64 DEFAULT 0, y_new UInt32 DEFAULT 0 ) PRIMARY KEY y + SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB '${CLICKHOUSE_DATABASE}')) + LIFETIME(MIN 1 MAX 10) + LAYOUT(FLAT()); + + CREATE TABLE lookup_table (y UInt32, y_new UInt32) ENGINE = Join(ANY, LEFT, y); + INSERT INTO lookup_table VALUES(1,1001),(2,1002); CREATE TABLE $R1 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r1') ORDER BY x; CREATE TABLE $R2 (x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/${CLICKHOUSE_DATABASE}.table_1017', 'r2') ORDER BY x; @@ -35,9 +51,21 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 DELETE WHERE rand() = 0" 2>&1 > /d ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 > /dev/null \ && echo 'OK' || echo 'FAIL' +# hm... 
it looks like joinGet is considered deterministic
+${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = joinGet('${CLICKHOUSE_DATABASE}.lookup_table', 'y_new', y) WHERE x=1" 2>&1 \
+| echo 'OK' || echo 'FAIL'
+
+${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE dictHas('${CLICKHOUSE_DATABASE}.dict1', toUInt64(x))" 2>&1 \
+| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL'
+
+${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE dictHas('${CLICKHOUSE_DATABASE}.dict1', toUInt64(x))" --allow_nondeterministic_mutations=1 2>&1 \
+&& echo 'OK' || echo 'FAIL'

 ${CLICKHOUSE_CLIENT} -n -q "
     DROP TABLE IF EXISTS $R2;
     DROP TABLE IF EXISTS $R1;
     DROP TABLE IF EXISTS $T1;
+    DROP TABLE IF EXISTS lookup_table;
+    DROP TABLE IF EXISTS table_for_dict;
+    DROP DICTIONARY IF EXISTS dict1;
 "

From 2122eab14eda10dc3bb8f68192bad3da47ecf51e Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 10 Apr 2020 23:02:36 +0300
Subject: [PATCH 258/752] Update adopters.md

---
 docs/ru/introduction/adopters.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/introduction/adopters.md b/docs/ru/introduction/adopters.md
index 20c465f6418..5f8b825353c 100644
--- a/docs/ru/introduction/adopters.md
+++ b/docs/ru/introduction/adopters.md
@@ -3,7 +3,7 @@ machine_translated: true
 machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
 ---
 
-# Усыновители ClickHouse {#clickhouse-adopters}
+# Пользователи ClickHouse {#clickhouse-adopters}
 
 !!! warning "Оговорка"
     Следующий список компаний, использующих ClickHouse, и их истории успеха собраны из открытых источников, поэтому они могут отличаться от текущей реальности. Мы были бы очень признательны, если бы вы поделились историей принятия ClickHouse в свою компанию и [добавьте его в список](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), но, пожалуйста, убедитесь, что у вас не будет никаких проблем с NDA, сделав это. Предоставление обновлений с публикациями от других компаний также полезно.

From 330d13810666b1044b99f6e3916bc61ecade8c7d Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Fri, 10 Apr 2020 23:58:13 +0300
Subject: [PATCH 259/752] Update Settings.h

---
 src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 31b8bd6ab02..bb2b0a20b81 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -406,6 +406,7 @@ struct Settings : public SettingsCollection
     M(SettingBool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
     M(SettingUInt64, max_parser_depth, 1000, "Maximum parser depth.", 0) \
     M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \
+    M(SettingBool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \
     \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
@@ -418,7 +419,6 @@ struct Settings : public SettingsCollection
     M(SettingBool, merge_tree_uniform_read_distribution, true, "Obsolete setting, does nothing. Will be removed after 2020-05-20", 0) \
     M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. 
Will be removed after 2020-05-31", 0) \ M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ - M(SettingBool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) From 9208847a85a6db66ead9983a33d864f7164c2668 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 11 Apr 2020 00:07:08 +0300 Subject: [PATCH 260/752] Update submodule libc-headers to a new version. --- contrib/libc-headers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libc-headers b/contrib/libc-headers index 9676d2645a7..92c74f938cf 160000 --- a/contrib/libc-headers +++ b/contrib/libc-headers @@ -1 +1 @@ -Subproject commit 9676d2645a713e679dc981ffd84dee99fcd68b8e +Subproject commit 92c74f938cf2c4dd529cae4f3d2923d153b029a7 From a8d72d01c3c70855480e87d910434b4486a47e12 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Sat, 11 Apr 2020 00:29:54 +0300 Subject: [PATCH 261/752] mvp fix --- src/Interpreters/InterserverIOHandler.h | 8 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 6 +- .../MergeTree/MergeTreeDataMergerMutator.h | 8 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 34 +++ .../MergeTree/ReplicatedMergeTreeQueue.h | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 163 ++++++++++- src/Storages/StorageReplicatedMergeTree.h | 8 + .../test_polymorphic_parts/test.py | 2 + .../configs/conf.d/clusters.xml | 37 --- .../configs/conf.d/ddl.xml | 5 - .../configs/users.d/settings.xml | 12 + .../test_quorum_inserts/configs/users.xml | 27 -- tests/integration/test_quorum_inserts/test.py | 266 +++++++++++------- 13 files changed, 393 insertions(+), 185 deletions(-) delete mode 100644 tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml delete mode 100644 tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml create mode 100644 tests/integration/test_quorum_inserts/configs/users.d/settings.xml delete mode 100644 tests/integration/test_quorum_inserts/configs/users.xml diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index 0e85f30c886..0ffccb6c33c 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -34,7 +34,7 @@ class InterserverIOEndpoint public: virtual std::string getId(const std::string & path) const = 0; virtual void processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body, WriteBuffer & out, Poco::Net::HTTPServerResponse & response) = 0; - virtual ~InterserverIOEndpoint() {} + virtual ~InterserverIOEndpoint() = default; /// You need to stop the data transfer if blocker is activated. 
ActionBlocker blocker; @@ -53,8 +53,6 @@ public: void addEndpoint(const String & name, InterserverIOEndpointPtr endpoint) { std::lock_guard lock(mutex); - LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), "anime addEndpoint() " << name); - LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), StackTrace().toString()); bool inserted = endpoint_map.try_emplace(name, std::move(endpoint)).second; if (!inserted) throw Exception("Duplicate interserver IO endpoint: " + name, ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT); @@ -63,8 +61,6 @@ public: bool removeEndpointIfExists(const String & name) { std::lock_guard lock(mutex); - LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), "anime removeEndpointIfExists() " << name); - LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), StackTrace().toString()); return endpoint_map.erase(name); } @@ -72,8 +68,6 @@ public: try { std::lock_guard lock(mutex); - LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), "anime getEndpoint() " << name); - LOG_FATAL(&Poco::Logger::get("InterserverIOHandler"), StackTrace().toString()); return endpoint_map.at(name); } catch (...) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 2b732d879b0..f892302086d 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -204,7 +204,8 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( bool aggressive, size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge_callback, - String * out_disable_reason) + String * out_disable_reason, + const AllowedSingleMergePredicate & single_merge) { MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector(); const auto data_settings = data.getSettings(); @@ -225,6 +226,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( bool has_part_with_expired_ttl = false; for (const MergeTreeData::DataPartPtr & part : data_parts) { + if (!single_merge(part, nullptr)) + continue; + const String & partition_id = part->info.partition_id; if (!prev_partition_id || partition_id != *prev_partition_id || (prev_part && !can_merge_callback(*prev_part, part, nullptr))) { diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index b24b56a4780..0efdb403bf0 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -49,7 +49,10 @@ struct FutureMergedMutatedPart class MergeTreeDataMergerMutator { public: - using AllowedMergingPredicate = std::function; + using AllowedMergingPredicate = std::function; + using AllowedSingleMergePredicate = std::function; +// template +// using AllowedMergingPredicate = std::function; public: MergeTreeDataMergerMutator(MergeTreeData & data_, size_t background_pool_size); @@ -81,7 +84,8 @@ public: bool aggressive, size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge, - String * out_disable_reason = nullptr); + String * out_disable_reason = nullptr, + const AllowedSingleMergePredicate & single_merge = [](const MergeTreeData::DataPartPtr &, String *) -> bool { return true; }); /** Select all the parts in the specified partition for merge, if possible. 
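The two hunks above split merge admission into the existing pairwise check (may these two neighbouring parts be merged together?) and a new per-part check (may this part take part in any merge at all?), with the always-true default lambda keeping existing call sites unchanged. Below is a minimal, self-contained sketch of the per-part filtering semantics; the Part struct, the predicate body and the part names are illustrative stand-ins, not the real ClickHouse types:

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Toy stand-ins for MergeTreeData::DataPartPtr and AllowedSingleMergePredicate.
struct Part
{
    std::string name;
    bool pending_quorum;
};

using SingleMergePredicate = std::function<bool(const Part &, std::string *)>;

int main()
{
    std::vector<Part> parts{{"all_1_1_0", false}, {"all_2_2_0", true}, {"all_3_3_0", false}};

    // Analogue of a single-part predicate: a part whose quorum insert is still
    // in progress must not become a merge source.
    SingleMergePredicate single_merge = [](const Part & part, std::string * out_reason)
    {
        if (part.pending_quorum)
        {
            if (out_reason)
                *out_reason = "Quorum insert for part " + part.name + " is currently in progress";
            return false;
        }
        return true;
    };

    // Mirrors the new check at the top of the selection loop:
    //     if (!single_merge(part, nullptr)) continue;
    for (const auto & part : parts)
    {
        std::string reason;
        if (!single_merge(part, &reason))
        {
            std::cout << "skipped " << part.name << ": " << reason << '\n';
            continue;
        }
        std::cout << "merge candidate: " << part.name << '\n';
    }
}

Keeping the second predicate defaulted means only callers that know about quorum inserts, such as the replicated merge selector below, need to supply a real implementation.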
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
index 796664d889c..3e9f476e5ec 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -1781,6 +1781,40 @@ bool ReplicatedMergeTreeMergePredicate::operator()(
     return true;
 }
 
+bool ReplicatedMergeTreeMergePredicate::canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const
+{
+    LOG_FATAL(&Poco::Logger::get("ReplicatedMergeTreeMergePredicate::operator()"), "begin");
+
+    if (part->name == inprogress_quorum_part)
+    {
+        LOG_FATAL(&Poco::Logger::get("ReplicatedMergeTreeMergePredicate"), "operator()");
+        if (out_reason)
+            *out_reason = "Quorum insert for part " + part->name + " is currently in progress";
+        return false;
+    }
+
+    if (prev_virtual_parts.getContainingPart(part->info).empty())
+    {
+        if (out_reason)
+            *out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet";
+        return false;
+    }
+
+    std::lock_guard lock(queue.state_mutex);
+
+    /// We look for containing parts in queue.virtual_parts (and not in prev_virtual_parts) because queue.virtual_parts is newer
+    /// and it is guaranteed that it will contain all merges assigned before this object is constructed.
+    String containing_part = queue.virtual_parts.getContainingPart(part->info);
+    if (containing_part != part->name)
+    {
+        if (out_reason)
+            *out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part;
+        return false;
+    }
+
+    return true;
+}
+
 std::optional<std::pair<Int64, int>>
 ReplicatedMergeTreeMergePredicate::getDesiredMutationVersion(const MergeTreeData::DataPartPtr & part) const
 {
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
index fcb3dfb4b86..e31c0eb7ab1 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h
@@ -412,6 +412,8 @@ public:
         const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right,
         String * out_reason = nullptr) const;
 
+    bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const;
+
     /// Return nonempty optional of desired mutation version and alter version.
     /// If we have no alter (modify/drop) mutations in mutations queue, then we return biggest possible
     /// mutation version (and -1 as alter version). In other case, we return biggest mutation version with
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 7107328e4ff..ff4d5df03b9 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -2052,6 +2052,8 @@ void StorageReplicatedMergeTree::mutationsUpdatingTask()
 
 BackgroundProcessingPoolTaskResult StorageReplicatedMergeTree::queueTask()
 {
+    LOG_FATAL(&Poco::Logger::get("queueTask()"), "begin");
+
     /// If replication queue is stopped exit immediately as we successfully executed the task
     if (queue.actions_blocker.isCancelled())
     {
@@ -2189,8 +2191,14 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
             FutureMergedMutatedPart future_merged_part;
             if (max_source_parts_size_for_merge > 0 &&
-                merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred))
+                merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, nullptr,
+                    [&merge_pred](const MergeTreeData::DataPartPtr & part, String * explain) -> bool { return merge_pred.canMergeSinglePart(part, explain); }))
             {
+                /// We have to exclude parts that are currently being written with quorum (check .../quorum/status node).
+                /// Also we have to exclude last parts written with quorum (check .../quorum/last_part node)
+//                if (global_context.getSettingsRef().insert_quorum)
+//                    excludeSomePartsFromMerge(future_merged_part);
+
                 success = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type,
                     deduplicate, force_ttl);
             }
@@ -2575,6 +2583,7 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(
  */
 void StorageReplicatedMergeTree::updateQuorum(const String & part_name)
 {
+    LOG_FATAL(&Poco::Logger::get("updateQuorum"), "BEGIN!");
     auto zookeeper = getZooKeeper();
 
     /// Information on which replicas a part has been added, if the quorum has not yet been reached.
@@ -2667,6 +2676,140 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name)
 }
 
 
+void StorageReplicatedMergeTree::deletePartFromPendingQuorum(const String & part_name)
+{
+    auto zookeeper = getZooKeeper();
+    /// Information on which replicas a part has been added, if the quorum has not yet been reached.
+    const String quorum_status_path = zookeeper_path + "/quorum/status";
+
+    /// Delete "status" node if required.
+
+    String value;
+    Coordination::Stat stat;
+
+    /// If there is no node, then all quorum INSERTs have already reached the quorum, and nothing is needed.
+    while (zookeeper->tryGet(quorum_status_path, value, &stat))
+    {
+        ReplicatedMergeTreeQuorumEntry quorum_entry;
+        quorum_entry.fromString(value);
+
+        if (quorum_entry.part_name != part_name)
+        {
+            /// There is no information about the part in question in this node.
+            break;
+        }
+
+        /// Now we are sure that the part in question is involved in an insert with quorum.
+        /// Our goal is to delete "status" node and information from "last_part" node.
+
+        auto code = zookeeper->tryRemove(quorum_status_path, stat.version);
+
+        if (code == Coordination::ZOK)
+        {
+            break;
+        }
+        else if (code == Coordination::ZNONODE)
+        {
+            /// The quorum has already been achieved.
+            break;
+        }
+        else if (code == Coordination::ZBADVERSION)
+        {
+            /// Node was updated meanwhile. We must re-read it and repeat all the actions.
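+            /// (tryRemove() was given the stat.version captured by tryGet(), so ZBADVERSION here
+            /// just means a concurrent writer won the compare-and-swap; looping re-reads the node.)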
In other case, we return biggest mutation version with diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7107328e4ff..ff4d5df03b9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2052,6 +2052,8 @@ void StorageReplicatedMergeTree::mutationsUpdatingTask() BackgroundProcessingPoolTaskResult StorageReplicatedMergeTree::queueTask() { + LOG_FATAL(&Poco::Logger::get("queueTask()"), "begin"); + /// If replication queue is stopped exit immediately as we successfully executed the task if (queue.actions_blocker.isCancelled()) { @@ -2189,8 +2191,14 @@ void StorageReplicatedMergeTree::mergeSelectingTask() FutureMergedMutatedPart future_merged_part; if (max_source_parts_size_for_merge > 0 && - merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred)) + merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, nullptr, + [&merge_pred](const MergeTreeData::DataPartPtr & part, String * explain) -> bool { return merge_pred.canMergeSinglePart(part, explain); })) { + /// We have to exclude parts, that are currently being writted with quorum. (check .../quorum/status node) + /// Also we have to exclude last parts written with quorum (check .../quorum/last_part node) +// if (global_context.getSettingsRef().insert_quorum) +// excludeSomePartsFromMerge(future_merged_part); + success = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type, deduplicate, force_ttl); } @@ -2575,6 +2583,7 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart( */ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) { + LOG_FATAL(&Poco::Logger::get("updateQuorum"), "BEGIN!"); auto zookeeper = getZooKeeper(); /// Information on which replicas a part has been added, if the quorum has not yet been reached. @@ -2667,6 +2676,140 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) } +void StorageReplicatedMergeTree::deletePartFromPendingQuorum(const String & part_name) +{ + auto zookeeper = getZooKeeper(); + /// Information on which replicas a part has been added, if the quorum has not yet been reached. + const String quorum_status_path = zookeeper_path + "/quorum/status"; + + /// Delete "status" node if required. + + String value; + Coordination::Stat stat; + + /// If there is no node, then all quorum INSERTs have already reached the quorum, and nothing is needed. + while (zookeeper->tryGet(quorum_status_path, value, &stat)) + { + ReplicatedMergeTreeQuorumEntry quorum_entry; + quorum_entry.fromString(value); + + if (quorum_entry.part_name != part_name) + { + /// There is no information about interested part in this node. + break; + } + + /// Since that we are sure that interested part is being involved in insert with quorum. + /// Our goal is to delete "status" node and information from "last_part" node. + + auto code = zookeeper->tryRemove(quorum_status_path, stat.version); + + if (code == Coordination::ZOK) + { + break; + } + else if (code == Coordination::ZNONODE) + { + /// The quorum has already been achieved. + break; + } + else if (code == Coordination::ZBADVERSION) + { + /// Node was updated meanwhile. We must re-read it and repeat all the actions. 
+ continue; + } + else + throw Coordination::Exception(code, quorum_status_path); + } +} + +void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id, const String & part_name) +{ + auto zookeeper = getZooKeeper(); + + /// The name of the previous part for which the quorum was reached. + const String quorum_last_part_path = zookeeper_path + "/quorum/last_part"; + + /// Delete information from "last_part" node. + + while (true) + { + Coordination::Requests ops; + Coordination::Responses responses; + + Coordination::Stat added_parts_stat; + String old_added_parts = zookeeper->get(quorum_last_part_path, &added_parts_stat); + + ReplicatedMergeTreeQuorumAddedParts parts_with_quorum(format_version); + + if (!old_added_parts.empty()) + parts_with_quorum.fromString(old_added_parts); + + /// Delete information about particular partition. + + /// Since c++20. + if (!parts_with_quorum.added_parts.contains(partition_id)) + { + /// There is no information about interested part. + break; + } + + /// De Morgan's law + if (part_name == "" || parts_with_quorum.added_parts[partition_id] == part_name) + parts_with_quorum.added_parts.erase(partition_id); + else + break; + + String new_added_parts = parts_with_quorum.toString(); + + auto code = zookeeper->trySet(quorum_last_part_path, new_added_parts, added_parts_stat.version); + + if (code == Coordination::ZOK) + { + break; + } + else if (code == Coordination::ZNONODE) + { + /// Node is deleted. It is impossible, but it is Ok. + break; + } + else if (code == Coordination::ZBADVERSION) + { + /// Node was updated meanwhile. We must re-read it and repeat all the actions. + continue; + } + else + throw Coordination::Exception(code, quorum_last_part_path); + } +} + + +void StorageReplicatedMergeTree::excludeSomePartsFromMerge(FutureMergedMutatedPart & future_part) +{ + LOG_FATAL(&Poco::Logger::get("excludeSomePartsFromMerge"), "BEGIN!"); + + auto zookeeper = getZooKeeper(); + /// Information on which replicas a part has been added, if the quorum has not yet been reached. + const String quorum_status_path = zookeeper_path + "/quorum/status"; + + String value; + Coordination::Stat stat; + + if (zookeeper->tryGet(quorum_status_path, value, &stat)) { + ReplicatedMergeTreeQuorumEntry quorum_entry; + quorum_entry.fromString(value); + + MergeTreeData::DataPartsVector & parts_to_merge = future_part.parts; + + parts_to_merge.erase( + std::remove_if( + parts_to_merge.begin(), parts_to_merge.end(), + [&quorum_entry](const MergeTreeData::DataPartPtr & part_to_merge) { return part_to_merge->name == quorum_entry.part_name; }), + parts_to_merge.end()); + } +} + + bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const String & source_replica_path, bool to_detached, size_t quorum) { const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); @@ -2871,6 +3014,7 @@ void StorageReplicatedMergeTree::startup() void StorageReplicatedMergeTree::shutdown() { + LOG_FATAL(&Poco::Logger::get("shutdown"), "SHUTDOWN!"); clearOldPartsFromFilesystem(true); /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. 
fetcher.blocker.cancelForever(); @@ -3510,6 +3654,9 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const ASTPt String partition_id = getPartitionIDFromQuery(partition, query_context); + if (query_context.getSettingsRef().insert_quorum) + cleanLastPartNode(partition_id); + LogEntry entry; if (dropPartsInPartition(*zookeeper, partition_id, entry, detach)) { @@ -5177,10 +5324,17 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti return merger_mutator.ttl_merges_blocker.cancel(); if (action_type == ActionLocks::PartsFetch) - return fetcher.blocker.cancel(); + { + return fetcher.blocker.cancel(); + } + if (action_type == ActionLocks::PartsSend) - return data_parts_exchange_endpoint ? data_parts_exchange_endpoint->blocker.cancel() : ActionLock(); + { + LOG_FATAL(&Poco::Logger::get("ActionLock"), "Cancel PartsSend"); + return data_parts_exchange_endpoint ? data_parts_exchange_endpoint->blocker.cancel() : ActionLock(); + } + if (action_type == ActionLocks::ReplicationQueue) return queue.actions_blocker.cancel(); @@ -5198,6 +5352,9 @@ bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UI /// Let's fetch new log entries firstly queue.pullLogsToQueue(getZooKeeper()); + /// This is significant, because the execution of this task could be delayed at BackgroundPool. + /// And we force it to be executed. + queue_task_handle->wake(); Poco::Event target_size_event; auto callback = [&target_size_event, queue_size] (size_t new_queue_size) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 01dd32614f9..bf07b592e2f 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -464,6 +464,14 @@ private: /// With the quorum being tracked, add a replica to the quorum for the part. void updateQuorum(const String & part_name); + /// Delete particular part name not to track it in future. 
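+    /// Removes the /quorum/status node if it still refers to the given part; a no-op once
+    /// the quorum has already been reached (see the definition in StorageReplicatedMergeTree.cpp above).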
+ void deletePartFromPendingQuorum(const String & part_name); + + + void cleanLastPartNode(const String & partition_id, const String & part_name = ""); + + void excludeSomePartsFromMerge(FutureMergedMutatedPart & future_part); + /// Creates new block number if block with such block_id does not exist std::optional allocateBlockNumber( const String & partition_id, zkutil::ZooKeeperPtr & zookeeper, diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index f7256de9d9a..fdf9ac2834b 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -90,6 +90,8 @@ def test_polymorphic_parts_basics(start_cluster, first_node, second_node): first_node.query("SYSTEM STOP MERGES") second_node.query("SYSTEM STOP MERGES") + print(first_node.query("SELECT * FROM system.settings where name='insert_quorum' format Vertical")) + for size in [300, 300, 600]: insert_random_data('polymorphic_table', first_node, size) second_node.query("SYSTEM SYNC REPLICA polymorphic_table", timeout=20) diff --git a/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml b/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml deleted file mode 100644 index adf6ad80247..00000000000 --- a/tests/integration/test_quorum_inserts/configs/conf.d/clusters.xml +++ /dev/null @@ -1,37 +0,0 @@ - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - s0_0_2 - 9000 - - - - - - diff --git a/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml b/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml deleted file mode 100644 index abad0dee450..00000000000 --- a/tests/integration/test_quorum_inserts/configs/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_quorum_inserts/configs/users.d/settings.xml b/tests/integration/test_quorum_inserts/configs/users.d/settings.xml new file mode 100644 index 00000000000..0dbee66ff44 --- /dev/null +++ b/tests/integration/test_quorum_inserts/configs/users.d/settings.xml @@ -0,0 +1,12 @@ + + + + + 10000000000 + 0 + + 2 + 1 + + + \ No newline at end of file diff --git a/tests/integration/test_quorum_inserts/configs/users.xml b/tests/integration/test_quorum_inserts/configs/users.xml deleted file mode 100644 index c5114c10cde..00000000000 --- a/tests/integration/test_quorum_inserts/configs/users.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - 1 - 2 - 1 - 5000 - - - - - - - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 1f6c1b9b852..64190c6d8aa 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -1,173 +1,233 @@ -import os -import sys import time import pytest +from helpers.test_tools import TSV from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) +zero = cluster.add_instance("zero", + config_dir="configs", + macros={"cluster": "anime", "shard": "0", "replica": "zero"}, + with_zookeeper=True) + +first = cluster.add_instance("first", + config_dir="configs", + macros={"cluster": "anime", "shard": "0", "replica": "first"}, + with_zookeeper=True) + +second = cluster.add_instance("second", + config_dir="configs", + macros={"cluster": "anime", "shard": "0", "replica": "second"}, + with_zookeeper=True) + +def 
execute_on_all_cluster(query_): + for node in [zero, first, second]: + node.query(query_) + @pytest.fixture(scope="module") def started_cluster(): global cluster try: - clusters_schema = { - "0" : {"0" : ["0", "1", "2"]} - } - - for cluster_name, shards in clusters_schema.iteritems(): - for shard_name, replicas in shards.iteritems(): - for replica_name in replicas: - name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name) - cluster.add_instance(name, - config_dir="configs", - macros={"cluster": cluster_name, "shard": shard_name, "replica": replica_name}, - with_zookeeper=True) - cluster.start() yield cluster finally: cluster.shutdown() -def test_drop_replica_and_achieve_quorum(started_cluster): - zero = cluster.instances['s0_0_0'] - first = cluster.instances['s0_0_1'] - second = cluster.instances['s0_0_2'] - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") - zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") +def test_simple_add_replica(started_cluster): + execute_on_all_cluster("DROP TABLE IF EXISTS test_simple") - create_query = "CREATE TABLE bug.test_drop_replica_and_achieve_quorum " \ + create_query = "CREATE TABLE test_simple " \ "(a Int8, d Date) " \ - "Engine = ReplicatedMergeTree('/clickhouse/tables/test_drop_replica_and_achieve_quorum', '{}') " \ + "Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') " \ + "PARTITION BY d ORDER BY a" + + zero.query(create_query) + first.query(create_query) + + first.query("SYSTEM STOP FETCHES test_simple") + + zero.query("INSERT INTO test_simple VALUES (1, '2011-01-01')", settings={'insert_quorum' : 1}) + + assert '1\t2011-01-01\n' == zero.query("SELECT * from test_simple") + assert '' == first.query("SELECT * from test_simple") + + first.query("SYSTEM START FETCHES test_simple") + + zero.query("SYSTEM SYNC REPLICA test_simple", timeout=20) + + assert '1\t2011-01-01\n' == zero.query("SELECT * from test_simple") + assert '1\t2011-01-01\n' == first.query("SELECT * from test_simple") + + second.query(create_query) + second.query("SYSTEM SYNC REPLICA test_simple", timeout=20) + + assert '1\t2011-01-01\n' == zero.query("SELECT * from test_simple") + assert '1\t2011-01-01\n' == first.query("SELECT * from test_simple") + assert '1\t2011-01-01\n' == second.query("SELECT * from test_simple") + + execute_on_all_cluster("DROP TABLE IF EXISTS test_simple") + + + +def test_drop_replica_and_achieve_quorum(started_cluster): + execute_on_all_cluster("DROP TABLE IF EXISTS test_drop_replica_and_achieve_quorum") + + create_query = "CREATE TABLE test_drop_replica_and_achieve_quorum " \ + "(a Int8, d Date) " \ + "Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') " \ "PARTITION BY d ORDER BY a" print("Create Replicated table with two replicas") - zero.query(create_query.format(0)) - first.query(create_query.format(1)) + zero.query(create_query) + first.query(create_query) print("Stop fetches on one replica. Since that, it will be isolated.") - first.query("SYSTEM STOP FETCHES bug.test_drop_replica_and_achieve_quorum") + first.query("SYSTEM STOP FETCHES test_drop_replica_and_achieve_quorum") print("Insert to other replica. 
This query will fail.")
-    quorum_timeout = zero.query_and_get_error("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (1, '2011-01-01')")
+    quorum_timeout = zero.query_and_get_error("INSERT INTO test_drop_replica_and_achieve_quorum(a,d) VALUES (1, '2011-01-01')",
+                                              settings={'insert_quorum_timeout' : 5000})
     assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail."

-    assert "1\t2011-01-01\n" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum",
-                                           settings={'select_sequential_consistency' : 0})
+    assert TSV("1\t2011-01-01\n") == TSV(zero.query("SELECT * FROM test_drop_replica_and_achieve_quorum",
+                                                    settings={'select_sequential_consistency' : 0}))

-    print("Add third replica")
-    second.query(create_query.format(2))
-
-    zero.query("SYSTEM RESTART REPLICA bug.test_drop_replica_and_achieve_quorum")
+    assert TSV("") == TSV(zero.query("SELECT * FROM test_drop_replica_and_achieve_quorum",
+                                     settings={'select_sequential_consistency' : 1}))

+    #TODO:(Mikhaylov) begin; maybe delete these lines. I want ClickHouse to fetch parts and update the quorum.
     print("START FETCHES first replica")
-    first.query("SYSTEM START FETCHES bug.test_drop_replica_and_achieve_quorum")
-
-    time.sleep(5)
-
-    print(zero.query("SELECT * from system.replicas format Vertical"))
-
-
-    print("---------")
-    print(zero.query("SELECT * from system.replication_queue format Vertical"))
-    print("---------")
-
-
-    print(first.query("SELECT * from system.replicas format Vertical"))
-    print("---------")
-    print(first.query("SELECT * from system.replication_queue format Vertical"))
-    print("---------")
-    print(second.query("SELECT * from system.replicas format Vertical"))
-    print("---------")
-    print(first.query("SELECT * from system.replication_queue format Vertical"))
-
+    first.query("SYSTEM START FETCHES test_drop_replica_and_achieve_quorum")

     print("SYNC first replica")
-    first.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum")
+    first.query("SYSTEM SYNC REPLICA test_drop_replica_and_achieve_quorum", timeout=20)
+    #TODO:(Mikhaylov) end
+
+    print("Add second replica")
+    second.query(create_query)

     print("SYNC second replica")
-    second.query("SYSTEM SYNC REPLICA bug.test_drop_replica_and_achieve_quorum")
+    second.query("SYSTEM SYNC REPLICA test_drop_replica_and_achieve_quorum", timeout=20)

     print("Quorum for previous insert achieved.")
-    assert "1\t2011-01-01\n" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum",
-                                             settings={'select_sequential_consistency' : 1})
+    assert TSV("1\t2011-01-01\n") == TSV(second.query("SELECT * FROM test_drop_replica_and_achieve_quorum",
+                                                      settings={'select_sequential_consistency' : 1}))

     print("Now we can insert some other data.")
-    zero.query("INSERT INTO bug.test_drop_replica_and_achieve_quorum(a,d) VALUES (2, '2012-02-02')")
+    zero.query("INSERT INTO test_drop_replica_and_achieve_quorum(a,d) VALUES (2, '2012-02-02')")

-    assert "1\t2011-01-01\n2 2012-02-02" == zero.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum")
-    assert "1\t2011-01-01\n2 2012-02-02" == second.query("SELECT * FROM bug.test_drop_replica_and_achieve_quorum")
+    assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV(zero.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a"))
+    assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV(first.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a"))
+    assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV(second.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY 
a")) - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") + execute_on_all_cluster("DROP TABLE IF EXISTS test_drop_replica_and_achieve_quorum") -def test_insert_quorum_with_drop_partition(started_cluster): - zero = cluster.instances['s0_0_0'] - first = cluster.instances['s0_0_1'] - second = cluster.instances['s0_0_2'] +@pytest.mark.parametrize( + ('add_new_data'), + [ + False, + True + ] +) - zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas") - zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_three_replicas") +def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): + execute_on_all_cluster("DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition") - zero.query("CREATE TABLE bug.quorum_insert_with_drop_partition ON CLUSTER one_shard_three_replicas " - "(a Int8, d Date) " - "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " - "PARTITION BY d ORDER BY a ") + create_query = "CREATE TABLE test_quorum_insert_with_drop_partition " \ + "(a Int8, d Date) " \ + "Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') " \ + "PARTITION BY d ORDER BY a " - print("Stop fetches for bug.quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_drop_partition") + print("Create Replicated table with two replicas") + zero.query(create_query) + first.query(create_query) + second.query(create_query) + + print("Stop fetches for test_quorum_insert_with_drop_partition at first replica.") + first.query("SYSTEM STOP FETCHES test_quorum_insert_with_drop_partition") print("Insert with quorum. (zero and second)") - zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')") + zero.query("INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')") print("Drop partition.") - zero.query_and_get_error("ALTER TABLE bug.quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'") + zero.query("ALTER TABLE test_quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'") - print("Insert to deleted partition") - zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')") + if (add_new_data): + print("Insert to deleted partition") + zero.query("INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')") - print("Sync other replica from quorum.") - second.query("SYSTEM SYNC REPLICA bug.quorum_insert_with_drop_partition") + print("Resume fetches for test_quorum_insert_with_drop_partition at first replica.") + first.query("SYSTEM START FETCHES test_quorum_insert_with_drop_partition") + + print("Sync first replica with others.") + first.query("SYSTEM SYNC REPLICA test_quorum_insert_with_drop_partition") + + assert "20110101" not in first.query("SELECT * FROM system.zookeeper " \ + "where path='/clickhouse/tables/0/test_quorum_insert_with_drop_partition/quorum/last_part' " \ + "format Vertical") print("Select from updated partition.") - assert "2 2011-01-01\n" == zero.query("SELECT * FROM bug.quorum_insert_with_drop_partition") - assert "2 2011-01-01\n" == second.query("SELECT * FROM bug.quorum_insert_with_drop_partition") + if (add_new_data): + assert TSV("2\t2011-01-01\n") == TSV(zero.query("SELECT * FROM test_quorum_insert_with_drop_partition")) + assert TSV("2\t2011-01-01\n") == TSV(second.query("SELECT * FROM test_quorum_insert_with_drop_partition")) + else: + assert TSV("") == 
TSV(zero.query("SELECT * FROM test_quorum_insert_with_drop_partition"))
+        assert TSV("") == TSV(second.query("SELECT * FROM test_quorum_insert_with_drop_partition"))

-    zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_three_replicas")
+    execute_on_all_cluster("DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition")

 def test_insert_quorum_with_ttl(started_cluster):
-    zero = cluster.instances['s0_0_0']
-    first = cluster.instances['s0_0_1']
+    execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_ttl")

-    zero.query("DROP DATABASE IF EXISTS bug ON CLUSTER one_shard_two_replicas")
-    zero.query("CREATE DATABASE IF NOT EXISTS bug ON CLUSTER one_shard_two_replicas")
+    create_query = "CREATE TABLE test_insert_quorum_with_ttl " \
+                   "(a Int8, d Date) " \
+                   "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " \
+                   "PARTITION BY d ORDER BY a " \
+                   "TTL d + INTERVAL 5 second " \
+                   "SETTINGS merge_with_ttl_timeout=2 "

-    zero.query("CREATE TABLE bug.quorum_insert_with_ttl ON CLUSTER one_shard_two_replicas "
-               "(a Int8, d Date) "
-               "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') "
-               "PARTITION BY d ORDER BY a "
-               "TTL d + INTERVAL 5 second "
-               "SETTINGS merge_with_ttl_timeout=2 ")
+    print("Create Replicated table with two replicas")
+    zero.query(create_query)
+    first.query(create_query)

-    print("Stop fetches for bug.quorum_insert_with_ttl at first replica.")
-    first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_ttl")
+    print("Stop fetches for test_insert_quorum_with_ttl at first replica.")
+    first.query("SYSTEM STOP FETCHES test_insert_quorum_with_ttl")

     print("Insert should fail since it cannot reach the quorum.")
-    quorum_timeout = zero.query_and_get_error("INSERT INTO bug.quorum_insert_with_ttl(a,d) VALUES(6, now())")
+    quorum_timeout = zero.query_and_get_error("INSERT INTO test_insert_quorum_with_ttl(a,d) VALUES(1, '2011-01-01')",
+                                              settings={'insert_quorum_timeout' : 5000})
     assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail."

-    print("Wait 10 seconds and the data should be dropped by TTL.")
-    time.sleep(10)
-    count = zero.query("SELECT count() FROM bug.quorum_insert_with_ttl WHERE a=6")
-    assert count == "0\n", "Data have to be dropped by TTL"
+    print(zero.query("SELECT * FROM system.parts format Vertical"))

-    print("Resume fetches for bug.quorum_test_with_ttl at first replica.")
-    first.query("SYSTEM STOP FETCHES bug.quorum_insert_with_ttl")
-    time.sleep(5)
+    print("Wait 10 seconds and the TTL merge has to be executed. 
But it won't delete data.")
+    time.sleep(10)
+    assert TSV("1\t2011-01-01\n") == TSV(zero.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 0}))
+
+    print("Resume fetches for test_insert_quorum_with_ttl at first replica.")
+    first.query("SYSTEM START FETCHES test_insert_quorum_with_ttl")
+
+    print("Sync first replica.")
+    first.query("SYSTEM SYNC REPLICA test_insert_quorum_with_ttl")
+
+
+    print(first.query("SELECT * from system.replicas format Vertical"))
+    print(first.query("SELECT * from system.zookeeper where path='/clickhouse/tables/test_insert_quorum_with_ttl/quorum' format Vertical"))
+
+    zero.query("INSERT INTO test_insert_quorum_with_ttl(a,d) VALUES(1, '2011-01-01')",
+               settings={'insert_quorum_timeout' : 5000})
+
+
+    assert TSV("1\t2011-01-01\n") == TSV(first.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 0}))
+    assert TSV("1\t2011-01-01\n") == TSV(first.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 1}))

     print("Inserts should resume.")
-    zero.query("INSERT INTO bug.quorum_insert_with_ttl(a) VALUES(6)")
+    zero.query("INSERT INTO test_insert_quorum_with_ttl(a, d) VALUES(2, '2012-02-02')")
+
+    execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_ttl")

From c9542b66018bc2e0ca124572642af364f627ebcb Mon Sep 17 00:00:00 2001
From: Konstantin Lebedev
Date: Sat, 11 Apr 2020 01:08:43 +0300
Subject: [PATCH 262/752] Style fixes for communication between ClickHouse and
 ZooKeeper over SSL

---
 src/Common/ZooKeeper/ZooKeeper.cpp | 3 +--
 src/Common/ZooKeeper/ZooKeeperImpl.h | 3 ++-
 src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index f2442f3f5c5..032d1e90ff5 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -72,9 +72,8 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
         {
             bool secure = bool(startsWith(host_string, "secure://"));

-            if (secure) {
+            if (secure)
                 host_string.erase(0, strlen("secure://"));
-            }

             nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
         }
diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h
index 069df723d43..840cbdbde3f 100644
--- a/src/Common/ZooKeeper/ZooKeeperImpl.h
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.h
@@ -93,7 +93,8 @@ struct ZooKeeperRequest;
 class ZooKeeper : public IKeeper
 {
 public:
-    struct Node {
+    struct Node
+    {
         Poco::Net::SocketAddress address;
         bool secure;
     };
diff --git a/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp
index 0bca8e0f561..d9d3402fa32 100644
--- a/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp
+++ b/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp
@@ -29,12 +29,12 @@ try
     splitInto<','>(hosts_strings, hosts_arg);
     ZooKeeper::Nodes nodes;
     nodes.reserve(hosts_strings.size());
-    for (auto & host_string : hosts_strings) {
+    for (auto & host_string : hosts_strings)
+    {
         bool secure = bool(startsWith(host_string, "secure://"));

-        if (secure) {
+        if (secure)
             host_string.erase(0, strlen("secure://"));
-        }

         nodes.emplace_back(ZooKeeper::Node{Poco::Net::SocketAddress{host_string},secure});
     }
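An editorial illustration of the parsing rule that both refactored loops above implement: an optional secure:// prefix on a host string selects TLS and is stripped before the address is used. A rough Python equivalent (not part of the patch):

```python
def parse_zk_hosts(hosts_arg):
    """Turn 'host1:2181,secure://host2:2281' into (address, secure) pairs."""
    nodes = []
    for host_string in hosts_arg.split(','):
        # A 'secure://' prefix marks a TLS connection and is removed
        # before the socket address is parsed.
        secure = host_string.startswith('secure://')
        if secure:
            host_string = host_string[len('secure://'):]
        nodes.append((host_string, secure))
    return nodes

assert parse_zk_hosts('h1:2181,secure://h2:2281') == [('h1:2181', False), ('h2:2281', True)]
```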
From c3a71616d9a55b6745fbd4872ef0990a2816d5f9 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Sat, 11 Apr 2020 01:29:15 +0300
Subject: [PATCH 263/752] simplified backport script

---
 utils/simple-backport/.gitignore | 1 +
 utils/simple-backport/README.md | 73 ++++++++++++++++++++++++
 utils/simple-backport/backport.sh | 95 +++++++++++++++++++++++++++++++
 3 files changed, 169 insertions(+)
 create mode 100644 utils/simple-backport/.gitignore
 create mode 100644 utils/simple-backport/README.md
 create mode 100755 utils/simple-backport/backport.sh

diff --git a/utils/simple-backport/.gitignore b/utils/simple-backport/.gitignore
new file mode 100644
index 00000000000..72e8ffc0db8
--- /dev/null
+++ b/utils/simple-backport/.gitignore
@@ -0,0 +1 @@
+*
diff --git a/utils/simple-backport/README.md b/utils/simple-backport/README.md
new file mode 100644
index 00000000000..89a0c6d6f22
--- /dev/null
+++ b/utils/simple-backport/README.md
@@ -0,0 +1,73 @@
+# Simplified backport script
+
+This is a simplified script for backporting. It determines which pull requests have not yet been backported from master to the specified branch. Run the script from the directory it resides in, passing the branch name as an argument. It assumes that your upstream remote is called origin.
+```
+cd my-clickhouse-repo/simple-backport
+git fetch origin
+time GITHUB_TOKEN= ./backport.sh 20.1
+```
+
+The script prints a primitive report:
+```
+$ time GITHUB_TOKEN= ~/backport.sh 20.3
+144 PRs differ between 20.3 and master.
+backport https://github.com/ClickHouse/ClickHouse/pull/10135
+backport https://github.com/ClickHouse/ClickHouse/pull/10121
+...
+backport https://github.com/ClickHouse/ClickHouse/pull/9808
+backport https://github.com/ClickHouse/ClickHouse/pull/9410
+
+real 0m1.213s
+user 0m1.065s
+sys 0m0.311s
+```
+
+A report `<your-branch>-report.tsv` is also generated in the working directory:
+
+```
+$ cat 20.3-report.tsv
+skip 10153 https://github.com/ClickHouse/ClickHouse/pull/10153 pr10153.json
+skip 10147 https://github.com/ClickHouse/ClickHouse/pull/10147 pr10147.json
+no-backport 10138 https://github.com/ClickHouse/ClickHouse/pull/10138 pr10138.json
+backport 10135 https://github.com/ClickHouse/ClickHouse/pull/10135 pr10135.json
+skip 10134 https://github.com/ClickHouse/ClickHouse/pull/10134 pr10134.json
+...
+```
+
+You can click the links right from the console, or do it even more simply:
+
+```
+$ cat <branch>-report.tsv | grep ^backport | cut -f3
+$ cat <branch>-report.tsv | grep ^backport | cut -f3 | xargs -n1 xdg-open
+```
+
+This command opens in the browser all pull requests that need to be backported. There are other statuses as well, have a look:
+
+```
+$ cat 20.1-report.tsv | cut -f1 | sort | uniq -c | sort -rn
+ 446 skip
+ 38 done
+ 25 conflict
+ 18 backport
+ 10 no-backport
+```
+
+
+### How to mark a pull request?
+By default, all pull requests whose description specifies the Bug fix changelog category are backported. If that is not enough, use the tags (a Python sketch of these rules follows this file):
+* v20.1-backported -- this pull request has already been backported to the 20.1 branch. For the case when it was not detected automatically.
+* v20.1-no-backport -- it should not be backported to the 20.1 branch.
+* pr-no-backport -- it should not be backported to any branch.
+* v20.1-conflicts -- there was a conflict when backporting to 20.1. The script skips such pull requests; you can return to them later.
+* pr-should-backport -- it should be backported to the supported branches.
+* v20.1-must-backport -- it should be backported to 20.1.
+
+
+### I fixed a pull request, why doesn't the script see it?
+While running, the script caches pull request data in the current directory to save the GitHub API quota. Delete the cached files, e.g., for all requests that are not marked as skipped:
+```
+$ cat <your-branch>-report.tsv | grep -v "^skip" | cut -f4
+$ cat <your-branch>-report.tsv | grep -v "^skip" | cut -f4 | xargs rm
+```
+
+
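As referenced in the tag list above, here is a condensed editorial sketch, in Python, of the per-PR decision rules that backport.sh (added below) applies: the Bug fix changelog category selects the default action, labels may override it, and more than one matching label is reported as an error. The authoritative logic is the case statement in the script itself:

```python
def resolve_action(branch, changelog_is_bugfix, labels):
    # Default: bugfixes are backported, everything else is skipped.
    action = 'backport' if changelog_is_bugfix else 'skip'
    # Label overrides, mirroring the case statement in backport.sh.
    overrides = {
        'pr-must-backport': 'backport', f'v{branch}-must-backport': 'backport',
        'pr-no-backport': 'no-backport', f'v{branch}-no-backport': 'no-backport',
        f'v{branch}-conflicts': 'conflict',
        f'v{branch}': 'done', f'v{branch}-backported': 'done',
    }
    matched = [label for label in labels if label in overrides]
    if len(matched) > 1:
        raise ValueError(f'PR has conflicting labels: {matched}')
    return overrides[matched[0]] if matched else action

assert resolve_action('20.1', True, ['v20.1-no-backport']) == 'no-backport'
assert resolve_action('20.1', False, []) == 'skip'
```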
diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh
new file mode 100755
index 00000000000..4a39f9d97c3
--- /dev/null
+++ b/utils/simple-backport/backport.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+set -e
+
+branch="$1"
+merge_base=$(git merge-base origin/master "origin/$branch")
+
+# Make lists of PRs that were merged into each branch. Use first parent here, or else
+# we'll get weird things like seeing older master that was merged into a PR branch
+# that was then merged into master.
+git log "$merge_base..origin/master" --first-parent --oneline > master-log.txt
+sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p" master-log.txt | sort -rn > master-prs.txt
+
+git log "$merge_base..origin/$branch" --first-parent --oneline > "$branch-log.txt"
+sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p" "$branch-log.txt" | sort -rn > "$branch-prs.txt"
+
+# Find all master PRs that are not in branch by calculating differences of two PR lists.
+grep -f "$branch-prs.txt" -F -x -v master-prs.txt > "$branch-diff-prs.txt"
+
+rm "$branch-report.tsv" ||:
+
+echo "$(wc -l < "$branch-diff-prs".txt) PRs differ between $branch and master."
+
+for pr in $(cat "$branch-diff-prs.txt")
+do
+    # Download PR info from GitHub.
+    file="pr$pr.json"
+    if ! [ -f "$file" ]
+    then
+        if ! curl -H "Authorization: token $GITHUB_TOKEN" \
+                -sSf "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" \
+                > "$file"
+        then
+            >&2 cat "$file"
+            rm "$file"
+            break
+        fi
+        sleep 0.5
+    fi
+
+    if ! [ "$pr" == "$(jq -r .number "$file")" ]
+    then
+        >&2 echo "File $file is broken (no PR number)."
+        continue
+    fi
+
+    action="skip"
+
+    # First, check the changelog category. We port all bugfixes.
+    if jq -r .body "$file" | grep -i "^- bug[ -]*fix" > /dev/null
+    then
+        action="backport"
+    fi
+
+    # Next, check the tags. They might override the decision.
+    matched_labels=()
+    for label in $(jq -r .labels[].name "$file")
+    do
+        label_action=""
+        case "$label" in
+            pr-must-backport | "v$branch-must-backport")
+                label_action="backport"
+                ;;
+            pr-no-backport | "v$branch-no-backport")
+                label_action="no-backport"
+                ;;
+            "v$branch-conflicts")
+                label_action="conflict"
+                ;;
+            "v$branch" | "v$branch-backported")
+                label_action="done"
+                ;;
+        esac
+        if [ "$label_action" != "" ]
+        then
+            action="$label_action"
+            matched_labels+=("$label")
+        fi
+    done
+
+    # Show an error if there are conflicting labels. 
+    if [ ${#matched_labels[@]} -gt 1 ]
+    then
+        >&2 echo "PR #$pr has conflicting labels: ${matched_labels[*]}"
+        continue
+    fi
+
+    url="https://github.com/ClickHouse/ClickHouse/pull/$pr"
+    printf "%s\t%s\t%s\t%s\n" "$action" "$pr" "$url" "$file" >> "$branch-report.tsv"
+    if [ "$action" == "backport" ]
+    then
+        printf "%s\t%s\n" "$action" "$url"
+    fi
+done
+
+wait

From c097e1f9e5d766989666ffec371ff7dd0b75ca6a Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Sat, 11 Apr 2020 01:38:40 +0300
Subject: [PATCH 264/752] Update README.md

---
 utils/simple-backport/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/simple-backport/README.md b/utils/simple-backport/README.md
index 89a0c6d6f22..89a9ce36934 100644
--- a/utils/simple-backport/README.md
+++ b/utils/simple-backport/README.md
@@ -2,7 +2,7 @@
 This is a simplified script for backporting. It determines which pull requests have not yet been backported from master to the specified branch. Run the script from the directory it resides in, passing the branch name as an argument. It assumes that your upstream remote is called origin.
 ```
-cd my-clickhouse-repo/simple-backport
+cd my-clickhouse-repo/utils/simple-backport
 git fetch origin
 time GITHUB_TOKEN= ./backport.sh 20.1
 ```

From 32f0789eaa98dd4c03554c054bdba4bcf19a1340 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Sat, 11 Apr 2020 01:41:47 +0300
Subject: [PATCH 265/752] Update README.md

---
 utils/simple-backport/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/simple-backport/README.md b/utils/simple-backport/README.md
index 89a9ce36934..13378f93989 100644
--- a/utils/simple-backport/README.md
+++ b/utils/simple-backport/README.md
@@ -59,7 +59,7 @@ $ cat 20.1-report.tsv | cut -f1 | sort | uniq -c | sort -rn
 * v20.1-no-backport -- it should not be backported to the 20.1 branch.
 * pr-no-backport -- it should not be backported to any branch.
 * v20.1-conflicts -- there was a conflict when backporting to 20.1. The script skips such pull requests; you can return to them later.
-* pr-should-backport -- it should be backported to the supported branches.
+* pr-must-backport -- it should be backported to the supported branches.
 * v20.1-must-backport -- it should be backported to 20.1.

From 7c57876ea4edbec8e6da7b0c4e207a807de468db Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Sat, 11 Apr 2020 03:00:33 +0300
Subject: [PATCH 266/752] simplified backport script

---
 utils/simple-backport/backport.sh | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh
index 4a39f9d97c3..a0143108383 100755
--- a/utils/simple-backport/backport.sh
+++ b/utils/simple-backport/backport.sh
@@ -8,10 +8,15 @@ merge_base=$(git merge-base origin/master "origin/$branch")
 # we'll get weird things like seeing older master that was merged into a PR branch
 # that was then merged into master.
 git log "$merge_base..origin/master" --first-parent --oneline > master-log.txt
-sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p" master-log.txt | sort -rn > master-prs.txt
-
 git log "$merge_base..origin/$branch" --first-parent --oneline > "$branch-log.txt"
-sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p" "$branch-log.txt" | sort -rn > "$branch-prs.txt"
+
+# Search for PR numbers in commit messages. The first variant is a normal merge, and the second
+# variant is a squashed merge. 
+find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; + s/^.*(#\([[:digit:]]\+\))$/\1/p") + +"${find_prs[@]}" master-log.txt | sort -rn > master-prs.txt +"${find_prs[@]}" "$branch-log.txt" | sort -rn > "$branch-prs.txt" # Find all master PRs that are not in branch by calculating differences of two PR lists. grep -f "$branch-prs.txt" -F -x -v master-prs.txt > "$branch-diff-prs.txt" @@ -39,7 +44,7 @@ do if ! [ "$pr" == "$(jq -r .number "$file")" ] then - >&2 echo "File $file is broken (no PR number)." + >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')." continue fi @@ -92,4 +97,3 @@ do fi done -wait From 68fa04054aec7ef6314b6850f95f4adcb1fe823e Mon Sep 17 00:00:00 2001 From: Eugene Klimov Date: Sat, 11 Apr 2020 08:27:24 +0300 Subject: [PATCH 267/752] add in server settings and monitoring section (#10015) * add description for in server settings and monitoring section Signed-off-by: Slach * Update docs/en/operations/server_settings/settings.md Co-Authored-By: BayoNet * Update docs/en/operations/server_settings/settings.md Co-Authored-By: BayoNet * Update docs/en/operations/server_settings/settings.md Co-Authored-By: BayoNet * Update docs/en/operations/server_settings/settings.md Co-Authored-By: BayoNet * Update docs/en/operations/server_settings/settings.md Co-Authored-By: BayoNet * Update docs/en/operations/server_settings/settings.md Co-Authored-By: BayoNet * sync russian description with english Signed-off-by: Slach * Update docs/ru/operations/server_settings/settings.md * sync russian description with english Signed-off-by: Slach Co-authored-by: BayoNet Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/operations/monitoring.md | 2 ++ .../settings.md | 24 +++++++++++++++++++ docs/ru/operations/monitoring.md | 2 ++ .../settings.md | 24 +++++++++++++++++++ 4 files changed, 52 insertions(+) diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 363e9cc4bff..dee1255569b 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -37,6 +37,8 @@ You can find metrics in the [system.metrics](../operations/system_tables.md#syst You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_configuration_parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html). +You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](server_configuration_parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/). + Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`. To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. 
If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.

diff --git a/docs/en/operations/server_configuration_parameters/settings.md b/docs/en/operations/server_configuration_parameters/settings.md
index 85744a039f4..02c00fababf 100644
--- a/docs/en/operations/server_configuration_parameters/settings.md
+++ b/docs/en/operations/server_configuration_parameters/settings.md
@@ -536,6 +536,30 @@ The path to the directory containing data.
 <path>/var/lib/clickhouse/</path>
 ```

+## prometheus {#server_configuration_parameters-prometheus}
+
+Exposes metrics data for scraping by [Prometheus](https://prometheus.io).
+
+Settings:
+
+- `endpoint` – HTTP endpoint for scraping metrics by the Prometheus server. Must start with '/'.
+- `port` – Port for the `endpoint`.
+- `metrics` – Flag that enables exposing metrics from the [system.metrics](../system_tables.md#system_tables-metrics) table.
+- `events` – Flag that enables exposing metrics from the [system.events](../system_tables.md#system_tables-events) table.
+- `asynchronous_metrics` – Flag that enables exposing current metric values from the [system.asynchronous\_metrics](../system_tables.md#system_tables-asynchronous_metrics) table.
+
+**Example**
+
+``` xml
+<prometheus>
+    <endpoint>/metrics</endpoint>
+    <port>8001</port>
+    <metrics>true</metrics>
+    <events>true</events>
+    <asynchronous_metrics>true</asynchronous_metrics>
+</prometheus>
+```
+
 ## query\_log {#server_configuration_parameters-query-log}

 Setting for logging queries received with the [log\_queries=1](../settings/settings.md) setting.
diff --git a/docs/ru/operations/monitoring.md b/docs/ru/operations/monitoring.md
index 469d712376b..2629a4da931 100644
--- a/docs/ru/operations/monitoring.md
+++ b/docs/ru/operations/monitoring.md
@@ -32,6 +32,8 @@ ClickHouse collects:

 You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [graphite](server_configuration_parameters/settings.md#server_configuration_parameters-graphite) section of the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).

+You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [prometheus](server_configuration_parameters/settings.md#server_configuration_parameters-prometheus) section of the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/).
+
 You can also monitor server availability through the HTTP API. Send an `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`.

 To monitor servers in a cluster configuration, set the [max\_replica\_delay\_for\_distributed\_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. If the replica is available and not lagging behind the other replicas, a request to `/replicas_status` returns `200 OK`. If the replica is delayed, the request returns `503 HTTP_SERVICE_UNAVAILABLE`, including information about the size of the lag.
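As a usage illustration of the availability endpoints documented above (an editorial sketch; `localhost` and port 8123, ClickHouse's default HTTP port, are assumptions):

```python
import urllib.error
import urllib.request

def check_clickhouse(host='localhost', http_port=8123):
    """Poll the documented availability endpoints: /ping and /replicas_status."""
    for resource in ('/ping', '/replicas_status'):
        url = f'http://{host}:{http_port}{resource}'
        try:
            with urllib.request.urlopen(url, timeout=5) as response:
                # 200 OK when the server is available / the replica is not lagging.
                print(resource, response.getcode())
        except urllib.error.HTTPError as e:
            # e.g. 503 HTTP_SERVICE_UNAVAILABLE for a lagging replica.
            print(resource, e.code)
        except urllib.error.URLError as e:
            print(resource, 'unreachable:', e.reason)

check_clickhouse()
```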
diff --git a/docs/ru/operations/server_configuration_parameters/settings.md b/docs/ru/operations/server_configuration_parameters/settings.md
index 16f00a82016..618bb9764ad 100644
--- a/docs/ru/operations/server_configuration_parameters/settings.md
+++ b/docs/ru/operations/server_configuration_parameters/settings.md
@@ -524,6 +524,30 @@ ClickHouse will check the `min_part_size` and `min_part_size_rat
 <path>/var/lib/clickhouse/</path>
 ```

+## prometheus {#server_configuration_parameters-prometheus}
+
+Exposes metrics data for scraping by the [Prometheus](https://prometheus.io) monitoring system.
+
+Settings:
+
+- `endpoint` – The path at which metrics data is exposed over HTTP for scraping by Prometheus. Must start with '/'.
+- `port` – The port on which the `endpoint` is available.
+- `metrics` – Flag that enables exporting current metric values from the [system.metrics](../system_tables.md#system_tables-metrics) table.
+- `events` – Flag that enables exporting current metric values from the [system.events](../system_tables.md#system_tables-events) table.
+- `asynchronous_metrics` – Flag that enables exporting current metric values from the [system.asynchronous\_metrics](../system_tables.md#system_tables-asynchronous_metrics) table.
+
+**Example**
+
+``` xml
+<prometheus>
+    <endpoint>/metrics</endpoint>
+    <port>8001</port>
+    <metrics>true</metrics>
+    <events>true</events>
+    <asynchronous_metrics>true</asynchronous_metrics>
+</prometheus>
+```
+
 ## query\_log {#server_configuration_parameters-query-log}

 Configures logging of queries received with the [log\_queries=1](../settings/settings.md) setting.

From e0c972448ea92bf77d41ac0b53e139185115e1ec Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 11 Apr 2020 13:25:53 +0300
Subject: [PATCH 268/752] Cover SHOW CREATE TABLE from database with Dictionary ENGINE

---
 ...how_create_table_from_dictionary.reference | 6 ++++++
 ...1225_show_create_table_from_dictionary.sql | 21 +++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference
 create mode 100644 tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql

diff --git a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference
new file mode 100644
index 00000000000..14ddc093143
--- /dev/null
+++ b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference
@@ -0,0 +1,6 @@
+CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.dict`
+(
+    `key` UInt64,
+    `val` UInt64
+)
+ENGINE = Dictionary(`dict_db_01225.dict`)
diff --git a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql
new file mode 100644
index 00000000000..7550d5292d0
--- /dev/null
+++ b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.sql
@@ -0,0 +1,21 @@
+DROP DATABASE IF EXISTS dict_db_01225;
+DROP DATABASE IF EXISTS dict_db_01225_dictionary;
+CREATE DATABASE dict_db_01225;
+CREATE DATABASE dict_db_01225_dictionary Engine=Dictionary;
+
+CREATE TABLE dict_db_01225.dict_data (key UInt64, val UInt64) Engine=Memory();
+CREATE DICTIONARY dict_db_01225.dict
+(
+    key UInt64 DEFAULT 0,
+    val UInt64 DEFAULT 10
+)
+PRIMARY KEY key
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01225'))
+LIFETIME(MIN 0 MAX 0)
+LAYOUT(FLAT());
+
+SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.dict` FORMAT TSVRaw;
+SHOW CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.no_such_dict`; -- { serverError 
36; }
+
+DROP DATABASE dict_db_01225;
+DROP DATABASE dict_db_01225_dictionary;

From 55a143d1a559a4cdbf915da15972500b1f28a7eb Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Fri, 10 Apr 2020 01:32:59 +0300
Subject: [PATCH 269/752] Avoid superfluous dictionary loads (system.tables, SHOW CREATE TABLE)

This patch avoids loading dictionaries for:
- SELECT * FROM system.tables (used by clickhouse-client for completion)
- SHOW CREATE TABLE some_dict

But the dictionary will still be loaded for:
- SHOW CREATE TABLE some_dict (from the database with Dictionary engine)
---
 src/Databases/DatabaseDictionary.cpp | 4 +--
 src/Databases/DatabaseWithDictionaries.cpp | 36 ++++++++++++++++---
 src/Databases/DatabaseWithDictionaries.h | 6 +++-
 src/Interpreters/ExternalDictionariesLoader.h | 7 ++--
 ...01224_no_superfluous_dict_reload.reference | 19 ++++++++++
 .../01224_no_superfluous_dict_reload.sql | 32 +++++++++++++++++
 6 files changed, 94 insertions(+), 10 deletions(-)
 create mode 100644 tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference
 create mode 100644 tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql

diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp
index 006eb1656a2..9e7788bf846 100644
--- a/src/Databases/DatabaseDictionary.cpp
+++ b/src/Databases/DatabaseDictionary.cpp
@@ -64,7 +64,7 @@ StoragePtr DatabaseDictionary::tryGetTable(
     const Context & context,
     const String & table_name) const
 {
-    auto dict_ptr = context.getExternalDictionariesLoader().tryGetDictionary(table_name);
+    auto dict_ptr = context.getExternalDictionariesLoader().tryGetDictionary(table_name, true /*load*/);
     if (dict_ptr)
     {
         const DictionaryStructure & dictionary_structure = dict_ptr->getStructure();
@@ -94,7 +94,7 @@ ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const Context & context,
     const auto & dictionaries = context.getExternalDictionariesLoader();
     auto dictionary = throw_on_error ? 
dictionaries.getDictionary(table_name) - : dictionaries.tryGetDictionary(table_name); + : dictionaries.tryGetDictionary(table_name, true /*load*/); if (!dictionary) return {}; diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index e849962aae3..6673fdf8075 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -26,6 +26,8 @@ namespace ErrorCodes extern const int TABLE_ALREADY_EXISTS; extern const int UNKNOWN_TABLE; extern const int DICTIONARY_ALREADY_EXISTS; + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_GET_CREATE_TABLE_QUERY; } @@ -165,7 +167,7 @@ void DatabaseWithDictionaries::removeDictionary(const Context & context, const S } } -StoragePtr DatabaseWithDictionaries::tryGetTable(const Context & context, const String & table_name) const +StoragePtr DatabaseWithDictionaries::tryGetTableImpl(const Context & context, const String & table_name, bool load) const { if (auto table_ptr = DatabaseWithOwnTablesBase::tryGetTable(context, table_name)) return table_ptr; @@ -173,10 +175,34 @@ StoragePtr DatabaseWithDictionaries::tryGetTable(const Context & context, const if (isDictionaryExist(context, table_name)) /// We don't need lock database here, because database doesn't store dictionary itself /// just metadata - return getDictionaryStorage(context, table_name); + return getDictionaryStorage(context, table_name, load); return {}; } +StoragePtr DatabaseWithDictionaries::tryGetTable(const Context & context, const String & table_name) const +{ + return tryGetTableImpl(context, table_name, true /*load*/); +} + +ASTPtr DatabaseWithDictionaries::getCreateTableQueryImpl(const Context & context, const String & table_name, bool throw_on_error) const +{ + ASTPtr ast; + bool has_table = tryGetTableImpl(context, table_name, false /*load*/) != nullptr; + auto table_metadata_path = getObjectMetadataPath(table_name); + try + { + ast = getCreateQueryFromMetadata(context, table_metadata_path, throw_on_error); + } + catch (const Exception & e) + { + if (!has_table && e.code() == ErrorCodes::FILE_DOESNT_EXIST && throw_on_error) + throw Exception{"Table " + backQuote(table_name) + " doesn't exist", + ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY}; + else if (throw_on_error) + throw; + } + return ast; +} DatabaseTablesIteratorPtr DatabaseWithDictionaries::getTablesWithDictionaryTablesIterator( const Context & context, const FilterByNameFunction & filter_by_dictionary_name) @@ -195,7 +221,7 @@ DatabaseTablesIteratorPtr DatabaseWithDictionaries::getTablesWithDictionaryTable while (dictionaries_it && dictionaries_it->isValid()) { auto table_name = dictionaries_it->name(); - auto table_ptr = getDictionaryStorage(context, table_name); + auto table_ptr = getDictionaryStorage(context, table_name, false /*load*/); if (table_ptr) result.emplace(table_name, table_ptr); dictionaries_it->next(); @@ -223,11 +249,11 @@ bool DatabaseWithDictionaries::isDictionaryExist(const Context & /*context*/, co return dictionaries.find(dictionary_name) != dictionaries.end(); } -StoragePtr DatabaseWithDictionaries::getDictionaryStorage(const Context & context, const String & table_name) const +StoragePtr DatabaseWithDictionaries::getDictionaryStorage(const Context & context, const String & table_name, bool load) const { auto dict_name = database_name + "." 
+ table_name; const auto & external_loader = context.getExternalDictionariesLoader(); - auto dict_ptr = external_loader.tryGetDictionary(dict_name); + auto dict_ptr = external_loader.tryGetDictionary(dict_name, load); if (dict_ptr) { const DictionaryStructure & dictionary_structure = dict_ptr->getStructure(); diff --git a/src/Databases/DatabaseWithDictionaries.h b/src/Databases/DatabaseWithDictionaries.h index e47ab6206c5..50e4dca671f 100644 --- a/src/Databases/DatabaseWithDictionaries.h +++ b/src/Databases/DatabaseWithDictionaries.h @@ -20,6 +20,8 @@ public: StoragePtr tryGetTable(const Context & context, const String & table_name) const override; + ASTPtr getCreateTableQueryImpl(const Context & context, const String & table_name, bool throw_on_error) const override; + DatabaseTablesIteratorPtr getTablesWithDictionaryTablesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name) override; DatabaseDictionariesIteratorPtr getDictionariesIterator(const Context & context, const FilterByNameFunction & filter_by_dictionary_name) override; @@ -37,7 +39,7 @@ protected: void attachToExternalDictionariesLoader(Context & context); void detachFromExternalDictionariesLoader(); - StoragePtr getDictionaryStorage(const Context & context, const String & table_name) const; + StoragePtr getDictionaryStorage(const Context & context, const String & table_name, bool load) const; ASTPtr getCreateDictionaryQueryImpl(const Context & context, const String & dictionary_name, @@ -45,6 +47,8 @@ protected: private: ext::scope_guard database_as_config_repo_for_external_loader; + + StoragePtr tryGetTableImpl(const Context & context, const String & table_name, bool load) const; }; } diff --git a/src/Interpreters/ExternalDictionariesLoader.h b/src/Interpreters/ExternalDictionariesLoader.h index 68913ffa166..4a54a9963e7 100644 --- a/src/Interpreters/ExternalDictionariesLoader.h +++ b/src/Interpreters/ExternalDictionariesLoader.h @@ -23,9 +23,12 @@ public: return std::static_pointer_cast(load(name)); } - DictPtr tryGetDictionary(const std::string & name) const + DictPtr tryGetDictionary(const std::string & name, bool load) const { - return std::static_pointer_cast(tryLoad(name)); + if (load) + return std::static_pointer_cast(tryLoad(name)); + else + return std::static_pointer_cast(getCurrentLoadResult(name).object); } static void resetAll(); diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference new file mode 100644 index 00000000000..5321624de02 --- /dev/null +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference @@ -0,0 +1,19 @@ +NOT_LOADED +NOT_LOADED +CREATE DICTIONARY dict_db_01224.dict +( + `key` UInt64 DEFAULT 0, + `val` UInt64 DEFAULT 10 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(FLAT()) +NOT_LOADED +CREATE TABLE dict_db_01224_dictionary.`dict_db_01224.dict` +( + `key` UInt64, + `val` UInt64 +) +ENGINE = Dictionary(`dict_db_01224.dict`) +LOADED diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql new file mode 100644 index 00000000000..a6eed6f072c --- /dev/null +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.sql @@ -0,0 +1,32 @@ +DROP DATABASE IF EXISTS dict_db_01224; +DROP DATABASE IF EXISTS dict_db_01224_dictionary; +CREATE DATABASE 
dict_db_01224; + +CREATE TABLE dict_db_01224.dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict_db_01224.dict +( + key UInt64 DEFAULT 0, + val UInt64 DEFAULT 10 +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224')) +LIFETIME(MIN 0 MAX 0) +LAYOUT(FLAT()); + +SELECT status FROM system.dictionaries WHERE database = 'dict_db_01224' AND name = 'dict'; + +SELECT * FROM system.tables FORMAT Null; +SELECT status FROM system.dictionaries WHERE database = 'dict_db_01224' AND name = 'dict'; + +SHOW CREATE TABLE dict_db_01224.dict FORMAT TSVRaw; +SELECT status FROM system.dictionaries WHERE database = 'dict_db_01224' AND name = 'dict'; + +CREATE DATABASE dict_db_01224_dictionary Engine=Dictionary; +SHOW CREATE TABLE dict_db_01224_dictionary.`dict_db_01224.dict` FORMAT TSVRaw; +SELECT status FROM system.dictionaries WHERE database = 'dict_db_01224' AND name = 'dict'; + +DROP DICTIONARY dict_db_01224.dict; +SELECT status FROM system.dictionaries WHERE database = 'dict_db_01224' AND name = 'dict'; + +DROP DATABASE dict_db_01224; +DROP DATABASE dict_db_01224_dictionary; From 67235834b3cb40ccc457f659ba7c23a6891c8765 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Sat, 11 Apr 2020 14:14:01 +0300 Subject: [PATCH 270/752] Update libdivide to v3.0 (#10169) --- contrib/libdivide/libdivide.h | 2807 +++++++++++++--------- src/Functions/intDiv.cpp | 7 +- src/Functions/modulo.cpp | 4 +- src/Interpreters/createBlockSelector.cpp | 4 +- 4 files changed, 1732 insertions(+), 1090 deletions(-) diff --git a/contrib/libdivide/libdivide.h b/contrib/libdivide/libdivide.h index eaeaec7db6b..a153e7f9c5e 100644 --- a/contrib/libdivide/libdivide.h +++ b/contrib/libdivide/libdivide.h @@ -1,117 +1,106 @@ -/* libdivide.h - Copyright 2010 ridiculous_fish -*/ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" +// libdivide.h - Optimized integer division +// https://libdivide.com +// +// Copyright (C) 2010 - 2019 ridiculous_fish, +// Copyright (C) 2016 - 2019 Kim Walisch, +// +// libdivide is dual-licensed under the Boost or zlib licenses. +// You may use libdivide under the terms of either of these. +// See LICENSE.txt for more details. -#if defined(_WIN32) || defined(WIN32) -#define LIBDIVIDE_WINDOWS 1 -#endif +#ifndef LIBDIVIDE_H +#define LIBDIVIDE_H -#if defined(_MSC_VER) -#define LIBDIVIDE_VC 1 -#endif +#define LIBDIVIDE_VERSION "3.0" +#define LIBDIVIDE_VERSION_MAJOR 3 +#define LIBDIVIDE_VERSION_MINOR 0 -#ifdef __cplusplus -#include -#include -#include -#else -#include -#include -#include -#endif - -#if ! LIBDIVIDE_HAS_STDINT_TYPES && (! LIBDIVIDE_VC || _MSC_VER >= 1600) -/* Only Visual C++ 2010 and later include stdint.h */ #include -#define LIBDIVIDE_HAS_STDINT_TYPES 1 + +#if defined(__cplusplus) + #include + #include + #include +#else + #include + #include #endif -#if ! 
LIBDIVIDE_HAS_STDINT_TYPES -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef __int8 int8_t; -typedef unsigned __int8 uint8_t; -#endif - -#if LIBDIVIDE_USE_SSE2 +#if defined(LIBDIVIDE_AVX512) + #include +#elif defined(LIBDIVIDE_AVX2) + #include +#elif defined(LIBDIVIDE_SSE2) #include #endif -#if LIBDIVIDE_VC +#if defined(_MSC_VER) #include + // disable warning C4146: unary minus operator applied + // to unsigned type, result still unsigned + #pragma warning(disable: 4146) + #define LIBDIVIDE_VC #endif -#ifndef __has_builtin -#define __has_builtin(x) 0 // Compatibility with non-clang compilers. +#if !defined(__has_builtin) + #define __has_builtin(x) 0 #endif -#ifdef __ICC -#define HAS_INT128_T 0 -#else -#define HAS_INT128_T __LP64__ +#if defined(__SIZEOF_INT128__) + #define HAS_INT128_T + // clang-cl on Windows does not yet support 128-bit division + #if !(defined(__clang__) && defined(LIBDIVIDE_VC)) + #define HAS_INT128_DIV + #endif #endif -#if defined(__x86_64__) || defined(_WIN64) || defined(_M_64) -#define LIBDIVIDE_IS_X86_64 1 +#if defined(__x86_64__) || defined(_M_X64) + #define LIBDIVIDE_X86_64 #endif #if defined(__i386__) -#define LIBDIVIDE_IS_i386 1 + #define LIBDIVIDE_i386 #endif -#if __GNUC__ || __clang__ -#define LIBDIVIDE_GCC_STYLE_ASM 1 +#if defined(__GNUC__) || defined(__clang__) + #define LIBDIVIDE_GCC_STYLE_ASM #endif +#if defined(__cplusplus) || defined(LIBDIVIDE_VC) + #define LIBDIVIDE_FUNCTION __FUNCTION__ +#else + #define LIBDIVIDE_FUNCTION __func__ +#endif -/* libdivide may use the pmuldq (vector signed 32x32->64 mult instruction) which is in SSE 4.1. However, signed multiplication can be emulated efficiently with unsigned multiplication, and SSE 4.1 is currently rare, so it is OK to not turn this on */ -#ifdef LIBDIVIDE_USE_SSE4_1 -#include +#define LIBDIVIDE_ERROR(msg) \ + do { \ + fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", \ + __LINE__, LIBDIVIDE_FUNCTION, msg); \ + exit(-1); \ + } while (0) + +#if defined(LIBDIVIDE_ASSERTIONS_ON) + #define LIBDIVIDE_ASSERT(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", \ + __LINE__, LIBDIVIDE_FUNCTION, #x); \ + exit(-1); \ + } \ + } while (0) +#else + #define LIBDIVIDE_ASSERT(x) #endif #ifdef __cplusplus -/* We place libdivide within the libdivide namespace, and that goes in an anonymous namespace so that the functions are only visible to files that #include this header and don't get external linkage. At least that's the theory. */ -namespace { namespace libdivide { #endif -/* Explanation of "more" field: bit 6 is whether to use shift path. If we are using the shift path, bit 7 is whether the divisor is negative in the signed case; in the unsigned case it is 0. Bits 0-4 is shift value (for shift path or mult path). In 32 bit case, bit 5 is always 0. We use bit 7 as the "negative divisor indicator" so that we can use sign extension to efficiently go to a full-width -1. - - -u32: [0-4] shift value - [5] ignored - [6] add indicator - [7] shift path - -s32: [0-4] shift value - [5] shift path - [6] add indicator - [7] indicates negative divisor - -u64: [0-5] shift value - [6] add indicator - [7] shift path - -s64: [0-5] shift value - [6] add indicator - [7] indicates negative divisor - magic number of 0 indicates shift path (we ran out of bits!) 
-*/ - -enum { - LIBDIVIDE_32_SHIFT_MASK = 0x1F, - LIBDIVIDE_64_SHIFT_MASK = 0x3F, - LIBDIVIDE_ADD_MARKER = 0x40, - LIBDIVIDE_U32_SHIFT_PATH = 0x80, - LIBDIVIDE_U64_SHIFT_PATH = 0x80, - LIBDIVIDE_S32_SHIFT_PATH = 0x20, - LIBDIVIDE_NEGATIVE_DIVISOR = 0x80 -}; - +// pack divider structs to prevent compilers from padding. +// This reduces memory usage by up to 43% when using a large +// array of libdivide dividers and improves performance +// by up to 10% because of reduced memory bandwidth. +#pragma pack(push, 1) struct libdivide_u32_t { uint32_t magic; @@ -133,497 +122,446 @@ struct libdivide_s64_t { uint8_t more; }; +struct libdivide_u32_branchfree_t { + uint32_t magic; + uint8_t more; +}; +struct libdivide_s32_branchfree_t { + int32_t magic; + uint8_t more; +}; -#ifndef LIBDIVIDE_API - #ifdef __cplusplus - /* In C++, we don't want our public functions to be static, because they are arguments to templates and static functions can't do that. They get internal linkage through virtue of the anonymous namespace. In C, they should be static. */ - #define LIBDIVIDE_API - #else - #define LIBDIVIDE_API static - #endif -#endif +struct libdivide_u64_branchfree_t { + uint64_t magic; + uint8_t more; +}; -#ifdef __APPLE__ -typedef signed long Int64; -typedef unsigned long UInt64; -#endif +struct libdivide_s64_branchfree_t { + int64_t magic; + uint8_t more; +}; -LIBDIVIDE_API struct libdivide_s32_t libdivide_s32_gen(int32_t y); -LIBDIVIDE_API struct libdivide_u32_t libdivide_u32_gen(uint32_t y); -LIBDIVIDE_API struct libdivide_s64_t libdivide_s64_gen(int64_t y); -LIBDIVIDE_API struct libdivide_u64_t libdivide_u64_gen(uint64_t y); -#if defined(__APPLE__) && defined(__cplusplus) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -LIBDIVIDE_API struct libdivide_s64_t libdivide_s64_gen(Int64 y) { return libdivide_s64_gen(int64_t(y)); }; -LIBDIVIDE_API struct libdivide_u64_t libdivide_u64_gen(UInt64 y) { return libdivide_u64_gen(uint64_t(y)); }; -#pragma GCC diagnostic pop -#endif +#pragma pack(pop) -LIBDIVIDE_API int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom); -LIBDIVIDE_API uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom); -LIBDIVIDE_API int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom); -LIBDIVIDE_API uint64_t libdivide_u64_do(uint64_t y, const struct libdivide_u64_t *denom); -#if defined(__APPLE__) && defined(__cplusplus) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -LIBDIVIDE_API Int64 libdivide_s64_do(Int64 numer, const struct libdivide_s64_t *denom) { return Int64(libdivide_s64_do(int64_t(numer), denom)); }; -LIBDIVIDE_API UInt64 libdivide_u64_do(UInt64 y, const struct libdivide_u64_t *denom) { return UInt64(libdivide_u64_do(uint64_t(y), denom)); }; -#pragma GCC diagnostic pop -#endif +// Explanation of the "more" field: +// +// * Bits 0-5 is the shift value (for shift path or mult path). +// * Bit 6 is the add indicator for mult path. +// * Bit 7 is set if the divisor is negative. We use bit 7 as the negative +// divisor indicator so that we can efficiently use sign extension to +// create a bitmask with all bits set to 1 (if the divisor is negative) +// or 0 (if the divisor is positive). 
+// +// u32: [0-4] shift value +// [5] ignored +// [6] add indicator +// magic number of 0 indicates shift path +// +// s32: [0-4] shift value +// [5] ignored +// [6] add indicator +// [7] indicates negative divisor +// magic number of 0 indicates shift path +// +// u64: [0-5] shift value +// [6] add indicator +// magic number of 0 indicates shift path +// +// s64: [0-5] shift value +// [6] add indicator +// [7] indicates negative divisor +// magic number of 0 indicates shift path +// +// In s32 and s64 branchfree modes, the magic number is negated according to +// whether the divisor is negated. In branchfree strategy, it is not negated. -LIBDIVIDE_API int libdivide_u32_get_algorithm(const struct libdivide_u32_t *denom); -LIBDIVIDE_API uint32_t libdivide_u32_do_alg0(uint32_t numer, const struct libdivide_u32_t *denom); -LIBDIVIDE_API uint32_t libdivide_u32_do_alg1(uint32_t numer, const struct libdivide_u32_t *denom); -LIBDIVIDE_API uint32_t libdivide_u32_do_alg2(uint32_t numer, const struct libdivide_u32_t *denom); +enum { + LIBDIVIDE_32_SHIFT_MASK = 0x1F, + LIBDIVIDE_64_SHIFT_MASK = 0x3F, + LIBDIVIDE_ADD_MARKER = 0x40, + LIBDIVIDE_NEGATIVE_DIVISOR = 0x80 +}; -LIBDIVIDE_API int libdivide_u64_get_algorithm(const struct libdivide_u64_t *denom); -LIBDIVIDE_API uint64_t libdivide_u64_do_alg0(uint64_t numer, const struct libdivide_u64_t *denom); -LIBDIVIDE_API uint64_t libdivide_u64_do_alg1(uint64_t numer, const struct libdivide_u64_t *denom); -LIBDIVIDE_API uint64_t libdivide_u64_do_alg2(uint64_t numer, const struct libdivide_u64_t *denom); -#if defined(__APPLE__) && defined(__cplusplus) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -LIBDIVIDE_API UInt64 libdivide_u64_do_alg0(UInt64 numer, const struct libdivide_u64_t *denom) { return UInt64(libdivide_u64_do_alg0(uint64_t(numer), denom)); } -LIBDIVIDE_API UInt64 libdivide_u64_do_alg1(UInt64 numer, const struct libdivide_u64_t *denom) { return UInt64(libdivide_u64_do_alg1(uint64_t(numer), denom)); } -LIBDIVIDE_API UInt64 libdivide_u64_do_alg2(UInt64 numer, const struct libdivide_u64_t *denom) { return UInt64(libdivide_u64_do_alg2(uint64_t(numer), denom)); } -#pragma GCC diagnostic pop -#endif +static inline struct libdivide_s32_t libdivide_s32_gen(int32_t d); +static inline struct libdivide_u32_t libdivide_u32_gen(uint32_t d); +static inline struct libdivide_s64_t libdivide_s64_gen(int64_t d); +static inline struct libdivide_u64_t libdivide_u64_gen(uint64_t d); -LIBDIVIDE_API int libdivide_s32_get_algorithm(const struct libdivide_s32_t *denom); -LIBDIVIDE_API int32_t libdivide_s32_do_alg0(int32_t numer, const struct libdivide_s32_t *denom); -LIBDIVIDE_API int32_t libdivide_s32_do_alg1(int32_t numer, const struct libdivide_s32_t *denom); -LIBDIVIDE_API int32_t libdivide_s32_do_alg2(int32_t numer, const struct libdivide_s32_t *denom); -LIBDIVIDE_API int32_t libdivide_s32_do_alg3(int32_t numer, const struct libdivide_s32_t *denom); -LIBDIVIDE_API int32_t libdivide_s32_do_alg4(int32_t numer, const struct libdivide_s32_t *denom); +static inline struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d); +static inline struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d); +static inline struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d); +static inline struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d); -LIBDIVIDE_API int libdivide_s64_get_algorithm(const struct libdivide_s64_t *denom); -LIBDIVIDE_API int64_t 
libdivide_s64_do_alg0(int64_t numer, const struct libdivide_s64_t *denom); -LIBDIVIDE_API int64_t libdivide_s64_do_alg1(int64_t numer, const struct libdivide_s64_t *denom); -LIBDIVIDE_API int64_t libdivide_s64_do_alg2(int64_t numer, const struct libdivide_s64_t *denom); -LIBDIVIDE_API int64_t libdivide_s64_do_alg3(int64_t numer, const struct libdivide_s64_t *denom); -LIBDIVIDE_API int64_t libdivide_s64_do_alg4(int64_t numer, const struct libdivide_s64_t *denom); -#if defined(__APPLE__) && defined(__cplusplus) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -LIBDIVIDE_API Int64 libdivide_s64_do_alg0(Int64 numer, const struct libdivide_s64_t *denom) { return Int64(libdivide_s64_do_alg0(int64_t(numer), denom)); } -LIBDIVIDE_API Int64 libdivide_s64_do_alg1(Int64 numer, const struct libdivide_s64_t *denom) { return Int64(libdivide_s64_do_alg1(int64_t(numer), denom)); } -LIBDIVIDE_API Int64 libdivide_s64_do_alg2(Int64 numer, const struct libdivide_s64_t *denom) { return Int64(libdivide_s64_do_alg2(int64_t(numer), denom)); } -LIBDIVIDE_API Int64 libdivide_s64_do_alg3(Int64 numer, const struct libdivide_s64_t *denom) { return Int64(libdivide_s64_do_alg3(int64_t(numer), denom)); } -LIBDIVIDE_API Int64 libdivide_s64_do_alg4(Int64 numer, const struct libdivide_s64_t *denom) { return Int64(libdivide_s64_do_alg4(int64_t(numer), denom)); } -#pragma GCC diagnostic pop -#endif +static inline int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom); +static inline uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom); +static inline int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom); +static inline uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom); +static inline int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom); +static inline uint32_t libdivide_u32_branchfree_do(uint32_t numer, const struct libdivide_u32_branchfree_t *denom); +static inline int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom); +static inline uint64_t libdivide_u64_branchfree_do(uint64_t numer, const struct libdivide_u64_branchfree_t *denom); -#if LIBDIVIDE_USE_SSE2 -LIBDIVIDE_API __m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t * denom); -LIBDIVIDE_API __m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t * denom); -LIBDIVIDE_API __m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t * denom); -LIBDIVIDE_API __m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t * denom); - -LIBDIVIDE_API __m128i libdivide_u32_do_vector_alg0(__m128i numers, const struct libdivide_u32_t * denom); -LIBDIVIDE_API __m128i libdivide_u32_do_vector_alg1(__m128i numers, const struct libdivide_u32_t * denom); -LIBDIVIDE_API __m128i libdivide_u32_do_vector_alg2(__m128i numers, const struct libdivide_u32_t * denom); - -LIBDIVIDE_API __m128i libdivide_s32_do_vector_alg0(__m128i numers, const struct libdivide_s32_t * denom); -LIBDIVIDE_API __m128i libdivide_s32_do_vector_alg1(__m128i numers, const struct libdivide_s32_t * denom); -LIBDIVIDE_API __m128i libdivide_s32_do_vector_alg2(__m128i numers, const struct libdivide_s32_t * denom); -LIBDIVIDE_API __m128i libdivide_s32_do_vector_alg3(__m128i numers, const struct libdivide_s32_t * denom); -LIBDIVIDE_API __m128i libdivide_s32_do_vector_alg4(__m128i numers, const struct libdivide_s32_t * denom); - 
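A minimal usage sketch of the scalar API declared above: generate a divider once, then reuse it for many divisions. The buffer name and length are illustrative only.

#include <stddef.h>
#include <stdint.h>
static void divide_all(uint32_t *buf, size_t len, uint32_t d) {
    // Precompute the divider (d must be nonzero); each subsequent
    // libdivide_u32_do() computes buf[i] / d via multiply and shift.
    struct libdivide_u32_t fast_d = libdivide_u32_gen(d);
    for (size_t i = 0; i < len; ++i)
        buf[i] = libdivide_u32_do(buf[i], &fast_d);
}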
-LIBDIVIDE_API __m128i libdivide_u64_do_vector_alg0(__m128i numers, const struct libdivide_u64_t * denom); -LIBDIVIDE_API __m128i libdivide_u64_do_vector_alg1(__m128i numers, const struct libdivide_u64_t * denom); -LIBDIVIDE_API __m128i libdivide_u64_do_vector_alg2(__m128i numers, const struct libdivide_u64_t * denom); - -LIBDIVIDE_API __m128i libdivide_s64_do_vector_alg0(__m128i numers, const struct libdivide_s64_t * denom); -LIBDIVIDE_API __m128i libdivide_s64_do_vector_alg1(__m128i numers, const struct libdivide_s64_t * denom); -LIBDIVIDE_API __m128i libdivide_s64_do_vector_alg2(__m128i numers, const struct libdivide_s64_t * denom); -LIBDIVIDE_API __m128i libdivide_s64_do_vector_alg3(__m128i numers, const struct libdivide_s64_t * denom); -LIBDIVIDE_API __m128i libdivide_s64_do_vector_alg4(__m128i numers, const struct libdivide_s64_t * denom); -#endif - +static inline int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom); +static inline uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom); +static inline int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom); +static inline uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom); +static inline int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom); +static inline uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom); +static inline int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom); +static inline uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom); //////// Internal Utility Functions -static inline uint32_t libdivide__mullhi_u32(uint32_t x, uint32_t y) { +static inline uint32_t libdivide_mullhi_u32(uint32_t x, uint32_t y) { uint64_t xl = x, yl = y; uint64_t rl = xl * yl; return (uint32_t)(rl >> 32); } -static uint64_t libdivide__mullhi_u64(uint64_t x, uint64_t y) { -#if HAS_INT128_T +static inline int32_t libdivide_mullhi_s32(int32_t x, int32_t y) { + int64_t xl = x, yl = y; + int64_t rl = xl * yl; + // needs to be arithmetic shift + return (int32_t)(rl >> 32); +} + +static inline uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) { +#if defined(LIBDIVIDE_VC) && \ + defined(LIBDIVIDE_X86_64) + return __umulh(x, y); +#elif defined(HAS_INT128_T) __uint128_t xl = x, yl = y; __uint128_t rl = xl * yl; return (uint64_t)(rl >> 64); #else - //full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - const uint32_t mask = 0xFFFFFFFF; - const uint32_t x0 = (uint32_t)(x & mask), x1 = (uint32_t)(x >> 32); - const uint32_t y0 = (uint32_t)(y & mask), y1 = (uint32_t)(y >> 32); - const uint32_t x0y0_hi = libdivide__mullhi_u32(x0, y0); - const uint64_t x0y1 = x0 * (uint64_t)y1; - const uint64_t x1y0 = x1 * (uint64_t)y0; - const uint64_t x1y1 = x1 * (uint64_t)y1; - + // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) + uint32_t mask = 0xFFFFFFFF; + uint32_t x0 = (uint32_t)(x & mask); + uint32_t x1 = (uint32_t)(x >> 32); + uint32_t y0 = (uint32_t)(y & mask); + uint32_t y1 = (uint32_t)(y >> 32); + uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); + uint64_t x0y1 = x0 * (uint64_t)y1; + uint64_t x1y0 = x1 * (uint64_t)y0; + uint64_t x1y1 = x1 * (uint64_t)y1; uint64_t temp = x1y0 + x0y0_hi; - uint64_t temp_lo = temp & mask, temp_hi = temp >> 32; + uint64_t temp_lo = temp & mask; + uint64_t temp_hi = temp >> 32; + return x1y1 + temp_hi + ((temp_lo + x0y1) >> 32); #endif } -static inline int64_t 
libdivide__mullhi_s64(int64_t x, int64_t y) { -#if HAS_INT128_T +static inline int64_t libdivide_mullhi_s64(int64_t x, int64_t y) { +#if defined(LIBDIVIDE_VC) && \ + defined(LIBDIVIDE_X86_64) + return __mulh(x, y); +#elif defined(HAS_INT128_T) __int128_t xl = x, yl = y; __int128_t rl = xl * yl; return (int64_t)(rl >> 64); #else - //full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - const uint32_t mask = 0xFFFFFFFF; - const uint32_t x0 = (uint32_t)(x & mask), y0 = (uint32_t)(y & mask); - const int32_t x1 = (int32_t)(x >> 32), y1 = (int32_t)(y >> 32); - const uint32_t x0y0_hi = libdivide__mullhi_u32(x0, y0); - const int64_t t = x1*(int64_t)y0 + x0y0_hi; - const int64_t w1 = x0*(int64_t)y1 + (t & mask); - return x1*(int64_t)y1 + (t >> 32) + (w1 >> 32); + // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) + uint32_t mask = 0xFFFFFFFF; + uint32_t x0 = (uint32_t)(x & mask); + uint32_t y0 = (uint32_t)(y & mask); + int32_t x1 = (int32_t)(x >> 32); + int32_t y1 = (int32_t)(y >> 32); + uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); + int64_t t = x1 * (int64_t)y0 + x0y0_hi; + int64_t w1 = x0 * (int64_t)y1 + (t & mask); + + return x1 * (int64_t)y1 + (t >> 32) + (w1 >> 32); #endif } -#if LIBDIVIDE_USE_SSE2 - -static inline __m128i libdivide__u64_to_m128(uint64_t x) { -#if LIBDIVIDE_VC && ! _WIN64 - //64 bit windows doesn't seem to have an implementation of any of these load intrinsics, and 32 bit Visual C++ crashes - _declspec(align(16)) uint64_t temp[2] = {x, x}; - return _mm_load_si128((const __m128i*)temp); -#elif defined(__ICC) - uint64_t __attribute__((aligned(16))) temp[2] = {x,x}; - return _mm_load_si128((const __m128i*)temp); -#elif __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-narrowing" // narrowing from uint64_t (aka 'unsigned long') to 'long long' - // clang does not provide this intrinsic either - return (__m128i){x, x}; -#pragma clang diagnostic pop -#else - // everyone else gets it right - return _mm_set1_epi64x(x); -#endif -} - -static inline __m128i libdivide_get_FFFFFFFF00000000(void) { - //returns the same as _mm_set1_epi64(0xFFFFFFFF00000000ULL) without touching memory - __m128i result = _mm_set1_epi8(-1); //optimizes to pcmpeqd on OS X - return _mm_slli_epi64(result, 32); -} - -static inline __m128i libdivide_get_00000000FFFFFFFF(void) { - //returns the same as _mm_set1_epi64(0x00000000FFFFFFFFULL) without touching memory - __m128i result = _mm_set1_epi8(-1); //optimizes to pcmpeqd on OS X - result = _mm_srli_epi64(result, 32); - return result; -} - -#if __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wuninitialized" -#endif -static inline __m128i libdivide_get_0000FFFF(void) { - //returns the same as _mm_set1_epi32(0x0000FFFFULL) without touching memory - __m128i result; //we don't care what its contents are - result = _mm_cmpeq_epi8(result, result); //all 1s - result = _mm_srli_epi32(result, 16); - return result; -} -#if __clang__ -#pragma clang diagnostic pop -#endif - -/// This is a bug in gcc-8, _MM_SHUFFLE was forgotten, though in trunk it is ok https://github.com/gcc-mirror/gcc/blob/master/gcc/config/rs6000/xmmintrin.h#L61 -#if defined(__PPC__) -#ifndef _MM_SHUFFLE -#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) -#endif -#endif - -static inline __m128i libdivide_s64_signbits(__m128i v) { - //we want to compute v >> 63, that is, _mm_srai_epi64(v, 63). But there is no 64 bit shift right arithmetic instruction in SSE2. 
So we have to fake it by first duplicating the high 32 bit values, and then using a 32 bit shift. Another option would be to use _mm_srli_epi64(v, 63) and then subtract that from 0, but that approach appears to be substantially slower for unknown reasons - __m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); - __m128i signBits = _mm_srai_epi32(hiBitsDuped, 31); - return signBits; -} - -/* Returns an __m128i whose low 32 bits are equal to amt and has zero elsewhere. */ -static inline __m128i libdivide_u32_to_m128i(uint32_t amt) { - return _mm_set_epi32(0, 0, 0, amt); -} - -static inline __m128i libdivide_s64_shift_right_vector(__m128i v, int amt) { - //implementation of _mm_sra_epi64. Here we have two 64 bit values which are shifted right to logically become (64 - amt) values, and are then sign extended from a (64 - amt) bit number. - const int b = 64 - amt; - __m128i m = libdivide__u64_to_m128(1ULL << (b - 1)); - __m128i x = _mm_srl_epi64(v, libdivide_u32_to_m128i(amt)); - __m128i result = _mm_sub_epi64(_mm_xor_si128(x, m), m); //result = x^m - m - return result; -} - -/* Here, b is assumed to contain one 32 bit value repeated four times. If it did not, the function would not work. */ -static inline __m128i libdivide__mullhi_u32_flat_vector(__m128i a, __m128i b) { - __m128i hi_product_0Z2Z = _mm_srli_epi64(_mm_mul_epu32(a, b), 32); - __m128i a1X3X = _mm_srli_epi64(a, 32); - __m128i hi_product_Z1Z3 = _mm_and_si128(_mm_mul_epu32(a1X3X, b), libdivide_get_FFFFFFFF00000000()); - return _mm_or_si128(hi_product_0Z2Z, hi_product_Z1Z3); // = hi_product_0123 -} - - -/* Here, y is assumed to contain one 64 bit value repeated twice. */ -static inline __m128i libdivide_mullhi_u64_flat_vector(__m128i x, __m128i y) { - //full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - const __m128i mask = libdivide_get_00000000FFFFFFFF(); - const __m128i x0 = _mm_and_si128(x, mask), x1 = _mm_srli_epi64(x, 32); //x0 is low half of 2 64 bit values, x1 is high half in low slots - const __m128i y0 = _mm_and_si128(y, mask), y1 = _mm_srli_epi64(y, 32); - const __m128i x0y0_hi = _mm_srli_epi64(_mm_mul_epu32(x0, y0), 32); //x0 happens to have the low half of the two 64 bit values in 32 bit slots 0 and 2, so _mm_mul_epu32 computes their full product, and then we shift right by 32 to get just the high values - const __m128i x0y1 = _mm_mul_epu32(x0, y1); - const __m128i x1y0 = _mm_mul_epu32(x1, y0); - const __m128i x1y1 = _mm_mul_epu32(x1, y1); - - const __m128i temp = _mm_add_epi64(x1y0, x0y0_hi); - __m128i temp_lo = _mm_and_si128(temp, mask), temp_hi = _mm_srli_epi64(temp, 32); - temp_lo = _mm_srli_epi64(_mm_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm_add_epi64(x1y1, temp_hi); - - return _mm_add_epi64(temp_lo, temp_hi); -} - -/* y is one 64 bit value repeated twice */ -static inline __m128i libdivide_mullhi_s64_flat_vector(__m128i x, __m128i y) { - __m128i p = libdivide_mullhi_u64_flat_vector(x, y); - __m128i t1 = _mm_and_si128(libdivide_s64_signbits(x), y); - p = _mm_sub_epi64(p, t1); - __m128i t2 = _mm_and_si128(libdivide_s64_signbits(y), x); - p = _mm_sub_epi64(p, t2); - return p; -} - -#ifdef LIBDIVIDE_USE_SSE4_1 - -/* b is one 32 bit value repeated four times. 
*/ -static inline __m128i libdivide_mullhi_s32_flat_vector(__m128i a, __m128i b) { - __m128i hi_product_0Z2Z = _mm_srli_epi64(_mm_mul_epi32(a, b), 32); - __m128i a1X3X = _mm_srli_epi64(a, 32); - __m128i hi_product_Z1Z3 = _mm_and_si128(_mm_mul_epi32(a1X3X, b), libdivide_get_FFFFFFFF00000000()); - return _mm_or_si128(hi_product_0Z2Z, hi_product_Z1Z3); // = hi_product_0123 -} - -#else - -/* SSE2 does not have a signed multiplication instruction, but we can convert unsigned to signed pretty efficiently. Again, b is just a 32 bit value repeated four times. */ -static inline __m128i libdivide_mullhi_s32_flat_vector(__m128i a, __m128i b) { - __m128i p = libdivide__mullhi_u32_flat_vector(a, b); - __m128i t1 = _mm_and_si128(_mm_srai_epi32(a, 31), b); //t1 = (a >> 31) & y, arithmetic shift - __m128i t2 = _mm_and_si128(_mm_srai_epi32(b, 31), a); - p = _mm_sub_epi32(p, t1); - p = _mm_sub_epi32(p, t2); - return p; -} -#endif -#endif - -static inline int32_t libdivide__count_trailing_zeros32(uint32_t val) { -#if __GNUC__ || __has_builtin(__builtin_ctz) - /* Fast way to count trailing zeros */ - return __builtin_ctz(val); -#elif LIBDIVIDE_VC - unsigned long result; - if (_BitScanForward(&result, val)) { - return result; - } - return 0; -#else - /* Dorky way to count trailing zeros. Note that this hangs for val = 0! */ - int32_t result = 0; - val = (val ^ (val - 1)) >> 1; // Set v's trailing 0s to 1s and zero rest - while (val) { - val >>= 1; - result++; - } - return result; -#endif -} - -static inline int32_t libdivide__count_trailing_zeros64(uint64_t val) { -#if __LP64__ && (__GNUC__ || __has_builtin(__builtin_ctzll)) - /* Fast way to count trailing zeros. Note that we disable this in 32 bit because gcc does something horrible - it calls through to a dynamically bound function. */ - return __builtin_ctzll(val); -#elif LIBDIVIDE_VC && _WIN64 - unsigned long result; - if (_BitScanForward64(&result, val)) { - return result; - } - return 0; -#else - /* Pretty good way to count trailing zeros. Note that this hangs for val = 0! */ - uint32_t lo = val & 0xFFFFFFFF; - if (lo != 0) return libdivide__count_trailing_zeros32(lo); - return 32 + libdivide__count_trailing_zeros32(val >> 32); -#endif -} - -static inline int32_t libdivide__count_leading_zeros32(uint32_t val) { -#if __GNUC__ || __has_builtin(__builtin_clzll) - /* Fast way to count leading zeros */ +static inline int32_t libdivide_count_leading_zeros32(uint32_t val) { +#if defined(__GNUC__) || \ + __has_builtin(__builtin_clz) + // Fast way to count leading zeros return __builtin_clz(val); -#elif LIBDIVIDE_VC +#elif defined(LIBDIVIDE_VC) unsigned long result; if (_BitScanReverse(&result, val)) { return 31 - result; } return 0; #else - /* Dorky way to count leading zeros. Note that this hangs for val = 0! */ int32_t result = 0; - while (! (val & (1U << 31))) { - val <<= 1; + uint32_t hi = 1U << 31; + for (; ~val & hi; hi >>= 1) { result++; } return result; #endif } -static inline int32_t libdivide__count_leading_zeros64(uint64_t val) { -#if __GNUC__ || __has_builtin(__builtin_clzll) - /* Fast way to count leading zeros */ +static inline int32_t libdivide_count_leading_zeros64(uint64_t val) { +#if defined(__GNUC__) || \ + __has_builtin(__builtin_clzll) + // Fast way to count leading zeros return __builtin_clzll(val); -#elif LIBDIVIDE_VC && _WIN64 +#elif defined(LIBDIVIDE_VC) && defined(_WIN64) unsigned long result; if (_BitScanReverse64(&result, val)) { return 63 - result; } return 0; #else - /* Dorky way to count leading zeros. 
Note that this hangs for val = 0! */ - int32_t result = 0; - while (! (val & (1ULL << 63))) { - val <<= 1; - result++; - } - return result; + uint32_t hi = val >> 32; + uint32_t lo = val & 0xFFFFFFFF; + if (hi != 0) return libdivide_count_leading_zeros32(hi); + return 32 + libdivide_count_leading_zeros32(lo); #endif } -//libdivide_64_div_32_to_32: divides a 64 bit uint {u1, u0} by a 32 bit uint {v}. The result must fit in 32 bits. Returns the quotient directly and the remainder in *r -#if (LIBDIVIDE_IS_i386 || LIBDIVIDE_IS_X86_64) && LIBDIVIDE_GCC_STYLE_ASM -static uint32_t libdivide_64_div_32_to_32(uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) { +// libdivide_64_div_32_to_32: divides a 64-bit uint {u1, u0} by a 32-bit +// uint {v}. The result must fit in 32 bits. +// Returns the quotient directly and the remainder in *r +static inline uint32_t libdivide_64_div_32_to_32(uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) { +#if (defined(LIBDIVIDE_i386) || defined(LIBDIVIDE_X86_64)) && \ + defined(LIBDIVIDE_GCC_STYLE_ASM) uint32_t result; __asm__("divl %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1) ); return result; -} #else -static uint32_t libdivide_64_div_32_to_32(uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) { - uint64_t n = (((uint64_t)u1) << 32) | u0; + uint64_t n = ((uint64_t)u1 << 32) | u0; uint32_t result = (uint32_t)(n / v); *r = (uint32_t)(n - result * (uint64_t)v); return result; -} #endif +} -#if LIBDIVIDE_IS_X86_64 && LIBDIVIDE_GCC_STYLE_ASM +// libdivide_128_div_64_to_64: divides a 128-bit uint {u1, u0} by a 64-bit +// uint {v}. The result must fit in 64 bits. +// Returns the quotient directly and the remainder in *r static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) { - //u0 -> rax - //u1 -> rdx - //divq +#if defined(LIBDIVIDE_X86_64) && \ + defined(LIBDIVIDE_GCC_STYLE_ASM) uint64_t result; __asm__("divq %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1) ); return result; - -} +#elif defined(HAS_INT128_T) && \ + defined(HAS_INT128_DIV) + __uint128_t n = ((__uint128_t)u1 << 64) | u0; + uint64_t result = (uint64_t)(n / v); + *r = (uint64_t)(n - result * (__uint128_t)v); + return result; #else + // Code taken from Hacker's Delight: + // http://www.hackersdelight.org/HDcode/divlu.c. + // License permits inclusion here per: + // http://www.hackersdelight.org/permissions.htm -/* Code taken from Hacker's Delight, http://www.hackersdelight.org/HDcode/divlu.c . License permits inclusion here per http://www.hackersdelight.org/permissions.htm - */ -static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) { - const uint64_t b = (1ULL << 32); // Number base (16 bits). - uint64_t un1, un0, // Norm. dividend LSD's. - vn1, vn0, // Norm. divisor digits. - q1, q0, // Quotient digits. - un64, un21, un10,// Dividend digit pairs. - rhat; // A remainder. - int s; // Shift amount for norm. + const uint64_t b = (1ULL << 32); // Number base (32 bits) + uint64_t un1, un0; // Norm. dividend LSD's + uint64_t vn1, vn0; // Norm. divisor digits + uint64_t q1, q0; // Quotient digits + uint64_t un64, un21, un10; // Dividend digit pairs + uint64_t rhat; // A remainder + int32_t s; // Shift amount for norm - if (u1 >= v) { // If overflow, set rem. - if (r != NULL) // to an impossible value, - *r = (uint64_t)(-1); // and return the largest - return (uint64_t)(-1);} // possible quotient. + // If overflow, set rem. 
to an impossible value, + // and return the largest possible quotient + if (u1 >= v) { + *r = (uint64_t) -1; + return (uint64_t) -1; + } - /* count leading zeros */ - s = libdivide__count_leading_zeros64(v); // 0 <= s <= 63. + // count leading zeros + s = libdivide_count_leading_zeros64(v); if (s > 0) { - v = v << s; // Normalize divisor. - un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31)); - un10 = u0 << s; // Shift dividend left. + // Normalize divisor + v = v << s; + un64 = (u1 << s) | (u0 >> (64 - s)); + un10 = u0 << s; // Shift dividend left } else { - // Avoid undefined behavior. - un64 = u1 | u0; + // Avoid undefined behavior of (u0 >> 64). + // The behavior is undefined if the right operand is + // negative, or greater than or equal to the length + // in bits of the promoted left operand. + un64 = u1; un10 = u0; } - vn1 = v >> 32; // Break divisor up into - vn0 = v & 0xFFFFFFFF; // two 32-bit digits. + // Break divisor up into two 32-bit digits + vn1 = v >> 32; + vn0 = v & 0xFFFFFFFF; - un1 = un10 >> 32; // Break right half of - un0 = un10 & 0xFFFFFFFF; // dividend into two digits. + // Break right half of dividend into two digits + un1 = un10 >> 32; + un0 = un10 & 0xFFFFFFFF; - q1 = un64/vn1; // Compute the first - rhat = un64 - q1*vn1; // quotient digit, q1. -again1: - if (q1 >= b || q1*vn0 > b*rhat + un1) { + // Compute the first quotient digit, q1 + q1 = un64 / vn1; + rhat = un64 - q1 * vn1; + + while (q1 >= b || q1 * vn0 > b * rhat + un1) { q1 = q1 - 1; rhat = rhat + vn1; - if (rhat < b) goto again1;} + if (rhat >= b) + break; + } - un21 = un64*b + un1 - q1*v; // Multiply and subtract. + // Multiply and subtract + un21 = un64 * b + un1 - q1 * v; - q0 = un21/vn1; // Compute the second - rhat = un21 - q0*vn1; // quotient digit, q0. -again2: - if (q0 >= b || q0*vn0 > b*rhat + un0) { + // Compute the second quotient digit + q0 = un21 / vn1; + rhat = un21 - q0 * vn1; + + while (q0 >= b || q0 * vn0 > b * rhat + un0) { q0 = q0 - 1; rhat = rhat + vn1; - if (rhat < b) goto again2;} + if (rhat >= b) + break; + } - if (r != NULL) // If remainder is wanted, - *r = (un21*b + un0 - q0*v) >> s; // return it. - return q1*b + q0; + *r = (un21 * b + un0 - q0 * v) >> s; + return q1 * b + q0; +#endif } -#endif -#if LIBDIVIDE_ASSERTIONS_ON -#define LIBDIVIDE_ASSERT(x) do { if (! (x)) { fprintf(stderr, "Assertion failure on line %ld: %s\n", (long)__LINE__, #x); exit(-1); } } while (0) +// Bitshift a u128 in place, left (signed_shift > 0) or right (signed_shift < 0) +static inline void libdivide_u128_shift(uint64_t *u1, uint64_t *u0, int32_t signed_shift) { + if (signed_shift > 0) { + uint32_t shift = signed_shift; + *u1 <<= shift; + *u1 |= *u0 >> (64 - shift); + *u0 <<= shift; + } + else if (signed_shift < 0) { + uint32_t shift = -signed_shift; + *u0 >>= shift; + *u0 |= *u1 << (64 - shift); + *u1 >>= shift; + } +} + +// Computes a 128 / 128 -> 64 bit division, with a 128 bit remainder. 
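Where the compiler provides __uint128_t (GCC/Clang), the 64-bit-divisor routine above can be sanity-checked against native wide division, as in this sketch; the quotient must fit in 64 bits, so u1 < v is required:

#include <assert.h>
#include <stdint.h>
static void check_128_div_64(uint64_t u1, uint64_t u0, uint64_t v) {
    uint64_t r;
    uint64_t q = libdivide_128_div_64_to_64(u1, u0, v, &r);
    // Reference result computed with native 128-bit arithmetic.
    __uint128_t n = ((__uint128_t)u1 << 64) | u0;
    assert(q == (uint64_t)(n / v));
    assert(r == (uint64_t)(n % v));
}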
+static uint64_t libdivide_128_div_128_to_64(uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) { +#if defined(HAS_INT128_T) && \ + defined(HAS_INT128_DIV) + __uint128_t ufull = u_hi; + __uint128_t vfull = v_hi; + ufull = (ufull << 64) | u_lo; + vfull = (vfull << 64) | v_lo; + uint64_t res = (uint64_t)(ufull / vfull); + __uint128_t remainder = ufull - (vfull * res); + *r_lo = (uint64_t)remainder; + *r_hi = (uint64_t)(remainder >> 64); + return res; #else -#define LIBDIVIDE_ASSERT(x) -#endif + // Adapted from "Unsigned Doubleword Division" in Hacker's Delight + // We want to compute u / v + typedef struct { uint64_t hi; uint64_t lo; } u128_t; + u128_t u = {u_hi, u_lo}; + u128_t v = {v_hi, v_lo}; -#ifndef LIBDIVIDE_HEADER_ONLY + if (v.hi == 0) { + // divisor v is a 64 bit value, so we just need one 128/64 division + // Note that we are simpler than Hacker's Delight here, because we know + // the quotient fits in 64 bits whereas Hacker's Delight demands a full + // 128 bit quotient + *r_hi = 0; + return libdivide_128_div_64_to_64(u.hi, u.lo, v.lo, r_lo); + } + // Here v >= 2**64 + // We know that v.hi != 0, so count leading zeros is OK + // We have 0 <= n <= 63 + uint32_t n = libdivide_count_leading_zeros64(v.hi); + + // Normalize the divisor so its MSB is 1 + u128_t v1t = v; + libdivide_u128_shift(&v1t.hi, &v1t.lo, n); + uint64_t v1 = v1t.hi; // i.e. v1 = v1t >> 64 + + // To ensure no overflow + u128_t u1 = u; + libdivide_u128_shift(&u1.hi, &u1.lo, -1); + + // Get quotient from divide unsigned insn. + uint64_t rem_ignored; + uint64_t q1 = libdivide_128_div_64_to_64(u1.hi, u1.lo, v1, &rem_ignored); + + // Undo normalization and division of u by 2. + u128_t q0 = {0, q1}; + libdivide_u128_shift(&q0.hi, &q0.lo, n); + libdivide_u128_shift(&q0.hi, &q0.lo, -63); + + // Make q0 correct or too small by 1 + // Equivalent to `if (q0 != 0) q0 = q0 - 1;` + if (q0.hi != 0 || q0.lo != 0) { + q0.hi -= (q0.lo == 0); // borrow + q0.lo -= 1; + } + + // Now q0 is correct. + // Compute q0 * v as q0v + // = (q0.hi << 64 + q0.lo) * (v.hi << 64 + v.lo) + // = (q0.hi * v.hi << 128) + (q0.hi * v.lo << 64) + + // (q0.lo * v.hi << 64) + q0.lo * v.lo) + // Each term is 128 bit + // High half of full product (upper 128 bits!) 
are dropped + u128_t q0v = {0, 0}; + q0v.hi = q0.hi*v.lo + q0.lo*v.hi + libdivide_mullhi_u64(q0.lo, v.lo); + q0v.lo = q0.lo*v.lo; + + // Compute u - q0v as u_q0v + // This is the remainder + u128_t u_q0v = u; + u_q0v.hi -= q0v.hi + (u.lo < q0v.lo); // second term is borrow + u_q0v.lo -= q0v.lo; + + // Check if u_q0v >= v + // This checks if our remainder is larger than the divisor + if ((u_q0v.hi > v.hi) || + (u_q0v.hi == v.hi && u_q0v.lo >= v.lo)) { + // Increment q0 + q0.lo += 1; + q0.hi += (q0.lo == 0); // carry + + // Subtract v from remainder + u_q0v.hi -= v.hi + (u_q0v.lo < v.lo); + u_q0v.lo -= v.lo; + } + + *r_hi = u_q0v.hi; + *r_lo = u_q0v.lo; + + LIBDIVIDE_ASSERT(q0.hi == 0); + return q0.lo; +#endif +} ////////// UINT32 -struct libdivide_u32_t libdivide_u32_gen(uint32_t d) { - struct libdivide_u32_t result; - if ((d & (d - 1)) == 0) { - result.magic = 0; - result.more = libdivide__count_trailing_zeros32(d) | LIBDIVIDE_U32_SHIFT_PATH; +static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int branchfree) { + if (d == 0) { + LIBDIVIDE_ERROR("divider must be != 0"); } - else { - const uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(d); + struct libdivide_u32_t result; + uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(d); + + // Power of 2 + if ((d & (d - 1)) == 0) { + // We need to subtract 1 from the shift value in case of an unsigned + // branchfree divider because there is a hardcoded right shift by 1 + // in its division algorithm. Because of this we also need to add back + // 1 in its recovery algorithm. + result.magic = 0; + result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); + } else { uint8_t more; uint32_t rem, proposed_m; proposed_m = libdivide_64_div_32_to_32(1U << floor_log_2_d, 0, d, &rem); @@ -631,570 +569,1358 @@ struct libdivide_u32_t libdivide_u32_gen(uint32_t d) { LIBDIVIDE_ASSERT(rem > 0 && rem < d); const uint32_t e = d - rem; - /* This power works if e < 2**floor_log_2_d. */ - if (e < (1U << floor_log_2_d)) { - /* This power works */ + // This power works if e < 2**floor_log_2_d. + if (!branchfree && (e < (1U << floor_log_2_d))) { + // This power works more = floor_log_2_d; - } - else { - /* We have to use the general 33-bit algorithm. We need to compute (2**power) / d. However, we already have (2**(power-1))/d and its remainder. By doubling both, and then correcting the remainder, we can compute the larger division. */ - proposed_m += proposed_m; //don't care about overflow here - in fact, we expect it + } else { + // We have to use the general 33-bit algorithm. We need to compute + // (2**power) / d. However, we already have (2**(power-1))/d and + // its remainder. By doubling both, and then correcting the + // remainder, we can compute the larger division. + // don't care about overflow here - in fact, we expect it + proposed_m += proposed_m; const uint32_t twice_rem = rem + rem; if (twice_rem >= d || twice_rem < rem) proposed_m += 1; more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; } result.magic = 1 + proposed_m; result.more = more; - //result.more's shift should in general be ceil_log_2_d. But if we used the smaller power, we subtract one from the shift because we're using the smaller power. If we're using the larger power, we subtract one from the shift because it's taken care of by the add indicator. So floor_log_2_d happens to be correct in both cases. - + // result.more's shift should in general be ceil_log_2_d. 
But if we + // used the smaller power, we subtract one from the shift because we're + // using the smaller power. If we're using the larger power, we + // subtract one from the shift because it's taken care of by the add + // indicator. So floor_log_2_d happens to be correct in both cases. } return result; } +struct libdivide_u32_t libdivide_u32_gen(uint32_t d) { + return libdivide_internal_u32_gen(d, 0); +} + +struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) { + if (d == 1) { + LIBDIVIDE_ERROR("branchfree divider must be != 1"); + } + struct libdivide_u32_t tmp = libdivide_internal_u32_gen(d, 1); + struct libdivide_u32_branchfree_t ret = {tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_32_SHIFT_MASK)}; + return ret; +} + uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { uint8_t more = denom->more; - if (more & LIBDIVIDE_U32_SHIFT_PATH) { - return numer >> (more & LIBDIVIDE_32_SHIFT_MASK); + if (!denom->magic) { + return numer >> more; } else { - uint32_t q = libdivide__mullhi_u32(denom->magic, numer); + uint32_t q = libdivide_mullhi_u32(denom->magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { uint32_t t = ((numer - q) >> 1) + q; return t >> (more & LIBDIVIDE_32_SHIFT_MASK); } else { - return q >> more; //all upper bits are 0 - don't need to mask them off + // All upper bits are 0, + // don't need to mask them off. + return q >> more; } } } - -int libdivide_u32_get_algorithm(const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (more & LIBDIVIDE_U32_SHIFT_PATH) return 0; - else if (! (more & LIBDIVIDE_ADD_MARKER)) return 1; - else return 2; -} - -uint32_t libdivide_u32_do_alg0(uint32_t numer, const struct libdivide_u32_t *denom) { - return numer >> (denom->more & LIBDIVIDE_32_SHIFT_MASK); -} - -uint32_t libdivide_u32_do_alg1(uint32_t numer, const struct libdivide_u32_t *denom) { - uint32_t q = libdivide__mullhi_u32(denom->magic, numer); - return q >> denom->more; -} - -uint32_t libdivide_u32_do_alg2(uint32_t numer, const struct libdivide_u32_t *denom) { - // denom->add != 0 - uint32_t q = libdivide__mullhi_u32(denom->magic, numer); +uint32_t libdivide_u32_branchfree_do(uint32_t numer, const struct libdivide_u32_branchfree_t *denom) { + uint32_t q = libdivide_mullhi_u32(denom->magic, numer); uint32_t t = ((numer - q) >> 1) + q; - return t >> (denom->more & LIBDIVIDE_32_SHIFT_MASK); + return t >> denom->more; } - - - -#if LIBDIVIDE_USE_SSE2 -__m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t *denom) { +uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom) { uint8_t more = denom->more; - if (more & LIBDIVIDE_U32_SHIFT_PATH) { - return _mm_srl_epi32(numers, libdivide_u32_to_m128i(more & LIBDIVIDE_32_SHIFT_MASK)); - } - else { - __m128i q = libdivide__mullhi_u32_flat_vector(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - //uint32_t t = ((numer - q) >> 1) + q; - //return t >> denom->shift; - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srl_epi32(t, libdivide_u32_to_m128i(more & LIBDIVIDE_32_SHIFT_MASK)); + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - } - else { - //q >> denom->shift - return _mm_srl_epi32(q, libdivide_u32_to_m128i(more)); - } + if (!denom->magic) { + return 1U << shift; + } else if (!(more & LIBDIVIDE_ADD_MARKER)) { + // We compute q = n/d = n*m / 2^(32 + shift) + // Therefore we have d = 2^(32 + shift) / m + // We need to ceil it. 
+ // We know d is not a power of 2, so m is not a power of 2, + // so we can just add 1 to the floor + uint32_t hi_dividend = 1U << shift; + uint32_t rem_ignored; + return 1 + libdivide_64_div_32_to_32(hi_dividend, 0, denom->magic, &rem_ignored); + } else { + // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). + // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now + // Also note that shift may be as high as 31, so shift + 1 will + // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and + // then double the quotient and remainder. + uint64_t half_n = 1ULL << (32 + shift); + uint64_t d = (1ULL << 32) | denom->magic; + // Note that the quotient is guaranteed <= 32 bits, but the remainder + // may need 33! + uint32_t half_q = (uint32_t)(half_n / d); + uint64_t rem = half_n % d; + // We computed 2^(32+shift)/(m+2^32) + // Need to double it, and then add 1 to the quotient if doubling the + // remainder would increase the quotient. + // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits + uint32_t full_q = half_q + half_q + ((rem<<1) >= d); + + // We rounded down in gen (hence +1) + return full_q + 1; + } } -__m128i libdivide_u32_do_vector_alg0(__m128i numers, const struct libdivide_u32_t *denom) { - return _mm_srl_epi32(numers, libdivide_u32_to_m128i(denom->more & LIBDIVIDE_32_SHIFT_MASK)); -} +uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; -__m128i libdivide_u32_do_vector_alg1(__m128i numers, const struct libdivide_u32_t *denom) { - __m128i q = libdivide__mullhi_u32_flat_vector(numers, _mm_set1_epi32(denom->magic)); - return _mm_srl_epi32(q, libdivide_u32_to_m128i(denom->more)); -} + if (!denom->magic) { + return 1U << (shift + 1); + } else { + // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). + // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now + // Also note that shift may be as high as 31, so shift + 1 will + // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and + // then double the quotient and remainder. + uint64_t half_n = 1ULL << (32 + shift); + uint64_t d = (1ULL << 32) | denom->magic; + // Note that the quotient is guaranteed <= 32 bits, but the remainder + // may need 33! + uint32_t half_q = (uint32_t)(half_n / d); + uint64_t rem = half_n % d; + // We computed 2^(32+shift)/(m+2^32) + // Need to double it, and then add 1 to the quotient if doubling the + // remainder would increase the quotient.
+ // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits + uint32_t full_q = half_q + half_q + ((rem<<1) >= d); -__m128i libdivide_u32_do_vector_alg2(__m128i numers, const struct libdivide_u32_t *denom) { - __m128i q = libdivide__mullhi_u32_flat_vector(numers, _mm_set1_epi32(denom->magic)); - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srl_epi32(t, libdivide_u32_to_m128i(denom->more & LIBDIVIDE_32_SHIFT_MASK)); + // We rounded down in gen (hence +1) + return full_q + 1; + } } -#endif - /////////// UINT64 -struct libdivide_u64_t libdivide_u64_gen(uint64_t d) { - struct libdivide_u64_t result; - if ((d & (d - 1)) == 0) { - result.more = libdivide__count_trailing_zeros64(d) | LIBDIVIDE_U64_SHIFT_PATH; - result.magic = 0; +static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int branchfree) { + if (d == 0) { + LIBDIVIDE_ERROR("divider must be != 0"); } - else { - const uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(d); + struct libdivide_u64_t result; + uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(d); + + // Power of 2 + if ((d & (d - 1)) == 0) { + // We need to subtract 1 from the shift value in case of an unsigned + // branchfree divider because there is a hardcoded right shift by 1 + // in its division algorithm. Because of this we also need to add back + // 1 in its recovery algorithm. + result.magic = 0; + result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); + } else { uint64_t proposed_m, rem; uint8_t more; - proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); //== (1 << (64 + floor_log_2_d)) / d + // (1 << (64 + floor_log_2_d)) / d + proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); LIBDIVIDE_ASSERT(rem > 0 && rem < d); const uint64_t e = d - rem; - /* This power works if e < 2**floor_log_2_d. */ - if (e < (1ULL << floor_log_2_d)) { - /* This power works */ + // This power works if e < 2**floor_log_2_d. + if (!branchfree && e < (1ULL << floor_log_2_d)) { + // This power works more = floor_log_2_d; - } - else { - /* We have to use the general 65-bit algorithm. We need to compute (2**power) / d. However, we already have (2**(power-1))/d and its remainder. By doubling both, and then correcting the remainder, we can compute the larger division. */ - proposed_m += proposed_m; //don't care about overflow here - in fact, we expect it + } else { + // We have to use the general 65-bit algorithm. We need to compute + // (2**power) / d. However, we already have (2**(power-1))/d and + // its remainder. By doubling both, and then correcting the + // remainder, we can compute the larger division. + // don't care about overflow here - in fact, we expect it + proposed_m += proposed_m; const uint64_t twice_rem = rem + rem; if (twice_rem >= d || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; + more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; } result.magic = 1 + proposed_m; result.more = more; - //result.more's shift should in general be ceil_log_2_d. But if we used the smaller power, we subtract one from the shift because we're using the smaller power. If we're using the larger power, we subtract one from the shift because it's taken care of by the add indicator. So floor_log_2_d happens to be correct in both cases, which is why we do it outside of the if statement. + // result.more's shift should in general be ceil_log_2_d. 
But if we + // used the smaller power, we subtract one from the shift because we're + // using the smaller power. If we're using the larger power, we + // subtract one from the shift because it's taken care of by the add + // indicator. So floor_log_2_d happens to be correct in both cases, + // which is why we do it outside of the if statement. } return result; } +struct libdivide_u64_t libdivide_u64_gen(uint64_t d) { + return libdivide_internal_u64_gen(d, 0); +} + +struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) { + if (d == 1) { + LIBDIVIDE_ERROR("branchfree divider must be != 1"); + } + struct libdivide_u64_t tmp = libdivide_internal_u64_gen(d, 1); + struct libdivide_u64_branchfree_t ret = {tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_64_SHIFT_MASK)}; + return ret; +} + uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { uint8_t more = denom->more; - if (more & LIBDIVIDE_U64_SHIFT_PATH) { - return numer >> (more & LIBDIVIDE_64_SHIFT_MASK); + if (!denom->magic) { + return numer >> more; } else { - uint64_t q = libdivide__mullhi_u64(denom->magic, numer); + uint64_t q = libdivide_mullhi_u64(denom->magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { uint64_t t = ((numer - q) >> 1) + q; return t >> (more & LIBDIVIDE_64_SHIFT_MASK); } else { - return q >> more; //all upper bits are 0 - don't need to mask them off + // All upper bits are 0, + // don't need to mask them off. + return q >> more; } } } - -int libdivide_u64_get_algorithm(const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (more & LIBDIVIDE_U64_SHIFT_PATH) return 0; - else if (! (more & LIBDIVIDE_ADD_MARKER)) return 1; - else return 2; -} - -uint64_t libdivide_u64_do_alg0(uint64_t numer, const struct libdivide_u64_t *denom) { - return numer >> (denom->more & LIBDIVIDE_64_SHIFT_MASK); -} - -uint64_t libdivide_u64_do_alg1(uint64_t numer, const struct libdivide_u64_t *denom) { - uint64_t q = libdivide__mullhi_u64(denom->magic, numer); - return q >> denom->more; -} - -uint64_t libdivide_u64_do_alg2(uint64_t numer, const struct libdivide_u64_t *denom) { - uint64_t q = libdivide__mullhi_u64(denom->magic, numer); +uint64_t libdivide_u64_branchfree_do(uint64_t numer, const struct libdivide_u64_branchfree_t *denom) { + uint64_t q = libdivide_mullhi_u64(denom->magic, numer); uint64_t t = ((numer - q) >> 1) + q; - return t >> (denom->more & LIBDIVIDE_64_SHIFT_MASK); + return t >> denom->more; } -#if LIBDIVIDE_USE_SSE2 -__m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t * denom) { +uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom) { uint8_t more = denom->more; - if (more & LIBDIVIDE_U64_SHIFT_PATH) { - return _mm_srl_epi64(numers, libdivide_u32_to_m128i(more & LIBDIVIDE_64_SHIFT_MASK)); - } - else { - __m128i q = libdivide_mullhi_u64_flat_vector(numers, libdivide__u64_to_m128(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - //uint32_t t = ((numer - q) >> 1) + q; - //return t >> denom->shift; - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srl_epi64(t, libdivide_u32_to_m128i(more & LIBDIVIDE_64_SHIFT_MASK)); - } - else { - //q >> denom->shift - return _mm_srl_epi64(q, libdivide_u32_to_m128i(more)); - } + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + + if (!denom->magic) { + return 1ULL << shift; + } else if (!(more & LIBDIVIDE_ADD_MARKER)) { + // We compute q = n/d = n*m / 2^(64 + shift) + // Therefore we have d = 2^(64 + shift) / m + // We need to ceil it. 
+ // We know d is not a power of 2, so m is not a power of 2, + // so we can just add 1 to the floor + uint64_t hi_dividend = 1ULL << shift; + uint64_t rem_ignored; + return 1 + libdivide_128_div_64_to_64(hi_dividend, 0, denom->magic, &rem_ignored); + } else { + // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). + // Notice (m + 2^64) is a 65 bit number. This gets hairy. See + // libdivide_u32_recover for more on what we do here. + // TODO: do something better than 128 bit math + + // Full n is a (potentially) 129 bit value + // half_n is a 128 bit value + // Compute the hi half of half_n. Low half is 0. + uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; + // d is a 65 bit value. The high bit is always set to 1. + const uint64_t d_hi = 1, d_lo = denom->magic; + // Note that the quotient is guaranteed <= 64 bits, + // but the remainder may need 65! + uint64_t r_hi, r_lo; + uint64_t half_q = libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); + // We computed 2^(64+shift)/(m+2^64) + // Double the remainder ('dr') and check if that is larger than d + // Note that d is a 65 bit value, so r1 is small and so r1 + r1 + // cannot overflow + uint64_t dr_lo = r_lo + r_lo; + uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry + int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); + uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); + return full_q + 1; } } -__m128i libdivide_u64_do_vector_alg0(__m128i numers, const struct libdivide_u64_t *denom) { - return _mm_srl_epi64(numers, libdivide_u32_to_m128i(denom->more & LIBDIVIDE_64_SHIFT_MASK)); +uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + + if (!denom->magic) { + return 1ULL << (shift + 1); + } else { + // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). + // Notice (m + 2^64) is a 65 bit number. This gets hairy. See + // libdivide_u32_recover for more on what we do here. + // TODO: do something better than 128 bit math + + // Full n is a (potentially) 129 bit value + // half_n is a 128 bit value + // Compute the hi half of half_n. Low half is 0. + uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; + // d is a 65 bit value. The high bit is always set to 1. + const uint64_t d_hi = 1, d_lo = denom->magic; + // Note that the quotient is guaranteed <= 64 bits, + // but the remainder may need 65! + uint64_t r_hi, r_lo; + uint64_t half_q = libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); + // We computed 2^(64+shift)/(m+2^64) + // Double the remainder ('dr') and check if that is larger than d + // Note that d is a 65 bit value, so r1 is small and so r1 + r1 + // cannot overflow + uint64_t dr_lo = r_lo + r_lo; + uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry + int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); + uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 
1 : 0); + return full_q + 1; + } } -__m128i libdivide_u64_do_vector_alg1(__m128i numers, const struct libdivide_u64_t *denom) { - __m128i q = libdivide_mullhi_u64_flat_vector(numers, libdivide__u64_to_m128(denom->magic)); - return _mm_srl_epi64(q, libdivide_u32_to_m128i(denom->more)); -} -__m128i libdivide_u64_do_vector_alg2(__m128i numers, const struct libdivide_u64_t *denom) { - __m128i q = libdivide_mullhi_u64_flat_vector(numers, libdivide__u64_to_m128(denom->magic)); - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srl_epi64(t, libdivide_u32_to_m128i(denom->more & LIBDIVIDE_64_SHIFT_MASK)); -} - -#endif - /////////// SINT32 +static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int branchfree) { + if (d == 0) { + LIBDIVIDE_ERROR("divider must be != 0"); + } -static inline int32_t libdivide__mullhi_s32(int32_t x, int32_t y) { - int64_t xl = x, yl = y; - int64_t rl = xl * yl; - return (int32_t)(rl >> 32); //needs to be arithmetic shift -} - -struct libdivide_s32_t libdivide_s32_gen(int32_t d) { struct libdivide_s32_t result; - /* If d is a power of 2, or negative a power of 2, we have to use a shift. This is especially important because the magic algorithm fails for -1. To check if d is a power of 2 or its inverse, it suffices to check whether its absolute value has exactly one bit set. This works even for INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set and is a power of 2. */ - uint32_t absD = (uint32_t)(d < 0 ? -d : d); //gcc optimizes this to the fast abs trick - if ((absD & (absD - 1)) == 0) { //check if exactly one bit is set, don't care if absD is 0 since that's divide by zero + // If d is a power of 2, or negative a power of 2, we have to use a shift. + // This is especially important because the magic algorithm fails for -1. + // To check if d is a power of 2 or its inverse, it suffices to check + // whether its absolute value has exactly one bit set. This works even for + // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set + // and is a power of 2. + uint32_t ud = (uint32_t)d; + uint32_t absD = (d < 0) ? -ud : ud; + uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(absD); + // check if exactly one bit is set, + // don't care if absD is 0 since that's divide by zero + if ((absD & (absD - 1)) == 0) { + // Branchfree and normal paths are exactly the same result.magic = 0; - result.more = libdivide__count_trailing_zeros32(absD) | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0) | LIBDIVIDE_S32_SHIFT_PATH; - } - else { - const uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(absD); + result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0); + } else { LIBDIVIDE_ASSERT(floor_log_2_d >= 1); uint8_t more; - //the dividend here is 2**(floor_log_2_d + 31), so the low 32 bit word is 0 and the high word is floor_log_2_d - 1 + // the dividend here is 2**(floor_log_2_d + 31), so the low 32 bit word + // is 0 and the high word is floor_log_2_d - 1 uint32_t rem, proposed_m; proposed_m = libdivide_64_div_32_to_32(1U << (floor_log_2_d - 1), 0, absD, &rem); const uint32_t e = absD - rem; - /* We are going to start with a power of floor_log_2_d - 1. This works if works if e < 2**floor_log_2_d. */ - if (e < (1U << floor_log_2_d)) { - /* This power works */ + // We are going to start with a power of floor_log_2_d - 1. + // This works if e < 2**floor_log_2_d.
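For contrast with the u32 example given earlier for d = 7, the s32 path for d = 5 takes the smaller power: floor_log_2_d = 2 and 2^33 / 5 = 1717986918 remainder 2, so e = 5 - 2 = 3 < 2^2, giving more = floor_log_2_d - 1 = 1 and the familiar magic number 1717986919. For numer = 37: mullhi(1717986919, 37) = 14 and 14 >> 1 = 7 = 37 / 5. A minimal check:

#include <assert.h>
static void check_s32_gen_example(void) {
    struct libdivide_s32_t d5 = libdivide_s32_gen(5);
    assert(d5.magic == 1717986919);
    assert((d5.more & LIBDIVIDE_32_SHIFT_MASK) == 1);
    assert(libdivide_s32_do(37, &d5) == 37 / 5);
}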
+ if (!branchfree && e < (1U << floor_log_2_d)) { + // This power works more = floor_log_2_d - 1; - } - else { - /* We need to go one higher. This should not make proposed_m overflow, but it will make it negative when interpreted as an int32_t. */ + } else { + // We need to go one higher. This should not make proposed_m + // overflow, but it will make it negative when interpreted as an + // int32_t. proposed_m += proposed_m; const uint32_t twice_rem = rem + rem; if (twice_rem >= absD || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0); //use the general algorithm + more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; } - proposed_m += 1; - result.magic = (d < 0 ? -(int32_t)proposed_m : (int32_t)proposed_m); - result.more = more; + proposed_m += 1; + int32_t magic = (int32_t)proposed_m; + + // Mark if we are negative. Note we only negate the magic number in the + // branchfull case. + if (d < 0) { + more |= LIBDIVIDE_NEGATIVE_DIVISOR; + if (!branchfree) { + magic = -magic; + } + } + + result.more = more; + result.magic = magic; } return result; } +struct libdivide_s32_t libdivide_s32_gen(int32_t d) { + return libdivide_internal_s32_gen(d, 0); +} + +struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) { + struct libdivide_s32_t tmp = libdivide_internal_s32_gen(d, 1); + struct libdivide_s32_branchfree_t result = {tmp.magic, tmp.more}; + return result; +} + int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { uint8_t more = denom->more; - if (more & LIBDIVIDE_S32_SHIFT_PATH) { - uint8_t shifter = more & LIBDIVIDE_32_SHIFT_MASK; - int32_t q = numer + ((numer >> 31) & ((1 << shifter) - 1)); - q = q >> shifter; - int32_t shiftMask = (int8_t)more >> 7; //must be arithmetic shift and then sign-extend - q = (q ^ shiftMask) - shiftMask; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + + if (!denom->magic) { + uint32_t sign = (int8_t)more >> 7; + uint32_t mask = (1U << shift) - 1; + uint32_t uq = numer + ((numer >> 31) & mask); + int32_t q = (int32_t)uq; + q >>= shift; + q = (q ^ sign) - sign; return q; - } - else { - int32_t q = libdivide__mullhi_s32(denom->magic, numer); + } else { + uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { - int32_t sign = (int8_t)more >> 7; //must be arithmetic shift and then sign extend - q += ((numer ^ sign) - sign); + // must be arithmetic shift and then sign extend + int32_t sign = (int8_t)more >> 7; + // q += (more < 0 ? -numer : numer) + // cast required to avoid UB + uq += ((uint32_t)numer ^ sign) - sign; } - q >>= more & LIBDIVIDE_32_SHIFT_MASK; + int32_t q = (int32_t)uq; + q >>= shift; q += (q < 0); return q; } } -int libdivide_s32_get_algorithm(const struct libdivide_s32_t *denom) { +int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) { uint8_t more = denom->more; - int positiveDivisor = ! (more & LIBDIVIDE_NEGATIVE_DIVISOR); - if (more & LIBDIVIDE_S32_SHIFT_PATH) return (positiveDivisor ? 0 : 1); - else if (more & LIBDIVIDE_ADD_MARKER) return (positiveDivisor ? 
2 : 3); - else return 4; -} - -int32_t libdivide_s32_do_alg0(int32_t numer, const struct libdivide_s32_t *denom) { - uint8_t shifter = denom->more & LIBDIVIDE_32_SHIFT_MASK; - int32_t q = numer + ((numer >> 31) & ((1 << shifter) - 1)); - return q >> shifter; -} - -int32_t libdivide_s32_do_alg1(int32_t numer, const struct libdivide_s32_t *denom) { - uint8_t shifter = denom->more & LIBDIVIDE_32_SHIFT_MASK; - int32_t q = numer + ((numer >> 31) & ((1 << shifter) - 1)); - return - (q >> shifter); -} - -int32_t libdivide_s32_do_alg2(int32_t numer, const struct libdivide_s32_t *denom) { - int32_t q = libdivide__mullhi_s32(denom->magic, numer); + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift and then sign extend + int32_t sign = (int8_t)more >> 7; + int32_t magic = denom->magic; + int32_t q = libdivide_mullhi_s32(magic, numer); q += numer; - q >>= denom->more & LIBDIVIDE_32_SHIFT_MASK; - q += (q < 0); + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is a power of + // 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + uint32_t q_sign = (uint32_t)(q >> 31); + q += q_sign & ((1U << shift) - is_power_of_2); + + // Now arithmetic right shift + q >>= shift; + // Negate if needed + q = (q ^ sign) - sign; + return q; } -int32_t libdivide_s32_do_alg3(int32_t numer, const struct libdivide_s32_t *denom) { - int32_t q = libdivide__mullhi_s32(denom->magic, numer); - q -= numer; - q >>= denom->more & LIBDIVIDE_32_SHIFT_MASK; - q += (q < 0); - return q; -} - -int32_t libdivide_s32_do_alg4(int32_t numer, const struct libdivide_s32_t *denom) { - int32_t q = libdivide__mullhi_s32(denom->magic, numer); - q >>= denom->more & LIBDIVIDE_32_SHIFT_MASK; - q += (q < 0); - return q; -} - -#if LIBDIVIDE_USE_SSE2 -__m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t * denom) { +int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom) { uint8_t more = denom->more; - if (more & LIBDIVIDE_S32_SHIFT_PATH) { - uint32_t shifter = more & LIBDIVIDE_32_SHIFT_MASK; - __m128i roundToZeroTweak = _mm_set1_epi32((1 << shifter) - 1); //could use _mm_srli_epi32 with an all -1 register - __m128i q = _mm_add_epi32(numers, _mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak)); //q = numer + ((numer >> 31) & roundToZeroTweak); - q = _mm_sra_epi32(q, libdivide_u32_to_m128i(shifter)); // q = q >> shifter - __m128i shiftMask = _mm_set1_epi32((int32_t)((int8_t)more >> 7)); //set all bits of shift mask = to the sign bit of more - q = _mm_sub_epi32(_mm_xor_si128(q, shiftMask), shiftMask); //q = (q ^ shiftMask) - shiftMask; - return q; - } - else { - __m128i q = libdivide_mullhi_s32_flat_vector(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - __m128i sign = _mm_set1_epi32((int32_t)(int8_t)more >> 7); //must be arithmetic shift - q = _mm_add_epi32(q, _mm_sub_epi32(_mm_xor_si128(numers, sign), sign)); // q += ((numer ^ sign) - sign); + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + if (!denom->magic) { + uint32_t absD = 1U << shift; + if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { + absD = -absD; } - q = _mm_sra_epi32(q, libdivide_u32_to_m128i(more & LIBDIVIDE_32_SHIFT_MASK)); //q >>= shift - q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); // q += (q < 0) - return q; + return (int32_t)absD; + } else { + // Unsigned math is much easier + // We negate the magic number only in the branchfull case, and we don't + // know which case we're in. 
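The signed paths are meant to match C's '/' exactly, truncating toward zero for negative numerators and negative divisors alike. A small sketch:

#include <assert.h>
static void check_s32_semantics(void) {
    struct libdivide_s32_t dm7 = libdivide_s32_gen(-7);
    assert(libdivide_s32_do(100, &dm7) == 100 / -7);    // truncates to -14
    assert(libdivide_s32_do(-100, &dm7) == -100 / -7);  // truncates to 14
}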
However we have enough information to + // determine the correct sign of the magic number. The divisor was + // negative if LIBDIVIDE_NEGATIVE_DIVISOR is set. If ADD_MARKER is set, + // the magic number's sign is opposite that of the divisor. + // We want to compute the positive magic number. + int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); + int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) + ? denom->magic > 0 : denom->magic < 0; + + // Handle the power of 2 case (including branchfree) + if (denom->magic == 0) { + int32_t result = 1U << shift; + return negative_divisor ? -result : result; + } + + uint32_t d = (uint32_t)(magic_was_negated ? -denom->magic : denom->magic); + uint64_t n = 1ULL << (32 + shift); // this shift cannot exceed 30 + uint32_t q = (uint32_t)(n / d); + int32_t result = (int32_t)q; + result += 1; + return negative_divisor ? -result : result; } } -__m128i libdivide_s32_do_vector_alg0(__m128i numers, const struct libdivide_s32_t *denom) { - uint8_t shifter = denom->more & LIBDIVIDE_32_SHIFT_MASK; - __m128i roundToZeroTweak = _mm_set1_epi32((1 << shifter) - 1); - __m128i q = _mm_add_epi32(numers, _mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak)); - return _mm_sra_epi32(q, libdivide_u32_to_m128i(shifter)); +int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom) { + return libdivide_s32_recover((const struct libdivide_s32_t *)denom); } -__m128i libdivide_s32_do_vector_alg1(__m128i numers, const struct libdivide_s32_t *denom) { - uint8_t shifter = denom->more & LIBDIVIDE_32_SHIFT_MASK; - __m128i roundToZeroTweak = _mm_set1_epi32((1 << shifter) - 1); - __m128i q = _mm_add_epi32(numers, _mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak)); - return _mm_sub_epi32(_mm_setzero_si128(), _mm_sra_epi32(q, libdivide_u32_to_m128i(shifter))); -} - -__m128i libdivide_s32_do_vector_alg2(__m128i numers, const struct libdivide_s32_t *denom) { - __m128i q = libdivide_mullhi_s32_flat_vector(numers, _mm_set1_epi32(denom->magic)); - q = _mm_add_epi32(q, numers); - q = _mm_sra_epi32(q, libdivide_u32_to_m128i(denom->more & LIBDIVIDE_32_SHIFT_MASK)); - q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); - return q; -} - -__m128i libdivide_s32_do_vector_alg3(__m128i numers, const struct libdivide_s32_t *denom) { - __m128i q = libdivide_mullhi_s32_flat_vector(numers, _mm_set1_epi32(denom->magic)); - q = _mm_sub_epi32(q, numers); - q = _mm_sra_epi32(q, libdivide_u32_to_m128i(denom->more & LIBDIVIDE_32_SHIFT_MASK)); - q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); - return q; -} - -__m128i libdivide_s32_do_vector_alg4(__m128i numers, const struct libdivide_s32_t *denom) { - __m128i q = libdivide_mullhi_s32_flat_vector(numers, _mm_set1_epi32(denom->magic)); - q = _mm_sra_epi32(q, libdivide_u32_to_m128i(denom->more)); //q >>= shift - q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); // q += (q < 0) - return q; -} -#endif - ///////////// SINT64 +static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int branchfree) { + if (d == 0) { + LIBDIVIDE_ERROR("divider must be != 0"); + } -struct libdivide_s64_t libdivide_s64_gen(int64_t d) { struct libdivide_s64_t result; - /* If d is a power of 2, or negative a power of 2, we have to use a shift. This is especially important because the magic algorithm fails for -1. To check if d is a power of 2 or its inverse, it suffices to check whether its absolute value has exactly one bit set. 
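The negation through an unsigned copy used in these gen routines is deliberate: negating the int64_t directly would overflow, which is undefined behavior, for d == INT64_MIN, while unsigned negation is well defined modulo 2^64. The same idiom in isolation:

#include <stdint.h>
static inline uint64_t absolute_value_u64(int64_t d) {
    uint64_t ud = (uint64_t)d;
    return (d < 0) ? (0 - ud) : ud;  // well defined even for INT64_MIN
}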
This works even for INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set and is a power of 2. */
-    const uint64_t absD = (uint64_t)(d < 0 ? -d : d); //gcc optimizes this to the fast abs trick
-    if ((absD & (absD - 1)) == 0) { //check if exactly one bit is set, don't care if absD is 0 since that's divide by zero
-        result.more = libdivide__count_trailing_zeros64(absD) | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
+    // If d is a power of 2, or negative a power of 2, we have to use a shift.
+    // This is especially important because the magic algorithm fails for -1.
+    // To check if d is a power of 2 or its inverse, it suffices to check
+    // whether its absolute value has exactly one bit set. This works even for
+    // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set
+    // and is a power of 2.
+    uint64_t ud = (uint64_t)d;
+    uint64_t absD = (d < 0) ? -ud : ud;
+    uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(absD);
+    // check if exactly one bit is set,
+    // don't care if absD is 0 since that's divide by zero
+    if ((absD & (absD - 1)) == 0) {
+        // Branchfree and non-branchfree cases are the same
         result.magic = 0;
-    }
-    else {
-        const uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(absD);
-
-        //the dividend here is 2**(floor_log_2_d + 63), so the low 64 bit word is 0 and the high word is floor_log_2_d - 1
+        result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
+    } else {
+        // the dividend here is 2**(floor_log_2_d + 63), so the low 64 bit word
+        // is 0 and the high word is floor_log_2_d - 1
         uint8_t more;
         uint64_t rem, proposed_m;
         proposed_m = libdivide_128_div_64_to_64(1ULL << (floor_log_2_d - 1), 0, absD, &rem);
         const uint64_t e = absD - rem;
-        /* We are going to start with a power of floor_log_2_d - 1. This works if works if e < 2**floor_log_2_d. */
-        if (e < (1ULL << floor_log_2_d)) {
-            /* This power works */
+        // We are going to start with a power of floor_log_2_d - 1.
+        // This works if e < 2**floor_log_2_d.
+        if (!branchfree && e < (1ULL << floor_log_2_d)) {
+            // This power works
             more = floor_log_2_d - 1;
-        }
-        else {
-            /* We need to go one higher. This should not make proposed_m overflow, but it will make it negative when interpreted as an int32_t. */
+        } else {
+            // We need to go one higher. This should not make proposed_m
+            // overflow, but it will make it negative when interpreted as an
+            // int32_t.
            proposed_m += proposed_m;
            const uint64_t twice_rem = rem + rem;
            if (twice_rem >= absD || twice_rem < rem) proposed_m += 1;
-            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
+            // note that we only set the LIBDIVIDE_NEGATIVE_DIVISOR bit if we
+            // also set ADD_MARKER; this is an annoying optimization that
+            // enables algorithm #4 to avoid the mask. However we always set it
+            // in the branchfree case.
+            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
        }
        proposed_m += 1;
+        int64_t magic = (int64_t)proposed_m;
+
+        // Mark if we are negative
+        if (d < 0) {
+            more |= LIBDIVIDE_NEGATIVE_DIVISOR;
+            if (!branchfree) {
+                magic = -magic;
+            }
+        }
+
+        result.more = more;
-        result.magic = (d < 0 ?
-(int64_t)proposed_m : (int64_t)proposed_m); + result.magic = magic; } return result; } +struct libdivide_s64_t libdivide_s64_gen(int64_t d) { + return libdivide_internal_s64_gen(d, 0); +} + +struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) { + struct libdivide_s64_t tmp = libdivide_internal_s64_gen(d, 1); + struct libdivide_s64_branchfree_t ret = {tmp.magic, tmp.more}; + return ret; +} + int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { //shift path - uint32_t shifter = more & LIBDIVIDE_64_SHIFT_MASK; - int64_t q = numer + ((numer >> 63) & ((1LL << shifter) - 1)); - q = q >> shifter; - int64_t shiftMask = (int8_t)more >> 7; //must be arithmetic shift and then sign-extend - q = (q ^ shiftMask) - shiftMask; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + + if (!denom->magic) { // shift path + uint64_t mask = (1ULL << shift) - 1; + uint64_t uq = numer + ((numer >> 63) & mask); + int64_t q = (int64_t)uq; + q >>= shift; + // must be arithmetic shift and then sign-extend + int64_t sign = (int8_t)more >> 7; + q = (q ^ sign) - sign; return q; - } - else { - int64_t q = libdivide__mullhi_s64(magic, numer); + } else { + uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer); if (more & LIBDIVIDE_ADD_MARKER) { - int64_t sign = (int8_t)more >> 7; //must be arithmetic shift and then sign extend - q += ((numer ^ sign) - sign); + // must be arithmetic shift and then sign extend + int64_t sign = (int8_t)more >> 7; + // q += (more < 0 ? -numer : numer) + // cast required to avoid UB + uq += ((uint64_t)numer ^ sign) - sign; } - q >>= more & LIBDIVIDE_64_SHIFT_MASK; + int64_t q = (int64_t)uq; + q >>= shift; q += (q < 0); return q; } } - -int libdivide_s64_get_algorithm(const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int positiveDivisor = ! (more & LIBDIVIDE_NEGATIVE_DIVISOR); - if (denom->magic == 0) return (positiveDivisor ? 0 : 1); //shift path - else if (more & LIBDIVIDE_ADD_MARKER) return (positiveDivisor ? 
2 : 3); - else return 4; -} - -int64_t libdivide_s64_do_alg0(int64_t numer, const struct libdivide_s64_t *denom) { - uint32_t shifter = denom->more & LIBDIVIDE_64_SHIFT_MASK; - int64_t q = numer + ((numer >> 63) & ((1LL << shifter) - 1)); - return q >> shifter; -} - -int64_t libdivide_s64_do_alg1(int64_t numer, const struct libdivide_s64_t *denom) { - //denom->shifter != -1 && demo->shiftMask != 0 - uint32_t shifter = denom->more & LIBDIVIDE_64_SHIFT_MASK; - int64_t q = numer + ((numer >> 63) & ((1LL << shifter) - 1)); - return - (q >> shifter); -} - -int64_t libdivide_s64_do_alg2(int64_t numer, const struct libdivide_s64_t *denom) { - int64_t q = libdivide__mullhi_s64(denom->magic, numer); - q += numer; - q >>= denom->more & LIBDIVIDE_64_SHIFT_MASK; - q += (q < 0); - return q; -} - -int64_t libdivide_s64_do_alg3(int64_t numer, const struct libdivide_s64_t *denom) { - int64_t q = libdivide__mullhi_s64(denom->magic, numer); - q -= numer; - q >>= denom->more & LIBDIVIDE_64_SHIFT_MASK; - q += (q < 0); - return q; -} - -int64_t libdivide_s64_do_alg4(int64_t numer, const struct libdivide_s64_t *denom) { - int64_t q = libdivide__mullhi_s64(denom->magic, numer); - q >>= denom->more; - q += (q < 0); - return q; -} - - -#if LIBDIVIDE_USE_SSE2 -__m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t * denom) { +int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) { uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + // must be arithmetic shift and then sign extend + int64_t sign = (int8_t)more >> 7; int64_t magic = denom->magic; - if (magic == 0) { //shift path - uint32_t shifter = more & LIBDIVIDE_64_SHIFT_MASK; - __m128i roundToZeroTweak = libdivide__u64_to_m128((1LL << shifter) - 1); - __m128i q = _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); //q = numer + ((numer >> 63) & roundToZeroTweak); - q = libdivide_s64_shift_right_vector(q, shifter); // q = q >> shifter - __m128i shiftMask = _mm_set1_epi32((int32_t)((int8_t)more >> 7)); - q = _mm_sub_epi64(_mm_xor_si128(q, shiftMask), shiftMask); //q = (q ^ shiftMask) - shiftMask; + int64_t q = libdivide_mullhi_s64(magic, numer); + q += numer; + + // If q is non-negative, we have nothing to do. + // If q is negative, we want to add either (2**shift)-1 if d is a power of + // 2, or (2**shift) if it is not a power of 2. + uint64_t is_power_of_2 = (magic == 0); + uint64_t q_sign = (uint64_t)(q >> 63); + q += q_sign & ((1ULL << shift) - is_power_of_2); + + // Arithmetic right shift + q >>= shift; + // Negate if needed + q = (q ^ sign) - sign; + + return q; +} + +int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + if (denom->magic == 0) { // shift path + uint64_t absD = 1ULL << shift; + if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { + absD = -absD; + } + return (int64_t)absD; + } else { + // Unsigned math is much easier + int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); + int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) + ? denom->magic > 0 : denom->magic < 0; + + uint64_t d = (uint64_t)(magic_was_negated ? 
-denom->magic : denom->magic); + uint64_t n_hi = 1ULL << shift, n_lo = 0; + uint64_t rem_ignored; + uint64_t q = libdivide_128_div_64_to_64(n_hi, n_lo, d, &rem_ignored); + int64_t result = (int64_t)(q + 1); + if (negative_divisor) { + result = -result; + } + return result; + } +} + +int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom) { + return libdivide_s64_recover((const struct libdivide_s64_t *)denom); +} + +#if defined(LIBDIVIDE_AVX512) + +static inline __m512i libdivide_u32_do_vector(__m512i numers, const struct libdivide_u32_t *denom); +static inline __m512i libdivide_s32_do_vector(__m512i numers, const struct libdivide_s32_t *denom); +static inline __m512i libdivide_u64_do_vector(__m512i numers, const struct libdivide_u64_t *denom); +static inline __m512i libdivide_s64_do_vector(__m512i numers, const struct libdivide_s64_t *denom); + +static inline __m512i libdivide_u32_branchfree_do_vector(__m512i numers, const struct libdivide_u32_branchfree_t *denom); +static inline __m512i libdivide_s32_branchfree_do_vector(__m512i numers, const struct libdivide_s32_branchfree_t *denom); +static inline __m512i libdivide_u64_branchfree_do_vector(__m512i numers, const struct libdivide_u64_branchfree_t *denom); +static inline __m512i libdivide_s64_branchfree_do_vector(__m512i numers, const struct libdivide_s64_branchfree_t *denom); + +//////// Internal Utility Functions + +static inline __m512i libdivide_s64_signbits(__m512i v) {; + return _mm512_srai_epi64(v, 63); +} + +static inline __m512i libdivide_s64_shift_right_vector(__m512i v, int amt) { + return _mm512_srai_epi64(v, amt); +} + +// Here, b is assumed to contain one 32-bit value repeated. +static inline __m512i libdivide_mullhi_u32_vector(__m512i a, __m512i b) { + __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epu32(a, b), 32); + __m512i a1X3X = _mm512_srli_epi64(a, 32); + __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); + __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epu32(a1X3X, b), mask); + return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// b is one 32-bit value repeated. +static inline __m512i libdivide_mullhi_s32_vector(__m512i a, __m512i b) { + __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epi32(a, b), 32); + __m512i a1X3X = _mm512_srli_epi64(a, 32); + __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); + __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epi32(a1X3X, b), mask); + return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// Here, y is assumed to contain one 64-bit value repeated. +// https://stackoverflow.com/a/28827013 +static inline __m512i libdivide_mullhi_u64_vector(__m512i x, __m512i y) { + __m512i lomask = _mm512_set1_epi64(0xffffffff); + __m512i xh = _mm512_shuffle_epi32(x, (_MM_PERM_ENUM) 0xB1); + __m512i yh = _mm512_shuffle_epi32(y, (_MM_PERM_ENUM) 0xB1); + __m512i w0 = _mm512_mul_epu32(x, y); + __m512i w1 = _mm512_mul_epu32(x, yh); + __m512i w2 = _mm512_mul_epu32(xh, y); + __m512i w3 = _mm512_mul_epu32(xh, yh); + __m512i w0h = _mm512_srli_epi64(w0, 32); + __m512i s1 = _mm512_add_epi64(w1, w0h); + __m512i s1l = _mm512_and_si512(s1, lomask); + __m512i s1h = _mm512_srli_epi64(s1, 32); + __m512i s2 = _mm512_add_epi64(w2, s1l); + __m512i s2h = _mm512_srli_epi64(s2, 32); + __m512i hi = _mm512_add_epi64(w3, s1h); + hi = _mm512_add_epi64(hi, s2h); + + return hi; +} + +// y is one 64-bit value repeated. 
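+// Signed high-half multiply built from the unsigned one via the standard
+// correction: mulhi_s(x, y) = mulhi_u(x, y) - (x < 0 ? y : 0) - (y < 0 ? x : 0).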
+static inline __m512i libdivide_mullhi_s64_vector(__m512i x, __m512i y) { + __m512i p = libdivide_mullhi_u64_vector(x, y); + __m512i t1 = _mm512_and_si512(libdivide_s64_signbits(x), y); + __m512i t2 = _mm512_and_si512(libdivide_s64_signbits(y), x); + p = _mm512_sub_epi64(p, t1); + p = _mm512_sub_epi64(p, t2); + return p; +} + +////////// UINT32 + +__m512i libdivide_u32_do_vector(__m512i numers, const struct libdivide_u32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm512_srli_epi32(numers, more); + } + else { + __m512i q = libdivide_mullhi_u32_vector(numers, _mm512_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); + return _mm512_srli_epi32(t, shift); + } + else { + return _mm512_srli_epi32(q, more); + } + } +} + +__m512i libdivide_u32_branchfree_do_vector(__m512i numers, const struct libdivide_u32_branchfree_t *denom) { + __m512i q = libdivide_mullhi_u32_vector(numers, _mm512_set1_epi32(denom->magic)); + __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); + return _mm512_srli_epi32(t, denom->more); +} + +////////// UINT64 + +__m512i libdivide_u64_do_vector(__m512i numers, const struct libdivide_u64_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm512_srli_epi64(numers, more); + } + else { + __m512i q = libdivide_mullhi_u64_vector(numers, _mm512_set1_epi64(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); + return _mm512_srli_epi64(t, shift); + } + else { + return _mm512_srli_epi64(q, more); + } + } +} + +__m512i libdivide_u64_branchfree_do_vector(__m512i numers, const struct libdivide_u64_branchfree_t *denom) { + __m512i q = libdivide_mullhi_u64_vector(numers, _mm512_set1_epi64(denom->magic)); + __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); + return _mm512_srli_epi64(t, denom->more); +} + +////////// SINT32 + +__m512i libdivide_s32_do_vector(__m512i numers, const struct libdivide_s32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + uint32_t mask = (1U << shift) - 1; + __m512i roundToZeroTweak = _mm512_set1_epi32(mask); + // q = numer + ((numer >> 31) & roundToZeroTweak); + __m512i q = _mm512_add_epi32(numers, _mm512_and_si512(_mm512_srai_epi32(numers, 31), roundToZeroTweak)); + q = _mm512_srai_epi32(q, shift); + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); return q; } else { - __m128i q = libdivide_mullhi_s64_flat_vector(numers, libdivide__u64_to_m128(magic)); + __m512i q = libdivide_mullhi_s32_vector(numers, _mm512_set1_epi32(denom->magic)); if (more & LIBDIVIDE_ADD_MARKER) { - __m128i sign = _mm_set1_epi32((int32_t)((int8_t)more >> 7)); //must be arithmetic shift - q = _mm_add_epi64(q, _mm_sub_epi64(_mm_xor_si128(numers, sign), sign)); // q += ((numer ^ sign) - sign); + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm512_add_epi32(q, _mm512_sub_epi32(_mm512_xor_si512(numers, sign), sign)); } - q 
= libdivide_s64_shift_right_vector(q, more & LIBDIVIDE_64_SHIFT_MASK); //q >>= denom->mult_path.shift + // q >>= shift + q = _mm512_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); + q = _mm512_add_epi32(q, _mm512_srli_epi32(q, 31)); // q += (q < 0) + return q; + } +} + +__m512i libdivide_s32_branchfree_do_vector(__m512i numers, const struct libdivide_s32_branchfree_t *denom) { + int32_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + __m512i q = libdivide_mullhi_s32_vector(numers, _mm512_set1_epi32(magic)); + q = _mm512_add_epi32(q, numers); // q += numers + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + __m512i q_sign = _mm512_srai_epi32(q, 31); // q_sign = q >> 31 + __m512i mask = _mm512_set1_epi32((1U << shift) - is_power_of_2); + q = _mm512_add_epi32(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) + q = _mm512_srai_epi32(q, shift); // q >>= shift + q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +////////// SINT64 + +__m512i libdivide_s64_do_vector(__m512i numers, const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + int64_t magic = denom->magic; + if (magic == 0) { // shift path + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + uint64_t mask = (1ULL << shift) - 1; + __m512i roundToZeroTweak = _mm512_set1_epi64(mask); + // q = numer + ((numer >> 63) & roundToZeroTweak); + __m512i q = _mm512_add_epi64(numers, _mm512_and_si512(libdivide_s64_signbits(numers), roundToZeroTweak)); + q = libdivide_s64_shift_right_vector(q, shift); + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); + return q; + } + else { + __m512i q = libdivide_mullhi_s64_vector(numers, _mm512_set1_epi64(magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm512_add_epi64(q, _mm512_sub_epi64(_mm512_xor_si512(numers, sign), sign)); + } + // q >>= denom->mult_path.shift + q = libdivide_s64_shift_right_vector(q, more & LIBDIVIDE_64_SHIFT_MASK); + q = _mm512_add_epi64(q, _mm512_srli_epi64(q, 63)); // q += (q < 0) + return q; + } +} + +__m512i libdivide_s64_branchfree_do_vector(__m512i numers, const struct libdivide_s64_branchfree_t *denom) { + int64_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + // must be arithmetic shift + __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); + + // libdivide_mullhi_s64(numers, magic); + __m512i q = libdivide_mullhi_s64_vector(numers, _mm512_set1_epi64(magic)); + q = _mm512_add_epi64(q, numers); // q += numers + + // If q is non-negative, we have nothing to do. + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2. 
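+    // (magic == 0 encodes the power-of-2 case; see libdivide_internal_s64_gen
+    // earlier in the file.)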
+ uint32_t is_power_of_2 = (magic == 0); + __m512i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 + __m512i mask = _mm512_set1_epi64((1ULL << shift) - is_power_of_2); + q = _mm512_add_epi64(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) + q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift + q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +#elif defined(LIBDIVIDE_AVX2) + +static inline __m256i libdivide_u32_do_vector(__m256i numers, const struct libdivide_u32_t *denom); +static inline __m256i libdivide_s32_do_vector(__m256i numers, const struct libdivide_s32_t *denom); +static inline __m256i libdivide_u64_do_vector(__m256i numers, const struct libdivide_u64_t *denom); +static inline __m256i libdivide_s64_do_vector(__m256i numers, const struct libdivide_s64_t *denom); + +static inline __m256i libdivide_u32_branchfree_do_vector(__m256i numers, const struct libdivide_u32_branchfree_t *denom); +static inline __m256i libdivide_s32_branchfree_do_vector(__m256i numers, const struct libdivide_s32_branchfree_t *denom); +static inline __m256i libdivide_u64_branchfree_do_vector(__m256i numers, const struct libdivide_u64_branchfree_t *denom); +static inline __m256i libdivide_s64_branchfree_do_vector(__m256i numers, const struct libdivide_s64_branchfree_t *denom); + +//////// Internal Utility Functions + +// Implementation of _mm256_srai_epi64(v, 63) (from AVX512). +static inline __m256i libdivide_s64_signbits(__m256i v) { + __m256i hiBitsDuped = _mm256_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); + __m256i signBits = _mm256_srai_epi32(hiBitsDuped, 31); + return signBits; +} + +// Implementation of _mm256_srai_epi64 (from AVX512). +static inline __m256i libdivide_s64_shift_right_vector(__m256i v, int amt) { + const int b = 64 - amt; + __m256i m = _mm256_set1_epi64x(1ULL << (b - 1)); + __m256i x = _mm256_srli_epi64(v, amt); + __m256i result = _mm256_sub_epi64(_mm256_xor_si256(x, m), m); + return result; +} + +// Here, b is assumed to contain one 32-bit value repeated. +static inline __m256i libdivide_mullhi_u32_vector(__m256i a, __m256i b) { + __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epu32(a, b), 32); + __m256i a1X3X = _mm256_srli_epi64(a, 32); + __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); + __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epu32(a1X3X, b), mask); + return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// b is one 32-bit value repeated. +static inline __m256i libdivide_mullhi_s32_vector(__m256i a, __m256i b) { + __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epi32(a, b), 32); + __m256i a1X3X = _mm256_srli_epi64(a, 32); + __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); + __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epi32(a1X3X, b), mask); + return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// Here, y is assumed to contain one 64-bit value repeated. 
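+// The high 64 bits are assembled from four 32x32 partial products
+// (schoolbook multiplication), since AVX2 has no 64x64 high-multiply.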
+// https://stackoverflow.com/a/28827013 +static inline __m256i libdivide_mullhi_u64_vector(__m256i x, __m256i y) { + __m256i lomask = _mm256_set1_epi64x(0xffffffff); + __m256i xh = _mm256_shuffle_epi32(x, 0xB1); // x0l, x0h, x1l, x1h + __m256i yh = _mm256_shuffle_epi32(y, 0xB1); // y0l, y0h, y1l, y1h + __m256i w0 = _mm256_mul_epu32(x, y); // x0l*y0l, x1l*y1l + __m256i w1 = _mm256_mul_epu32(x, yh); // x0l*y0h, x1l*y1h + __m256i w2 = _mm256_mul_epu32(xh, y); // x0h*y0l, x1h*y0l + __m256i w3 = _mm256_mul_epu32(xh, yh); // x0h*y0h, x1h*y1h + __m256i w0h = _mm256_srli_epi64(w0, 32); + __m256i s1 = _mm256_add_epi64(w1, w0h); + __m256i s1l = _mm256_and_si256(s1, lomask); + __m256i s1h = _mm256_srli_epi64(s1, 32); + __m256i s2 = _mm256_add_epi64(w2, s1l); + __m256i s2h = _mm256_srli_epi64(s2, 32); + __m256i hi = _mm256_add_epi64(w3, s1h); + hi = _mm256_add_epi64(hi, s2h); + + return hi; +} + +// y is one 64-bit value repeated. +static inline __m256i libdivide_mullhi_s64_vector(__m256i x, __m256i y) { + __m256i p = libdivide_mullhi_u64_vector(x, y); + __m256i t1 = _mm256_and_si256(libdivide_s64_signbits(x), y); + __m256i t2 = _mm256_and_si256(libdivide_s64_signbits(y), x); + p = _mm256_sub_epi64(p, t1); + p = _mm256_sub_epi64(p, t2); + return p; +} + +////////// UINT32 + +__m256i libdivide_u32_do_vector(__m256i numers, const struct libdivide_u32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm256_srli_epi32(numers, more); + } + else { + __m256i q = libdivide_mullhi_u32_vector(numers, _mm256_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q); + return _mm256_srli_epi32(t, shift); + } + else { + return _mm256_srli_epi32(q, more); + } + } +} + +__m256i libdivide_u32_branchfree_do_vector(__m256i numers, const struct libdivide_u32_branchfree_t *denom) { + __m256i q = libdivide_mullhi_u32_vector(numers, _mm256_set1_epi32(denom->magic)); + __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q); + return _mm256_srli_epi32(t, denom->more); +} + +////////// UINT64 + +__m256i libdivide_u64_do_vector(__m256i numers, const struct libdivide_u64_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm256_srli_epi64(numers, more); + } + else { + __m256i q = libdivide_mullhi_u64_vector(numers, _mm256_set1_epi64x(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); + return _mm256_srli_epi64(t, shift); + } + else { + return _mm256_srli_epi64(q, more); + } + } +} + +__m256i libdivide_u64_branchfree_do_vector(__m256i numers, const struct libdivide_u64_branchfree_t *denom) { + __m256i q = libdivide_mullhi_u64_vector(numers, _mm256_set1_epi64x(denom->magic)); + __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); + return _mm256_srli_epi64(t, denom->more); +} + +////////// SINT32 + +__m256i libdivide_s32_do_vector(__m256i numers, const struct libdivide_s32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + uint32_t mask = (1U << shift) - 1; + __m256i roundToZeroTweak = _mm256_set1_epi32(mask); + // q = numer + ((numer >> 
31) & roundToZeroTweak); + __m256i q = _mm256_add_epi32(numers, _mm256_and_si256(_mm256_srai_epi32(numers, 31), roundToZeroTweak)); + q = _mm256_srai_epi32(q, shift); + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); + return q; + } + else { + __m256i q = libdivide_mullhi_s32_vector(numers, _mm256_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm256_add_epi32(q, _mm256_sub_epi32(_mm256_xor_si256(numers, sign), sign)); + } + // q >>= shift + q = _mm256_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); + q = _mm256_add_epi32(q, _mm256_srli_epi32(q, 31)); // q += (q < 0) + return q; + } +} + +__m256i libdivide_s32_branchfree_do_vector(__m256i numers, const struct libdivide_s32_branchfree_t *denom) { + int32_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + __m256i q = libdivide_mullhi_s32_vector(numers, _mm256_set1_epi32(magic)); + q = _mm256_add_epi32(q, numers); // q += numers + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + __m256i q_sign = _mm256_srai_epi32(q, 31); // q_sign = q >> 31 + __m256i mask = _mm256_set1_epi32((1U << shift) - is_power_of_2); + q = _mm256_add_epi32(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) + q = _mm256_srai_epi32(q, shift); // q >>= shift + q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +////////// SINT64 + +__m256i libdivide_s64_do_vector(__m256i numers, const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + int64_t magic = denom->magic; + if (magic == 0) { // shift path + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + uint64_t mask = (1ULL << shift) - 1; + __m256i roundToZeroTweak = _mm256_set1_epi64x(mask); + // q = numer + ((numer >> 63) & roundToZeroTweak); + __m256i q = _mm256_add_epi64(numers, _mm256_and_si256(libdivide_s64_signbits(numers), roundToZeroTweak)); + q = libdivide_s64_shift_right_vector(q, shift); + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); + return q; + } + else { + __m256i q = libdivide_mullhi_s64_vector(numers, _mm256_set1_epi64x(magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm256_add_epi64(q, _mm256_sub_epi64(_mm256_xor_si256(numers, sign), sign)); + } + // q >>= denom->mult_path.shift + q = libdivide_s64_shift_right_vector(q, more & LIBDIVIDE_64_SHIFT_MASK); + q = _mm256_add_epi64(q, _mm256_srli_epi64(q, 63)); // q += (q < 0) + return q; + } +} + +__m256i libdivide_s64_branchfree_do_vector(__m256i numers, const struct libdivide_s64_branchfree_t *denom) { + int64_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + // must be arithmetic shift + __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); + + // libdivide_mullhi_s64(numers, magic); + __m256i q = libdivide_mullhi_s64_vector(numers, _mm256_set1_epi64x(magic)); + q = _mm256_add_epi64(q, 
numers); // q += numers + + // If q is non-negative, we have nothing to do. + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2. + uint32_t is_power_of_2 = (magic == 0); + __m256i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 + __m256i mask = _mm256_set1_epi64x((1ULL << shift) - is_power_of_2); + q = _mm256_add_epi64(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) + q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift + q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +#elif defined(LIBDIVIDE_SSE2) + +static inline __m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t *denom); +static inline __m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t *denom); +static inline __m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t *denom); +static inline __m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t *denom); + +static inline __m128i libdivide_u32_branchfree_do_vector(__m128i numers, const struct libdivide_u32_branchfree_t *denom); +static inline __m128i libdivide_s32_branchfree_do_vector(__m128i numers, const struct libdivide_s32_branchfree_t *denom); +static inline __m128i libdivide_u64_branchfree_do_vector(__m128i numers, const struct libdivide_u64_branchfree_t *denom); +static inline __m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivide_s64_branchfree_t *denom); + +//////// Internal Utility Functions + +// Implementation of _mm_srai_epi64(v, 63) (from AVX512). +static inline __m128i libdivide_s64_signbits(__m128i v) { + __m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); + __m128i signBits = _mm_srai_epi32(hiBitsDuped, 31); + return signBits; +} + +// Implementation of _mm_srai_epi64 (from AVX512). +static inline __m128i libdivide_s64_shift_right_vector(__m128i v, int amt) { + const int b = 64 - amt; + __m128i m = _mm_set1_epi64x(1ULL << (b - 1)); + __m128i x = _mm_srli_epi64(v, amt); + __m128i result = _mm_sub_epi64(_mm_xor_si128(x, m), m); + return result; +} + +// Here, b is assumed to contain one 32-bit value repeated. +static inline __m128i libdivide_mullhi_u32_vector(__m128i a, __m128i b) { + __m128i hi_product_0Z2Z = _mm_srli_epi64(_mm_mul_epu32(a, b), 32); + __m128i a1X3X = _mm_srli_epi64(a, 32); + __m128i mask = _mm_set_epi32(-1, 0, -1, 0); + __m128i hi_product_Z1Z3 = _mm_and_si128(_mm_mul_epu32(a1X3X, b), mask); + return _mm_or_si128(hi_product_0Z2Z, hi_product_Z1Z3); +} + +// SSE2 does not have a signed multiplication instruction, but we can convert +// unsigned to signed pretty efficiently. Again, b is just a 32 bit value +// repeated four times. +static inline __m128i libdivide_mullhi_s32_vector(__m128i a, __m128i b) { + __m128i p = libdivide_mullhi_u32_vector(a, b); + // t1 = (a >> 31) & y, arithmetic shift + __m128i t1 = _mm_and_si128(_mm_srai_epi32(a, 31), b); + __m128i t2 = _mm_and_si128(_mm_srai_epi32(b, 31), a); + p = _mm_sub_epi32(p, t1); + p = _mm_sub_epi32(p, t2); + return p; +} + +// Here, y is assumed to contain one 64-bit value repeated. 
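+// Same schoolbook 32x32 partial-product scheme as the AVX2 version above.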
+// https://stackoverflow.com/a/28827013 +static inline __m128i libdivide_mullhi_u64_vector(__m128i x, __m128i y) { + __m128i lomask = _mm_set1_epi64x(0xffffffff); + __m128i xh = _mm_shuffle_epi32(x, 0xB1); // x0l, x0h, x1l, x1h + __m128i yh = _mm_shuffle_epi32(y, 0xB1); // y0l, y0h, y1l, y1h + __m128i w0 = _mm_mul_epu32(x, y); // x0l*y0l, x1l*y1l + __m128i w1 = _mm_mul_epu32(x, yh); // x0l*y0h, x1l*y1h + __m128i w2 = _mm_mul_epu32(xh, y); // x0h*y0l, x1h*y0l + __m128i w3 = _mm_mul_epu32(xh, yh); // x0h*y0h, x1h*y1h + __m128i w0h = _mm_srli_epi64(w0, 32); + __m128i s1 = _mm_add_epi64(w1, w0h); + __m128i s1l = _mm_and_si128(s1, lomask); + __m128i s1h = _mm_srli_epi64(s1, 32); + __m128i s2 = _mm_add_epi64(w2, s1l); + __m128i s2h = _mm_srli_epi64(s2, 32); + __m128i hi = _mm_add_epi64(w3, s1h); + hi = _mm_add_epi64(hi, s2h); + + return hi; +} + +// y is one 64-bit value repeated. +static inline __m128i libdivide_mullhi_s64_vector(__m128i x, __m128i y) { + __m128i p = libdivide_mullhi_u64_vector(x, y); + __m128i t1 = _mm_and_si128(libdivide_s64_signbits(x), y); + __m128i t2 = _mm_and_si128(libdivide_s64_signbits(y), x); + p = _mm_sub_epi64(p, t1); + p = _mm_sub_epi64(p, t2); + return p; +} + +////////// UINT32 + +__m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm_srli_epi32(numers, more); + } + else { + __m128i q = libdivide_mullhi_u32_vector(numers, _mm_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); + return _mm_srli_epi32(t, shift); + } + else { + return _mm_srli_epi32(q, more); + } + } +} + +__m128i libdivide_u32_branchfree_do_vector(__m128i numers, const struct libdivide_u32_branchfree_t *denom) { + __m128i q = libdivide_mullhi_u32_vector(numers, _mm_set1_epi32(denom->magic)); + __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); + return _mm_srli_epi32(t, denom->more); +} + +////////// UINT64 + +__m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + return _mm_srli_epi64(numers, more); + } + else { + __m128i q = libdivide_mullhi_u64_vector(numers, _mm_set1_epi64x(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // uint32_t t = ((numer - q) >> 1) + q; + // return t >> denom->shift; + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); + return _mm_srli_epi64(t, shift); + } + else { + return _mm_srli_epi64(q, more); + } + } +} + +__m128i libdivide_u64_branchfree_do_vector(__m128i numers, const struct libdivide_u64_branchfree_t *denom) { + __m128i q = libdivide_mullhi_u64_vector(numers, _mm_set1_epi64x(denom->magic)); + __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); + return _mm_srli_epi64(t, denom->more); +} + +////////// SINT32 + +__m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t *denom) { + uint8_t more = denom->more; + if (!denom->magic) { + uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + uint32_t mask = (1U << shift) - 1; + __m128i roundToZeroTweak = _mm_set1_epi32(mask); + // q = numer + ((numer >> 31) & roundToZeroTweak); + __m128i q = _mm_add_epi32(numers, _mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak)); + q = 
_mm_srai_epi32(q, shift); + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); + return q; + } + else { + __m128i q = libdivide_mullhi_s32_vector(numers, _mm_set1_epi32(denom->magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm_add_epi32(q, _mm_sub_epi32(_mm_xor_si128(numers, sign), sign)); + } + // q >>= shift + q = _mm_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); + q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); // q += (q < 0) + return q; + } +} + +__m128i libdivide_s32_branchfree_do_vector(__m128i numers, const struct libdivide_s32_branchfree_t *denom) { + int32_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + __m128i q = libdivide_mullhi_s32_vector(numers, _mm_set1_epi32(magic)); + q = _mm_add_epi32(q, numers); // q += numers + + // If q is non-negative, we have nothing to do + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2 + uint32_t is_power_of_2 = (magic == 0); + __m128i q_sign = _mm_srai_epi32(q, 31); // q_sign = q >> 31 + __m128i mask = _mm_set1_epi32((1U << shift) - is_power_of_2); + q = _mm_add_epi32(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) + q = _mm_srai_epi32(q, shift); // q >>= shift + q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign + return q; +} + +////////// SINT64 + +__m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t *denom) { + uint8_t more = denom->more; + int64_t magic = denom->magic; + if (magic == 0) { // shift path + uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + uint64_t mask = (1ULL << shift) - 1; + __m128i roundToZeroTweak = _mm_set1_epi64x(mask); + // q = numer + ((numer >> 63) & roundToZeroTweak); + __m128i q = _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); + q = libdivide_s64_shift_right_vector(q, shift); + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q = (q ^ sign) - sign; + q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); + return q; + } + else { + __m128i q = libdivide_mullhi_s64_vector(numers, _mm_set1_epi64x(magic)); + if (more & LIBDIVIDE_ADD_MARKER) { + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); + // q += ((numer ^ sign) - sign); + q = _mm_add_epi64(q, _mm_sub_epi64(_mm_xor_si128(numers, sign), sign)); + } + // q >>= denom->mult_path.shift + q = libdivide_s64_shift_right_vector(q, more & LIBDIVIDE_64_SHIFT_MASK); q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); // q += (q < 0) return q; } } -__m128i libdivide_s64_do_vector_alg0(__m128i numers, const struct libdivide_s64_t *denom) { - uint32_t shifter = denom->more & LIBDIVIDE_64_SHIFT_MASK; - __m128i roundToZeroTweak = libdivide__u64_to_m128((1LL << shifter) - 1); - __m128i q = _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vector(q, shifter); - return q; -} +__m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivide_s64_branchfree_t *denom) { + int64_t magic = denom->magic; + uint8_t more = denom->more; + uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; + // must be arithmetic shift + __m128i sign = _mm_set1_epi32((int8_t)more >> 7); -__m128i 
libdivide_s64_do_vector_alg1(__m128i numers, const struct libdivide_s64_t *denom) { - uint32_t shifter = denom->more & LIBDIVIDE_64_SHIFT_MASK; - __m128i roundToZeroTweak = libdivide__u64_to_m128((1LL << shifter) - 1); - __m128i q = _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vector(q, shifter); - return _mm_sub_epi64(_mm_setzero_si128(), q); -} + // libdivide_mullhi_s64(numers, magic); + __m128i q = libdivide_mullhi_s64_vector(numers, _mm_set1_epi64x(magic)); + q = _mm_add_epi64(q, numers); // q += numers -__m128i libdivide_s64_do_vector_alg2(__m128i numers, const struct libdivide_s64_t *denom) { - __m128i q = libdivide_mullhi_s64_flat_vector(numers, libdivide__u64_to_m128(denom->magic)); - q = _mm_add_epi64(q, numers); - q = libdivide_s64_shift_right_vector(q, denom->more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); // q += (q < 0) - return q; -} - -__m128i libdivide_s64_do_vector_alg3(__m128i numers, const struct libdivide_s64_t *denom) { - __m128i q = libdivide_mullhi_s64_flat_vector(numers, libdivide__u64_to_m128(denom->magic)); - q = _mm_sub_epi64(q, numers); - q = libdivide_s64_shift_right_vector(q, denom->more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); // q += (q < 0) - return q; -} - -__m128i libdivide_s64_do_vector_alg4(__m128i numers, const struct libdivide_s64_t *denom) { - __m128i q = libdivide_mullhi_s64_flat_vector(numers, libdivide__u64_to_m128(denom->magic)); - q = libdivide_s64_shift_right_vector(q, denom->more); - q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); + // If q is non-negative, we have nothing to do. + // If q is negative, we want to add either (2**shift)-1 if d is + // a power of 2, or (2**shift) if it is not a power of 2. + uint32_t is_power_of_2 = (magic == 0); + __m128i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 + __m128i mask = _mm_set1_epi64x((1ULL << shift) - is_power_of_2); + q = _mm_add_epi64(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) + q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift + q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign return q; } @@ -1204,228 +1930,143 @@ __m128i libdivide_s64_do_vector_alg4(__m128i numers, const struct libdivide_s64_ #ifdef __cplusplus -/* The C++ template design here is a total mess. This needs to be fixed by someone better at templates than I. The current design is: - -- The base is a template divider_base that takes the integer type, the libdivide struct, a generating function, a get algorithm function, a do function, and either a do vector function or a dummy int. -- The base has storage for the libdivide struct. This is the only storage (so the C++ class should be no larger than the libdivide struct). - -- Above that, there's divider_mid. This is an empty struct by default, but it is specialized against our four int types. divider_mid contains a template struct algo, that contains a typedef for a specialization of divider_base. struct algo is specialized to take an "algorithm number," where -1 means to use the general algorithm. - -- Publicly we have class divider, which inherits from divider_mid::algo. This also take an algorithm number, which defaults to -1 (the general algorithm). -- divider has a operator / which allows you to use a divider as the divisor in a quotient expression. 
- -*/ - -namespace libdivide_internal { - -#if LIBDIVIDE_USE_SSE2 -#define MAYBE_VECTOR(x) x -#define MAYBE_VECTOR_PARAM __m128i vector_func(__m128i, const DenomType *) -#else -#define MAYBE_VECTOR(x) 0 -#define MAYBE_VECTOR_PARAM int vector_func -#endif - - /* Some bogus unswitch functions for unsigned types so the same (presumably templated) code can work for both signed and unsigned. */ - uint32_t crash_u32(uint32_t, const libdivide_u32_t *) { abort(); } - uint64_t crash_u64(uint64_t, const libdivide_u64_t *) { abort(); } -#ifdef __APPLE__ - UInt64 crash_u64(UInt64, const libdivide_u64_t *) { abort(); } -#endif -#if LIBDIVIDE_USE_SSE2 - __m128i crash_u32_vector(__m128i, const libdivide_u32_t *) { abort(); } - __m128i crash_u64_vector(__m128i, const libdivide_u64_t *) { abort(); } -#endif - - template - class divider_base { - public: - DenomType denom; - divider_base(IntType d) : denom(gen_func(d)) { } - divider_base(const DenomType & d) : denom(d) { } - - IntType perform_divide(IntType val) const { return do_func(val, &denom); } -#if LIBDIVIDE_USE_SSE2 - __m128i perform_divide_vector(__m128i val) const { return vector_func(val, &denom); } -#endif - - int get_algorithm() const { return get_algo(&denom); } - }; - - - template struct divider_mid { }; - - template<> struct divider_mid { - typedef uint32_t IntType; - typedef struct libdivide_u32_t DenomType; - template struct denom { - typedef divider_base divider; - }; - - template struct algo { }; - template struct algo<-1, J> { typedef denom::divider divider; }; - template struct algo<0, J> { typedef denom::divider divider; }; - template struct algo<1, J> { typedef denom::divider divider; }; - template struct algo<2, J> { typedef denom::divider divider; }; - - /* Define two more bogus ones so that the same (templated, presumably) code can handle both signed and unsigned */ - template struct algo<3, J> { typedef denom::divider divider; }; - template struct algo<4, J> { typedef denom::divider divider; }; - - }; - - template<> struct divider_mid { - typedef int32_t IntType; - typedef struct libdivide_s32_t DenomType; - template struct denom { - typedef divider_base divider; - }; - - - template struct algo { }; - template struct algo<-1, J> { typedef denom::divider divider; }; - template struct algo<0, J> { typedef denom::divider divider; }; - template struct algo<1, J> { typedef denom::divider divider; }; - template struct algo<2, J> { typedef denom::divider divider; }; - template struct algo<3, J> { typedef denom::divider divider; }; - template struct algo<4, J> { typedef denom::divider divider; }; - - }; - -#ifdef __APPLE__ - template<> struct divider_mid { - typedef Int64 IntType; - typedef struct libdivide_s64_t DenomType; - template struct denom { - typedef divider_base divider; - }; - - template struct algo { }; - template struct algo<-1, J> { typedef denom::divider divider; }; - template struct algo<0, J> { typedef denom::divider divider; }; - template struct algo<1, J> { typedef denom::divider divider; }; - template struct algo<2, J> { typedef denom::divider divider; }; - template struct algo<3, J> { typedef denom::divider divider; }; - template struct algo<4, J> { typedef denom::divider divider; }; - }; - - template<> struct divider_mid { - typedef UInt64 IntType; - typedef struct libdivide_u64_t DenomType; - template struct denom { - typedef divider_base divider; - }; - - template struct algo { }; - template struct algo<-1, J> { typedef denom::divider divider; }; - template struct algo<0, J> { typedef denom::divider divider; }; - 
template struct algo<1, J> { typedef denom::divider divider; }; - template struct algo<2, J> { typedef denom::divider divider; }; - - /* Define two more bogus ones so that the same (templated, presumably) code can handle both signed and unsigned */ - template struct algo<3, J> { typedef denom::divider divider; }; - template struct algo<4, J> { typedef denom::divider divider; }; - - - }; -#endif - - template<> struct divider_mid { - typedef uint64_t IntType; - typedef struct libdivide_u64_t DenomType; - template struct denom { - typedef divider_base divider; - }; - - template struct algo { }; - template struct algo<-1, J> { typedef denom::divider divider; }; - template struct algo<0, J> { typedef denom::divider divider; }; - template struct algo<1, J> { typedef denom::divider divider; }; - template struct algo<2, J> { typedef denom::divider divider; }; - - /* Define two more bogus ones so that the same (templated, presumably) code can handle both signed and unsigned */ - template struct algo<3, J> { typedef denom::divider divider; }; - template struct algo<4, J> { typedef denom::divider divider; }; - - - }; - - template<> struct divider_mid { - typedef int64_t IntType; - typedef struct libdivide_s64_t DenomType; - template struct denom { - typedef divider_base divider; - }; - - template struct algo { }; - template struct algo<-1, J> { typedef denom::divider divider; }; - template struct algo<0, J> { typedef denom::divider divider; }; - template struct algo<1, J> { typedef denom::divider divider; }; - template struct algo<2, J> { typedef denom::divider divider; }; - template struct algo<3, J> { typedef denom::divider divider; }; - template struct algo<4, J> { typedef denom::divider divider; }; - }; - -} - -template -class divider -{ - private: - typename libdivide_internal::divider_mid::template algo::divider sub; - template friend divider unswitch(const divider & d); - divider(const typename libdivide_internal::divider_mid::DenomType & denom) : sub(denom) { } - - public: - - /* Ordinary constructor, that takes the divisor as a parameter. */ - divider(T n) : sub(n) { } - - /* Default constructor, that divides by 1 */ - divider() : sub(1) { } - - /* Divides the parameter by the divisor, returning the quotient */ - T perform_divide(T val) const { return sub.perform_divide(val); } - -#if LIBDIVIDE_USE_SSE2 - /* Treats the vector as either two or four packed values (depending on the size), and divides each of them by the divisor, returning the packed quotients. */ - __m128i perform_divide_vector(__m128i val) const { return sub.perform_divide_vector(val); } -#endif - - /* Returns the index of algorithm, for use in the unswitch function */ - int get_algorithm() const { return sub.get_algorithm(); } // returns the algorithm for unswitching - - /* operator== */ - bool operator==(const divider & him) const { return sub.denom.magic == him.sub.denom.magic && sub.denom.more == him.sub.denom.more; } - - bool operator!=(const divider & him) const { return ! (*this == him); } +// The C++ divider class is templated on both an integer type +// (like uint64_t) and an algorithm type. +// * BRANCHFULL is the default algorithm type. +// * BRANCHFREE is the branchfree algorithm type. +enum { + BRANCHFULL, + BRANCHFREE }; -/* Returns a divider specialized for the given algorithm. */ -template -divider unswitch(const divider & d) { return divider(d.sub.denom); } - -/* Overload of the / operator for scalar division. 
*/ -template -int_type operator/(int_type numer, const divider & denom) { - return denom.perform_divide(numer); -} - -#if LIBDIVIDE_USE_SSE2 -/* Overload of the / operator for vector division. */ -template -__m128i operator/(__m128i numer, const divider & denom) { - return denom.perform_divide_vector(numer); -} +#if defined(LIBDIVIDE_AVX512) + #define LIBDIVIDE_VECTOR_TYPE __m512i +#elif defined(LIBDIVIDE_AVX2) + #define LIBDIVIDE_VECTOR_TYPE __m256i +#elif defined(LIBDIVIDE_SSE2) + #define LIBDIVIDE_VECTOR_TYPE __m128i #endif - -#endif //__cplusplus - -#endif //LIBDIVIDE_HEADER_ONLY -#ifdef __cplusplus -} //close namespace libdivide -} //close anonymous namespace +#if !defined(LIBDIVIDE_VECTOR_TYPE) + #define LIBDIVIDE_DIVIDE_VECTOR(ALGO) +#else + #define LIBDIVIDE_DIVIDE_VECTOR(ALGO) \ + LIBDIVIDE_VECTOR_TYPE divide(LIBDIVIDE_VECTOR_TYPE n) const { \ + return libdivide_##ALGO##_do_vector(n, &denom); \ + } #endif -#pragma GCC diagnostic pop +// The DISPATCHER_GEN() macro generates C++ methods (for the given integer +// and algorithm types) that redirect to libdivide's C API. +#define DISPATCHER_GEN(T, ALGO) \ + libdivide_##ALGO##_t denom; \ + dispatcher() { } \ + dispatcher(T d) \ + : denom(libdivide_##ALGO##_gen(d)) \ + { } \ + T divide(T n) const { \ + return libdivide_##ALGO##_do(n, &denom); \ + } \ + LIBDIVIDE_DIVIDE_VECTOR(ALGO) \ + T recover() const { \ + return libdivide_##ALGO##_recover(&denom); \ + } + +// The dispatcher selects a specific division algorithm for a given +// type and ALGO using partial template specialization. +template struct dispatcher { }; + +template<> struct dispatcher { DISPATCHER_GEN(int32_t, s32) }; +template<> struct dispatcher { DISPATCHER_GEN(int32_t, s32_branchfree) }; +template<> struct dispatcher { DISPATCHER_GEN(uint32_t, u32) }; +template<> struct dispatcher { DISPATCHER_GEN(uint32_t, u32_branchfree) }; +template<> struct dispatcher { DISPATCHER_GEN(int64_t, s64) }; +template<> struct dispatcher { DISPATCHER_GEN(int64_t, s64_branchfree) }; +template<> struct dispatcher { DISPATCHER_GEN(uint64_t, u64) }; +template<> struct dispatcher { DISPATCHER_GEN(uint64_t, u64_branchfree) }; + +// This is the main divider class for use by the user (C++ API). +// The actual division algorithm is selected using the dispatcher struct +// based on the integer and algorithm template parameters. +template +class divider { +public: + // We leave the default constructor empty so that creating + // an array of dividers and then initializing them + // later doesn't slow us down. + divider() { } + + // Constructor that takes the divisor as a parameter + divider(T d) : div(d) { } + + // Divides n by the divisor + T divide(T n) const { + return div.divide(n); + } + + // Recovers the divisor, returns the value that was + // used to initialize this divider object. + T recover() const { + return div.recover(); + } + + bool operator==(const divider& other) const { + return div.denom.magic == other.denom.magic && + div.denom.more == other.denom.more; + } + + bool operator!=(const divider& other) const { + return !(*this == other); + } + +#if defined(LIBDIVIDE_VECTOR_TYPE) + // Treats the vector as packed integer values with the same type as + // the divider (e.g. s32, u32, s64, u64) and divides each of + // them by the divider, returning the packed quotients. 
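+    // (LIBDIVIDE_VECTOR_TYPE is __m512i, __m256i or __m128i, depending on
+    // which of LIBDIVIDE_AVX512 / LIBDIVIDE_AVX2 / LIBDIVIDE_SSE2 is defined.)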
+ LIBDIVIDE_VECTOR_TYPE divide(LIBDIVIDE_VECTOR_TYPE n) const { + return div.divide(n); + } +#endif + +private: + // Storage for the actual divisor + dispatcher::value, + std::is_signed::value, sizeof(T), ALGO> div; +}; + +// Overload of operator / for scalar division +template +T operator/(T n, const divider& div) { + return div.divide(n); +} + +// Overload of operator /= for scalar division +template +T& operator/=(T& n, const divider& div) { + n = div.divide(n); + return n; +} + +#if defined(LIBDIVIDE_VECTOR_TYPE) + // Overload of operator / for vector division + template + LIBDIVIDE_VECTOR_TYPE operator/(LIBDIVIDE_VECTOR_TYPE n, const divider& div) { + return div.divide(n); + } + // Overload of operator /= for vector division + template + LIBDIVIDE_VECTOR_TYPE& operator/=(LIBDIVIDE_VECTOR_TYPE& n, const divider& div) { + n = div.divide(n); + return n; + } +#endif + +// libdivdie::branchfree_divider +template +using branchfree_divider = divider; + +} // namespace libdivide + +#endif // __cplusplus + +#endif // LIBDIVIDE_H diff --git a/src/Functions/intDiv.cpp b/src/Functions/intDiv.cpp index 0b6734c0136..062a374c00f 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -1,8 +1,9 @@ #include #include -#ifdef __SSE2__ - #define LIBDIVIDE_USE_SSE2 1 +#if defined(__SSE2__) +# define LIBDIVIDE_SSE2 1 +# define LIBDIVIDE_VECTOR_TYPE #endif #include @@ -45,7 +46,7 @@ struct DivideIntegralByConstantImpl const A * a_end = a_pos + size; -#ifdef __SSE2__ +#if defined(__SSE2__) static constexpr size_t values_per_sse_register = 16 / sizeof(A); const A * a_end_sse = a_pos + size / values_per_sse_register * values_per_sse_register; diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index 9e4409ca91b..631b7d12263 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -1,8 +1,8 @@ #include #include -#ifdef __SSE2__ - #define LIBDIVIDE_USE_SSE2 1 +#if defined(__SSE2__) +# define LIBDIVIDE_SSE2 1 #endif #include diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index 2b08ca0845c..0759b9d9601 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -5,8 +5,8 @@ #include -#ifdef __SSE2__ - #define LIBDIVIDE_USE_SSE2 1 +#if defined(__SSE2__) +# define LIBDIVIDE_SSE2 1 #endif #include From 19bb2976b9f82eec86965f6dad0d5934f8eee826 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Sat, 11 Apr 2020 14:16:14 +0300 Subject: [PATCH 271/752] Update pdqsort to recent version (#10171) --- contrib/pdqsort/pdqsort.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/contrib/pdqsort/pdqsort.h b/contrib/pdqsort/pdqsort.h index 31eb06fece4..01e82b710ee 100644 --- a/contrib/pdqsort/pdqsort.h +++ b/contrib/pdqsort/pdqsort.h @@ -124,11 +124,9 @@ namespace pdqsort_detail { inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) { typedef typename std::iterator_traits::value_type T; if (begin == end) return true; - - int limit = 0; - for (Iter cur = begin + 1; cur != end; ++cur) { - if (limit > partial_insertion_sort_limit) return false; + std::size_t limit = 0; + for (Iter cur = begin + 1; cur != end; ++cur) { Iter sift = cur; Iter sift_1 = cur - 1; @@ -142,6 +140,8 @@ namespace pdqsort_detail { *sift = PDQSORT_PREFER_MOVE(tmp); limit += cur - sift; } + + if (limit > partial_insertion_sort_limit) return false; } return true; @@ -232,7 +232,7 @@ namespace pdqsort_detail { unsigned char* offsets_r 
= align_cacheline(offsets_r_storage); int num_l, num_r, start_l, start_r; num_l = num_r = start_l = start_r = 0; - + while (last - first > 2 * block_size) { // Fill up offset blocks with elements that are on the wrong side. if (num_l == 0) { @@ -275,7 +275,7 @@ namespace pdqsort_detail { } int l_size = 0, r_size = 0; - int unknown_left = (last - first) - ((num_r || num_l) ? block_size : 0); + int unknown_left = (int)(last - first) - ((num_r || num_l) ? block_size : 0); if (num_r) { // Handle leftover block by assigning the unknown elements to the other block. l_size = unknown_left; @@ -311,7 +311,7 @@ namespace pdqsort_detail { start_l += num; start_r += num; if (num_l == 0) first += l_size; if (num_r == 0) last -= r_size; - + // We have now fully identified [first, last)'s proper position. Swap the last elements. if (num_l) { offsets_l += start_l; @@ -340,7 +340,7 @@ namespace pdqsort_detail { template inline std::pair partition_right(Iter begin, Iter end, Compare comp) { typedef typename std::iterator_traits::value_type T; - + // Move pivot into local for speed. T pivot(PDQSORT_PREFER_MOVE(*begin)); @@ -359,7 +359,7 @@ namespace pdqsort_detail { // If the first pair of elements that should be swapped to partition are the same element, // the passed in sequence already was correctly partitioned. bool already_partitioned = first >= last; - + // Keep swapping pairs of elements that are on the wrong side of the pivot. Previously // swapped pairs guard the searches, which is why the first iteration is special-cased // above. @@ -388,7 +388,7 @@ namespace pdqsort_detail { T pivot(PDQSORT_PREFER_MOVE(*begin)); Iter first = begin; Iter last = end; - + while (comp(pivot, *--last)); if (last + 1 == end) while (first < last && !comp(pivot, *++first)); @@ -475,11 +475,11 @@ namespace pdqsort_detail { std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); } } - + if (r_size >= insertion_sort_threshold) { std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); std::iter_swap(end - 1, end - r_size / 4); - + if (r_size > ninther_threshold) { std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); @@ -493,7 +493,7 @@ namespace pdqsort_detail { if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp) && partial_insertion_sort(pivot_pos + 1, end, comp)) return; } - + // Sort the left partition first using recursion and do tail recursion elimination for // the right-hand partition. 
pdqsort_loop(begin, pivot_pos, comp, bad_allowed, leftmost); From 1484ab1f1a0b63e2d7816b66a9375682f73cf8a6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Apr 2020 17:52:59 +0300 Subject: [PATCH 272/752] Fix machine translation #10191 --- docs/ru/development/architecture.md | 202 +- docs/ru/development/build.md | 140 +- docs/ru/development/build_cross_arm.md | 42 +- docs/ru/development/build_cross_osx.md | 63 +- docs/ru/development/build_osx.md | 92 +- docs/ru/development/index.md | 9 +- docs/ru/development/tests.md | 251 +- .../engines/table_engines/special/generate.md | 60 +- docs/ru/getting_started/tutorial.md | 670 +----- docs/ru/introduction/adopters.md | 81 +- .../sampling_query_profiler.md | 63 +- docs/ru/operations/performance_test.md | 81 +- .../utilities/clickhouse-benchmark.md | 155 +- docs/ru/whats_new/changelog/2017.md | 267 +-- docs/ru/whats_new/changelog/2018.md | 1062 +-------- docs/ru/whats_new/changelog/2019.md | 2073 +---------------- docs/ru/whats_new/roadmap.md | 18 +- 17 files changed, 17 insertions(+), 5312 deletions(-) mode change 100644 => 120000 docs/ru/development/architecture.md mode change 100644 => 120000 docs/ru/development/build.md mode change 100644 => 120000 docs/ru/development/build_cross_arm.md mode change 100644 => 120000 docs/ru/development/build_cross_osx.md mode change 100644 => 120000 docs/ru/development/build_osx.md mode change 100644 => 120000 docs/ru/development/index.md mode change 100644 => 120000 docs/ru/development/tests.md mode change 100644 => 120000 docs/ru/engines/table_engines/special/generate.md mode change 100644 => 120000 docs/ru/getting_started/tutorial.md mode change 100644 => 120000 docs/ru/introduction/adopters.md mode change 100644 => 120000 docs/ru/operations/optimizing_performance/sampling_query_profiler.md mode change 100644 => 120000 docs/ru/operations/performance_test.md mode change 100644 => 120000 docs/ru/operations/utilities/clickhouse-benchmark.md mode change 100644 => 120000 docs/ru/whats_new/changelog/2017.md mode change 100644 => 120000 docs/ru/whats_new/changelog/2018.md mode change 100644 => 120000 docs/ru/whats_new/changelog/2019.md mode change 100644 => 120000 docs/ru/whats_new/roadmap.md diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md deleted file mode 100644 index 0d1fc2ff947..00000000000 --- a/docs/ru/development/architecture.md +++ /dev/null @@ -1,201 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 ---- - -# Обзор архитектуры ClickHouse {#overview-of-clickhouse-architecture} - -ClickHouse-это настоящая СУБД, ориентированная на столбцы. Данные хранятся столбцами и во время выполнения массивов (векторов или кусков столбцов). Когда это возможно, операции отправляются на массивы, а не на отдельные значения. Это называется «vectorized query execution,» и это помогает снизить стоимость фактической обработки данных. - -> В этой идее нет ничего нового. Она восходит к тому времени, когда `APL` язык программирования и его потомки: `A +`, `J`, `K`, и `Q`. Массивное программирование используется в научной обработке данных. Эта идея также не является чем-то новым в реляционных базах данных: например, она используется в `Vectorwise` система. - -Существует два различных подхода для ускорения обработки запросов: векторизованное выполнение запросов и генерация кода во время выполнения. Последнее устраняет все косвенные действия и динамическую диспетчеризацию. 
Ни один из этих подходов не является строго лучшим, чем другой. Генерация кода во время выполнения может быть лучше, когда он объединяет множество операций, таким образом полностью используя исполнительные блоки процессора и конвейер. Векторизованное выполнение запроса может быть менее практичным, поскольку оно включает временные векторы, которые должны быть записаны в кэш и считаны обратно. Если временные данные не помещаются в кэш L2, это становится проблемой. Но векторизованное выполнение запросов более легко использует возможности SIMD центрального процессора. Один [научная статья](http://15721.courses.cs.cmu.edu/spring2016/papers/p5-sompolski.pdf) написанное нашими друзьями показывает, что лучше сочетать оба подхода. ClickHouse использует векторизованное выполнение запросов и имеет ограниченную начальную поддержку для генерации кода во время выполнения. - -## Столбцы {#columns} - -`IColumn` интерфейс используется для представления столбцов в памяти (собственно, кусков столбцов). Этот интерфейс предоставляет вспомогательные методы для реализации различных реляционных операторов. Почти все операции неизменяемы: они не изменяют исходный столбец, а создают новый измененный. Например, в `IColumn :: filter` метод принимает маску байта фильтра. Он используется для `WHERE` и `HAVING` реляционный оператор. Дополнительные примеры: `IColumn :: permute` способ поддержки `ORDER BY`, этот `IColumn :: cut` способ поддержки `LIMIT`. - -Различный `IColumn` реализации (`ColumnUInt8`, `ColumnString`, и так далее) отвечают за расположение столбцов в памяти. Расположение памяти обычно представляет собой непрерывный массив. Для целочисленного типа столбцов это всего лишь один непрерывный массив, например `std :: vector`. Для `String` и `Array` столбцы, это два вектора: один для всех элементов массива, расположенных последовательно, и второй для смещений к началу каждого массива. Существует также `ColumnConst` это сохраняет только одно значение в памяти, но выглядит как столбец. - -## Поле {#field} - -Тем не менее, можно работать и с индивидуальными ценностями. Чтобы представить индивидуальную ценность, то `Field` предназначенный. `Field` это просто дискриминированный Союз `UInt64`, `Int64`, `Float64`, `String` и `Array`. `IColumn` имеет `operator[]` метод получения n-го значения в виде a `Field` и `insert` способ, чтобы добавить `Field` до самого конца колонны. Эти методы не очень эффективны, потому что они требуют решения временных проблем `Field` объекты, представляющие индивидуальную ценность. Существуют и более эффективные методы, такие как `insertFrom`, `insertRangeFrom` и так далее. - -`Field` у него нет достаточной информации о конкретном типе данных для таблицы. Например, `UInt8`, `UInt16`, `UInt32`, и `UInt64` все они представлены в виде `UInt64` в `Field`. - -## Дырявые абстракции {#leaky-abstractions} - -`IColumn` есть методы для общих реляционных преобразований данных, но они не удовлетворяют всем потребностям. Например, `ColumnUInt64` не имеет метода для вычисления суммы двух столбцов, и `ColumnString` у него нет метода для запуска поиска по подстрокам. Эти бесчисленные процедуры реализуются за пределами `IColumn`. - -Различные функции на столбцах могут быть реализованы общим, неэффективным способом с использованием `IColumn` способы извлечения `Field` значения, или специализированным способом, использующим знание внутренней компоновки памяти данных в определенном месте. `IColumn` реализация. 
Он реализуется путем приведения функций к определенному виду `IColumn` тип и дело с внутренним представлением непосредственно. Например, `ColumnUInt64` имеет `getData` метод, который возвращает ссылку на внутренний массив, а затем отдельная процедура считывает или заполняет этот массив непосредственно. У нас есть «leaky abstractions» чтобы обеспечить эффективную специализацию различных процедур. - -## Тип данных {#data_types} - -`IDataType` отвечает за сериализацию и десериализацию: чтение и запись фрагментов столбцов или отдельных значений в двоичной или текстовой форме. `IDataType` непосредственно соответствует типам данных в таблицах. Например, существуют `DataTypeUInt32`, `DataTypeDateTime`, `DataTypeString` и так далее. - -`IDataType` и `IColumn` они лишь слабо связаны друг с другом. Различные типы данных могут быть представлены в памяти одним и тем же именем `IColumn` реализации. Например, `DataTypeUInt32` и `DataTypeDateTime` оба они представлены следующим образом `ColumnUInt32` или `ColumnConstUInt32`. Кроме того, один и тот же тип данных может быть представлен разными `IColumn` реализации. Например, `DataTypeUInt8` может быть представлен следующим образом `ColumnUInt8` или `ColumnConstUInt8`. - -`IDataType` хранит только метаданные. Например, `DataTypeUInt8` не хранит вообще ничего (кроме vptr) и `DataTypeFixedString` магазины просто `N` (размер строк фиксированного размера). - -`IDataType` имеет вспомогательные методы для различных форматов данных. Примерами являются методы сериализации значения с возможным цитированием, сериализации значения для JSON и сериализации значения в формате XML. Прямого соответствия форматам данных не существует. Например, различные форматы данных `Pretty` и `TabSeparated` можно использовать то же самое `serializeTextEscaped` вспомогательный метод от `IDataType` интерфейс. - -## Блок {#block} - -A `Block` это контейнер, представляющий подмножество (фрагмент) таблицы в памяти. Это всего лишь набор троек: `(IColumn, IDataType, column name)`. Во время выполнения запроса данные обрабатываются с помощью `Block`s. Если у нас есть `Block`, у нас есть данные (в `IColumn` объект), у нас есть информация о его типе (в `IDataType`) это говорит нам, как обращаться с этим столбцом, и у нас есть имя столбца. Это может быть либо исходное имя столбца из таблицы, либо какое-то искусственное имя, назначенное для получения временных результатов вычислений. - -Когда мы вычисляем некоторую функцию по столбцам в блоке, мы добавляем другой столбец с его результатом в блок, и мы не касаемся столбцов для аргументов функции, потому что операции неизменяемы. Позже ненужные столбцы могут быть удалены из блока, но не изменены. Это удобно для исключения общих подвыражений. - -Блоки создаются для каждого обработанного фрагмента данных. Обратите внимание, что для одного и того же типа вычисления имена столбцов и типы остаются одинаковыми для разных блоков, и изменяются только данные столбцов. Лучше разделить данные блока из заголовка блока, потому что небольшие размеры блока имеют высокую нагрузку временных строк для копирования shared\_ptrs и имен столбцов. - -## Блокировать Потоки {#block-streams} - -Блочные потоки предназначены для обработки данных. Мы используем потоки блоков для чтения данных откуда-то, выполнения преобразований данных или записи данных куда-то. `IBlockInputStream` имеет `read` метод для извлечения следующего блока, пока он доступен. `IBlockOutputStream` имеет `write` метод, чтобы подтолкнуть блок куда-то. - -Потоки отвечают за: - -1. Чтение или письмо за столом. 
Таблица просто возвращает поток для чтения или записи блоков. -2. Реализация форматов данных. Например, если вы хотите вывести данные на терминал в `Pretty` форматирование, вы создаете поток вывода блока, где вы толкаете блоки, и он форматирует их. -3. Выполнение преобразований данных. Скажем так у вас есть `IBlockInputStream` и хотите создать отфильтрованный поток. Вы создаете `FilterBlockInputStream` и инициализируйте его с помощью своего потока. Затем, когда вы вытащите блок из `FilterBlockInputStream`, он извлекает блок из вашего потока, фильтрует его и возвращает отфильтрованный блок вам. Конвейеры выполнения запросов представлены таким образом. - -Есть и более сложные трансформации. Например, когда вы тянете из `AggregatingBlockInputStream`, он считывает все данные из своего источника, агрегирует их, а затем возвращает поток агрегированных данных для вас. Еще пример: `UnionBlockInputStream` принимает множество источников ввода в конструкторе, а также ряд потоков. Он запускает несколько потоков и читает из нескольких источников параллельно. - -> Потоки блокируют использовать «pull» подход к управлению потоком: когда вы вытягиваете блок из первого потока, он, следовательно, вытягивает необходимые блоки из вложенных потоков, и весь конвейер выполнения будет работать. Ни «pull» ни «push» это лучшее решение, потому что поток управления является неявным, и это ограничивает реализацию различных функций, таких как одновременное выполнение нескольких запросов (объединение многих конвейеров вместе). Это ограничение может быть преодолено с помощью сопрограмм или просто запуском дополнительных потоков, которые ждут друг друга. У нас может быть больше возможностей, если мы сделаем поток управления явным: если мы найдем логику для передачи данных из одной расчетной единицы в другую вне этих расчетных единиц. Читать это [статья](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) для новых мыслей. - -Следует отметить, что конвейер выполнения запроса создает временные данные на каждом шаге. Мы стараемся держать размер блока достаточно маленьким, чтобы временные данные помещались в кэш процессора. При таком допущении запись и чтение временных данных практически бесплатны по сравнению с другими расчетами. Мы могли бы рассмотреть альтернативу, которая заключается в том, чтобы объединить многие операции в трубопроводе вместе. Это может сделать конвейер как можно короче и удалить большую часть временных данных, что может быть преимуществом, но у него также есть недостатки. Например, разделенный конвейер позволяет легко реализовать кэширование промежуточных данных, кражу промежуточных данных из аналогичных запросов, выполняемых одновременно, и объединение конвейеров для аналогичных запросов. - -## Форматы {#formats} - -Форматы данных реализуются с помощью блочных потоков. Есть «presentational» форматы, пригодные только для вывода данных клиенту, такие как `Pretty` формат, который предоставляет только `IBlockOutputStream`. И есть форматы ввода/вывода, такие как `TabSeparated` или `JSONEachRow`. - -Существуют также потоки подряд : `IRowInputStream` и `IRowOutputStream`. Они позволяют вытягивать / выталкивать данные отдельными строками, а не блоками. И они нужны только для упрощения реализации ориентированных на строки форматов. Обертка `BlockInputStreamFromRowInputStream` и `BlockOutputStreamFromRowOutputStream` позволяет конвертировать потоки, ориентированные на строки, в обычные потоки, ориентированные на блоки. 
- -## I/O {#io} - -Для байт-ориентированных входов / выходов существуют `ReadBuffer` и `WriteBuffer` абстрактный класс. Они используются вместо C++ `iostream`s. Не волнуйтесь: каждый зрелый проект C++ использует что-то другое, чем `iostream`s по уважительным причинам. - -`ReadBuffer` и `WriteBuffer` это просто непрерывный буфер и курсор, указывающий на позицию в этом буфере. Реализации могут владеть или не владеть памятью для буфера. Существует виртуальный метод заполнения буфера следующими данными (для `ReadBuffer`) или смыть буфер куда-нибудь (например `WriteBuffer`). Виртуальные методы редко вызываются. - -Реализация следующих принципов: `ReadBuffer`/`WriteBuffer` используются для работы с файлами и файловыми дескрипторами, а также сетевыми сокетами, для реализации сжатия (`CompressedWriteBuffer` is initialized with another WriteBuffer and performs compression before writing data to it), and for other purposes – the names `ConcatReadBuffer`, `LimitReadBuffer`, и `HashingWriteBuffer` за себя говорить. - -Буферы чтения/записи имеют дело только с байтами. Есть функции от `ReadHelpers` и `WriteHelpers` заголовочные файлы, чтобы помочь с форматированием ввода / вывода. Например, есть помощники для записи числа в десятичном формате. - -Давайте посмотрим, что происходит, когда вы хотите написать результирующий набор в `JSON` форматирование в stdout. У вас есть результирующий набор, готовый к извлечению из него `IBlockInputStream`. Вы создаете `WriteBufferFromFileDescriptor(STDOUT_FILENO)` чтобы записать байты в stdout. Вы создаете `JSONRowOutputStream`, инициализируется с помощью этого `WriteBuffer`, чтобы записать строки в `JSON` в stdout. Вы создаете `BlockOutputStreamFromRowOutputStream` кроме того, чтобы представить его как `IBlockOutputStream`. А потом ты позвонишь `copyData` для передачи данных из `IBlockInputStream` к `IBlockOutputStream` и все это работает. Внутренне, `JSONRowOutputStream` буду писать в формате JSON различные разделители и вызвать `IDataType::serializeTextJSON` метод со ссылкой на `IColumn` и номер строки в качестве аргументов. Следовательно, `IDataType::serializeTextJSON` вызовет метод из `WriteHelpers.h`: например, `writeText` для числовых типов и `writeJSONString` для `DataTypeString`. - -## Таблицы {#tables} - -То `IStorage` интерфейс представляет собой таблицы. Различные реализации этого интерфейса являются различными движками таблиц. Примеры `StorageMergeTree`, `StorageMemory` и так далее. Экземпляры этих классов являются просто таблицами. - -Ключ `IStorage` методы `read` и `write`. Есть и другие варианты `alter`, `rename`, `drop` и так далее. То `read` метод принимает следующие аргументы: набор столбцов для чтения из таблицы, набор столбцов для чтения из таблицы. `AST` запрос для рассмотрения и желаемое количество потоков для возврата. Он возвращает один или несколько `IBlockInputStream` объекты и информация о стадии обработки данных, которая была завершена внутри табличного движка во время выполнения запроса. - -В большинстве случаев метод read отвечает только за чтение указанных столбцов из таблицы, а не за дальнейшую обработку данных. Вся дальнейшая обработка данных осуществляется интерпретатором запросов и не входит в сферу ответственности компании `IStorage`. - -Но есть и заметные исключения: - -- Запрос AST передается на сервер `read` метод, и механизм таблиц может использовать его для получения использования индекса и считывания меньшего количества данных из таблицы. -- Иногда механизм таблиц может сам обрабатывать данные до определенного этапа. 
Например, `StorageDistributed` можно отправить запрос на удаленные серверы, попросить их обработать данные на этапе, когда данные с разных удаленных серверов могут быть объединены, и вернуть эти предварительно обработанные данные. Затем интерпретатор запросов завершает обработку данных. - -Стол `read` метод может возвращать несколько значений `IBlockInputStream` объекты, позволяющие осуществлять параллельную обработку данных. Эти несколько блочных входных потоков могут считываться из таблицы параллельно. Затем вы можете обернуть эти потоки с помощью различных преобразований (таких как вычисление выражений или фильтрация), которые могут быть вычислены независимо, и создать `UnionBlockInputStream` поверх них, чтобы читать из нескольких потоков параллельно. - -Есть и другие варианты `TableFunction`s. Это функции, которые возвращают временное значение `IStorage` объект для использования в `FROM` предложение запроса. - -Чтобы получить быстрое представление о том, как реализовать свой движок таблиц, посмотрите на что-то простое, например `StorageMemory` или `StorageTinyLog`. - -> В результате этого `read` метод, `IStorage` возвращается `QueryProcessingStage` – information about what parts of the query were already calculated inside storage. - -## Синтаксический анализатор {#parsers} - -Написанный от руки рекурсивный парсер спуска анализирует запрос. Например, `ParserSelectQuery` просто рекурсивно вызывает базовые Парсеры для различных частей запроса. Парсеры создают `AST`. То `AST` представлен узлами, которые являются экземплярами `IAST`. - -> Генераторы парсеров не используются по историческим причинам. - -## Переводчики {#interpreters} - -Интерпретаторы отвечают за создание конвейера выполнения запроса из `AST`. Есть простые переводчики, такие как `InterpreterExistsQuery` и `InterpreterDropQuery` или более изощренные `InterpreterSelectQuery`. Конвейер выполнения запроса представляет собой комбинацию блочных входных и выходных потоков. Например, результат интерпретации `SELECT` запросов `IBlockInputStream` для чтения результирующего набора из; результат запроса INSERT - это `IBlockOutputStream` чтобы записать данные для вставки в, и результат интерпретации `INSERT SELECT` запросов `IBlockInputStream` это возвращает пустой результирующий набор при первом чтении, но копирует данные из него `SELECT` к `INSERT` в то же время. - -`InterpreterSelectQuery` использует `ExpressionAnalyzer` и `ExpressionActions` машины для анализа запросов и преобразований. Именно здесь выполняется большинство оптимизаций запросов на основе правил. `ExpressionAnalyzer` это довольно грязно и должно быть переписано: различные преобразования запросов и оптимизации должны быть извлечены в отдельные классы, чтобы позволить модульные преобразования или запрос. - -## Функции {#functions} - -Существуют обычные функции и агрегатные функции. Агрегатные функции см. В следующем разделе. - -Ordinary functions don’t change the number of rows – they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`’s данных для реализации векторизованного выполнения запросов. - -Есть некоторые другие функции, такие как [размер блока](../sql_reference/functions/other_functions.md#function-blocksize), [роунумберинблок](../sql_reference/functions/other_functions.md#function-rownumberinblock), и [runningAccumulate](../sql_reference/functions/other_functions.md#function-runningaccumulate), которые эксплуатируют обработку блоков и нарушают независимость строк. 
- -ClickHouse имеет сильную типизацию, поэтому нет никакого неявного преобразования типов. Если функция не поддерживает определенную комбинацию типов, она создает исключение. Но функции могут работать (перегружаться) для многих различных комбинаций типов. Например, в `plus` функция (для реализации `+` оператор) работает для любой комбинации числовых типов: `UInt8` + `Float32`, `UInt16` + `Int8` и так далее. Кроме того, некоторые вариадические функции могут принимать любое количество аргументов, например `concat` функция. - -Реализация функции может быть немного неудобной, поскольку функция явно отправляет поддерживаемые типы данных и поддерживается `IColumns`. Например, в `plus` функция имеет код, генерируемый экземпляром шаблона C++ для каждой комбинации числовых типов, а также постоянные или непостоянные левые и правые аргументы. - -Это отличное место для реализации генерации кода во время выполнения, чтобы избежать раздувания кода шаблона. Кроме того, он позволяет добавлять слитые функции, такие как fused multiply-add или выполнять несколько сравнений в одной итерации цикла. - -Из-за векторизованного выполнения запроса функции не закорачиваются. Например, если вы пишете `WHERE f(x) AND g(y)`, обе стороны вычисляются, даже для строк, когда `f(x)` равно нулю (за исключением тех случаев, когда `f(x)` является нулевым постоянным выражением). Но если избирательность самого `f(x)` состояние является высоким, и расчет `f(x)` это гораздо дешевле, чем `g(y)`, лучше всего реализовать многоходовой расчет. Это будет первый расчет `f(x)`, затем отфильтруйте столбцы по результату, а затем вычислите `g(y)` только для небольших отфильтрованных фрагментов данных. - -## Статистическая функция {#aggregate-functions} - -Агрегатные функции - это функции, определяющие состояние. Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Они управляются с помощью `IAggregateFunction` интерфейс. Состояния могут быть довольно простыми (состояние для `AggregateFunctionCount` это всего лишь один человек `UInt64` значение) или довольно сложное (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и `HyperLogLog` вероятностная структура данных). - -Государства распределяются в `Arena` (пул памяти) для работы с несколькими состояниями при выполнении высокой мощности `GROUP BY` запрос. Состояния могут иметь нетривиальный конструктор и деструктор: например, сложные агрегатные состояния могут сами выделять дополнительную память. Это требует некоторого внимания к созданию и уничтожению государств и правильной передаче их права собственности и порядка уничтожения. - -Агрегатные состояния могут быть сериализованы и десериализованы для передачи по сети во время выполнения распределенного запроса или для записи их на диск, где недостаточно оперативной памяти. Они даже могут храниться в таблице с `DataTypeAggregateFunction` чтобы разрешить инкрементное агрегирование данных. - -> Сериализованный формат данных для состояний агрегатных функций в настоящее время не является версионным. Это нормально, если агрегатные состояния хранятся только временно. Но у нас есть такая возможность `AggregatingMergeTree` механизм таблиц для инкрементного агрегирования, и люди уже используют его в производстве. Именно по этой причине обратная совместимость требуется при изменении сериализованного формата для любой агрегатной функции в будущем. 
- -## Сервер {#server} - -Сервер реализует несколько различных интерфейсов: - -- Интерфейс HTTP для любых иностранных клиентов. -- TCP-интерфейс для собственного клиента ClickHouse и для межсерверной связи во время выполнения распределенного запроса. -- Интерфейс для передачи данных для репликации. - -Внутренне это просто примитивный многопоточный сервер без сопрограмм или волокон. Поскольку сервер предназначен не для обработки высокой скорости простых запросов, а для обработки относительно низкой скорости сложных запросов, каждый из них может обрабатывать огромное количество данных для аналитики. - -Сервер инициализирует программу `Context` класс с необходимой средой для выполнения запроса: список доступных баз данных, пользователей и прав доступа, настройки, кластеры, список процессов, журнал запросов и так далее. Переводчики используют эту среду. - -Мы поддерживаем полную обратную и прямую совместимость для протокола TCP сервера: старые клиенты могут разговаривать с новыми серверами, а новые клиенты-со старыми серверами. Но мы не хотим поддерживать его вечно, и мы удаляем поддержку старых версий примерно через год. - -!!! note "Примечание" - Для большинства внешних приложений мы рекомендуем использовать интерфейс HTTP, поскольку он прост и удобен в использовании. Протокол TCP более тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных, а также использует пользовательское обрамление для сжатых данных. Мы не выпустили библиотеку C для этого протокола, потому что она требует связывания большей части кодовой базы ClickHouse, что нецелесообразно. - -## Выполнение Распределенных Запросов {#distributed-query-execution} - -Серверы в кластерной установке в основном независимы. Вы можете создать `Distributed` таблица на одном или всех серверах кластера. То `Distributed` table does not store data itself – it only provides a «view» ко всем локальным таблицам на нескольких узлах кластера. Когда вы выберите из `Distributed` таблица, он переписывает этот запрос, выбирает удаленные узлы в соответствии с настройками балансировки нагрузки и отправляет запрос к ним. То `Distributed` таблица запрашивает удаленные серверы для обработки запроса только до стадии, когда промежуточные результаты с разных серверов могут быть объединены. Затем он получает промежуточные результаты и сливает их. Распределенная таблица пытается распределить как можно больше работы на удаленные серверы и не отправляет много промежуточных данных по сети. - -Все становится сложнее, когда у вас есть подзапросы в предложениях IN или JOIN, и каждый из них использует a `Distributed` стол. У нас есть разные стратегии выполнения этих запросов. - -Глобального плана запросов для выполнения распределенных запросов не существует. Каждый узел имеет свой локальный план запроса для своей части задания. У нас есть только простое однопроходное распределенное выполнение запросов: мы отправляем запросы на удаленные узлы, а затем объединяем результаты. Но это неосуществимо для сложных запросов с высокой мощностью группы BYs или с большим количеством временных данных для соединения. В таких случаях нам необходимо: «reshuffle» данные между серверами, что требует дополнительной координации. ClickHouse не поддерживает такого рода выполнение запросов, и мы должны работать над этим. - -## Дерево Слияния {#merge-tree} - -`MergeTree` это семейство механизмов хранения данных, поддерживающих индексацию по первичному ключу. Первичный ключ может быть произвольным кортежем столбцов или выражений. 
Данные в a `MergeTree` таблица хранится в «parts». Каждая часть хранит данные в порядке первичного ключа, поэтому данные лексикографически упорядочиваются кортежем первичного ключа. Все столбцы таблицы хранятся отдельно `column.bin` файлы в этих краях. Файлы состоят из сжатых блоков. Каждый блок обычно содержит от 64 КБ до 1 МБ несжатых данных, в зависимости от среднего размера значения. Блоки состоят из значений столбцов, расположенных последовательно друг за другом. Значения столбцов находятся в одном и том же порядке для каждого столбца (первичный ключ определяет порядок), поэтому при итерации по многим столбцам вы получаете значения для соответствующих строк. - -Сам первичный ключ является «sparse». Он адресует не каждую отдельную строку, а только некоторые диапазоны данных. Разделение `primary.idx` файл имеет значение первичного ключа для каждой N-й строки, где N называется `index_granularity` (обычно N = 8192). Кроме того, для каждой колонки у нас есть `column.mrk` файлы с «marks,» которые являются смещениями для каждой N-й строки в файле данных. Каждая метка представляет собой пару: смещение в файле к началу сжатого блока и смещение в распакованном блоке к началу данных. Обычно сжатые блоки выравниваются по меткам, а смещение в распакованном блоке равно нулю. Данные для `primary.idx` всегда находится в памяти, а данные для `column.mrk` файлы кэшируются. - -Когда мы собираемся прочитать что-то из части в `MergeTree`, мы смотрим на `primary.idx` данные и найдите диапазоны, которые могут содержать запрошенные данные, а затем посмотрите на `column.mrk` данные и рассчитать смещения для того, чтобы начать чтение этих диапазонов. Из-за разреженности могут быть прочитаны избыточные данные. ClickHouse не подходит для высокой загрузки простых точечных запросов, так как весь диапазон с `index_granularity` строки должны быть прочитаны для каждого ключа, и весь сжатый блок должен быть распакован для каждого столбца. Мы сделали индекс разреженным, потому что мы должны быть в состоянии поддерживать триллионы строк на одном сервере без заметного потребления памяти для индекса. Кроме того, поскольку первичный ключ разрежен, он не является уникальным: он не может проверить существование ключа в таблице во время вставки. В таблице может быть много строк с одним и тем же ключом. - -Когда вы `INSERT` куча данных в `MergeTree`, эта связка сортируется по порядку первичного ключа и образует новую часть. Существуют фоновые потоки, которые периодически выделяют некоторые детали и объединяют их в одну сортированную деталь, чтобы сохранить количество деталей относительно низким. Вот почему он так называется `MergeTree`. Конечно, слияние приводит к тому, что «write amplification». Все части неизменны: они только создаются и удаляются, но не изменяются. Когда SELECT выполняется, он содержит снимок таблицы (набор деталей). После слияния мы также сохраняем старые детали в течение некоторого времени, чтобы облегчить восстановление после сбоя, поэтому, если мы видим, что какая-то объединенная деталь, вероятно, сломана, мы можем заменить ее исходными частями. - -`MergeTree` это не дерево LSM, потому что оно не содержит «memtable» и «log»: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications. 
- -> Таблицы MergeTree могут иметь только один (первичный) индекс: вторичных индексов не существует. Было бы неплохо разрешить несколько физических представлений в одной логической таблице, например, хранить данные в более чем одном физическом порядке или даже разрешить представления с предварительно агрегированными данными наряду с исходными данными. - -Есть движки MergeTree, которые выполняют дополнительную работу во время фоновых слияний. Примеры `CollapsingMergeTree` и `AggregatingMergeTree`. Это можно рассматривать как специальную поддержку обновлений. Имейте в виду, что это не настоящие обновления, поскольку пользователи обычно не имеют никакого контроля над временем выполнения фоновых слияний, а данные в `MergeTree` таблица почти всегда хранится в нескольких частях, а не в полностью объединенном виде. - -## Копирование {#replication} - -Репликация в ClickHouse может быть настроена на основе каждой таблицы. Вы можете иметь некоторые реплицированные и некоторые нереплицированные таблицы на одном сервере. Вы также можете иметь таблицы, реплицируемые различными способами,например, одна таблица с двухфакторной репликацией, а другая-с трехфакторной. - -Репликация осуществляется в виде `ReplicatedMergeTree` подсистема хранилища. Путь в `ZooKeeper` указывается в качестве параметра для механизма хранения данных. Все таблицы с одинаковым путем внутри `ZooKeeper` становятся репликами друг друга: они синхронизируют свои данные и поддерживают согласованность. Реплики можно добавлять и удалять динамически, просто создавая или удаляя таблицу. - -Репликация использует асинхронную многомастерную схему. Вы можете вставить данные в любую реплику, которая имеет сеанс с `ZooKeeper`, и данные реплицируются во все остальные реплики асинхронно. Поскольку ClickHouse не поддерживает обновления, репликация является бесконфликтной. Поскольку нет подтверждения кворума вставок, только что вставленные данные могут быть потеряны, если один узел выйдет из строя. - -Метаданные для репликации хранятся в ZooKeeper. Существует журнал репликации, в котором перечислены необходимые действия. Действия таковы: получить часть; объединить части; удалить раздел и так далее. Каждая реплика копирует журнал репликации в свою очередь, а затем выполняет действия из этой очереди. Например, при вставке «get the part» действие создается в журнале, и каждая реплика загружает эту часть. Слияния координируются между репликами для получения идентичных байтам результатов. Все части объединяются одинаково на всех репликах. Это достигается путем выбора одной реплики в качестве лидера, и эта реплика инициирует слияние и запись «merge parts» действия по ведению журнала. - -Репликация является физической: между узлами передаются только сжатые части, а не запросы. Слияния обрабатываются на каждой реплике независимо в большинстве случаев, чтобы снизить затраты на сеть, избегая усиления сети. Большие объединенные части передаются по сети только в случаях значительного запаздывания репликации. - -Кроме того, каждая реплика хранит свое состояние в ZooKeeper как набор деталей и их контрольные суммы. Когда состояние локальной файловой системы отличается от эталонного состояния в ZooKeeper, реплика восстанавливает свою согласованность, загружая недостающие и сломанные части из других реплик. Когда в локальной файловой системе появляются неожиданные или неработающие данные, ClickHouse не удаляет их, а перемещает в отдельный каталог и забывает. - -!!! 
note "Примечание" - Кластер ClickHouse состоит из независимых сегментов, и каждый сегмент состоит из реплик. Кластер таков **неупругий**, поэтому после добавления нового осколка данные не будут автоматически перебалансированы между осколками. Вместо этого предполагается, что нагрузка на кластер будет регулироваться неравномерно. Эта реализация дает вам больше контроля, и это нормально для относительно небольших кластеров, таких как десятки узлов. Но для кластеров с сотнями узлов, которые мы используем в производстве, этот подход становится существенным недостатком. Мы должны реализовать механизм таблиц, который охватывает весь кластер с динамически реплицируемыми областями, которые могут быть разделены и сбалансированы между кластерами автоматически. - -{## [Оригинальная статья](https://clickhouse.tech/docs/en/development/architecture/) ##} diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md new file mode 120000 index 00000000000..61968e46da2 --- /dev/null +++ b/docs/ru/development/architecture.md @@ -0,0 +1 @@ +en/development/architecture.md \ No newline at end of file diff --git a/docs/ru/development/build.md b/docs/ru/development/build.md deleted file mode 100644 index f0e0ccfd4aa..00000000000 --- a/docs/ru/development/build.md +++ /dev/null @@ -1,139 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 ---- - -# Как построить ClickHouse для развития {#how-to-build-clickhouse-for-development} - -Следующий учебник основан на системе Ubuntu Linux. -С соответствующими изменениями он также должен работать на любом другом дистрибутиве Linux. -Поддерживаемые платформы: x86\_64 и AArch64. Поддержка Power9 является экспериментальной. - -## Установите Git, CMake, Python и Ninja {#install-git-cmake-python-and-ninja} - -``` bash -$ sudo apt-get install git cmake python ninja-build -``` - -Или cmake3 вместо cmake на старых системах. - -## Установка GCC 9 {#install-gcc-9} - -Есть несколько способов сделать это. - -### Установка из PPA пакет {#install-from-a-ppa-package} - -``` bash -$ sudo apt-get install software-properties-common -$ sudo apt-add-repository ppa:ubuntu-toolchain-r/test -$ sudo apt-get update -$ sudo apt-get install gcc-9 g++-9 -``` - -### Установка из источников {#install-from-sources} - -Смотреть на [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh) - -## Использовать GCC для сборки 9 {#use-gcc-9-for-builds} - -``` bash -$ export CC=gcc-9 -$ export CXX=g++-9 -``` - -## Проверка Источников ClickHouse {#checkout-clickhouse-sources} - -``` bash -$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git -``` - -или - -``` bash -$ git clone --recursive https://github.com/ClickHouse/ClickHouse.git -``` - -## Построить ClickHouse {#build-clickhouse} - -``` bash -$ cd ClickHouse -$ mkdir build -$ cd build -$ cmake .. -$ ninja -$ cd .. -``` - -Чтобы создать исполняемый файл, выполните команду `ninja clickhouse`. -Это позволит создать `programs/clickhouse` исполняемый файл, который может быть использован с `client` или `server` аргументы. 
- -# Как построить ClickHouse на любом Linux {#how-to-build-clickhouse-on-any-linux} - -Для сборки требуются следующие компоненты: - -- Git (используется только для проверки исходных текстов, он не нужен для сборки) -- CMake 3.10 или новее -- Ниндзя (рекомендуется) или сделать -- Компилятор C++: gcc 9 или clang 8 или новее -- Компоновщик: lld или gold (классический GNU ld не будет работать) -- Python (используется только внутри сборки LLVM и является необязательным) - -Если все компоненты установлены, Вы можете построить их так же, как и описанные выше шаги. - -Пример для Ubuntu Eoan: - - sudo apt update - sudo apt install git cmake ninja-build g++ python - git clone --recursive https://github.com/ClickHouse/ClickHouse.git - mkdir build && cd build - cmake ../ClickHouse - ninja - -Пример для OpenSUSE перекати-поле: - - sudo zypper install git cmake ninja gcc-c++ python lld - git clone --recursive https://github.com/ClickHouse/ClickHouse.git - mkdir build && cd build - cmake ../ClickHouse - ninja - -Пример для сыромятной кожи Fedora: - - sudo yum update - yum --nogpg install git cmake make gcc-c++ python2 - git clone --recursive https://github.com/ClickHouse/ClickHouse.git - mkdir build && cd build - cmake ../ClickHouse - make -j $(nproc) - -# Вам не нужно строить ClickHouse {#you-dont-have-to-build-clickhouse} - -ClickHouse доступен в готовых двоичных файлах и пакетах. Двоичные файлы являются портативными и могут быть запущены на любом вкусе Linux. - -Они созданы для стабильных, предустановленных и тестовых релизов до тех пор, пока для каждого коммита к мастеру и для каждого запроса на вытягивание. - -Чтобы найти самую свежую сборку из `master`, обратиться [совершает страницы](https://github.com/ClickHouse/ClickHouse/commits/master), нажмите на первую зеленую галочку или красный крестик рядом с фиксацией и нажмите на кнопку «Details» ссылка сразу после этого «ClickHouse Build Check». - -# Как создать пакет ClickHouse Debian {#how-to-build-clickhouse-debian-package} - -## Установите Git и Pbuilder {#install-git-and-pbuilder} - -``` bash -$ sudo apt-get update -$ sudo apt-get install git python pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring -``` - -## Проверка Источников ClickHouse {#checkout-clickhouse-sources-1} - -``` bash -$ git clone --recursive --branch master https://github.com/ClickHouse/ClickHouse.git -$ cd ClickHouse -``` - -## Запустить Сценарий Выпуска {#run-release-script} - -``` bash -$ ./release -``` - -[Оригинальная статья](https://clickhouse.tech/docs/en/development/build/) diff --git a/docs/ru/development/build.md b/docs/ru/development/build.md new file mode 120000 index 00000000000..156d8382515 --- /dev/null +++ b/docs/ru/development/build.md @@ -0,0 +1 @@ +en/development/build.md \ No newline at end of file diff --git a/docs/ru/development/build_cross_arm.md b/docs/ru/development/build_cross_arm.md deleted file mode 100644 index 184028212e9..00000000000 --- a/docs/ru/development/build_cross_arm.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 ---- - -# Как построить ClickHouse на Linux для архитектуры AArch64 (ARM64) {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} - -Это для случая, когда у вас есть Linux-машина и вы хотите использовать ее для сборки `clickhouse` двоичный файл, который будет работать на другой машине Linux с архитектурой процессора AARCH64. 
Это предназначено для непрерывной проверки интеграции, которая выполняется на серверах Linux. - -Кросс-сборка для AARCH64 основана на следующих принципах: [Инструкции по сборке](build.md)- сначала следуйте за ними. - -# Установка Clang-8 {#install-clang-8} - -Следуйте инструкциям от https://apt.llvm.org/ для вашей установки Ubuntu или Debian. -Например, в Ubuntu Bionic вы можете использовать следующие команды: - -``` bash -echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" | sudo tee /etc/apt/sources.list.d/llvm.list -sudo apt-get update -sudo apt-get install clang-8 -``` - -# Установка Набора Инструментов Перекрестной Компиляции {#install-cross-compilation-toolset} - -``` bash -cd ClickHouse -mkdir -p build-aarch64/cmake/toolchain/linux-aarch64 -wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en' -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1 -``` - -# Построить ClickHouse {#build-clickhouse} - -``` bash -cd ClickHouse -mkdir build-arm64 -CC=clang-8 CXX=clang++-8 cmake . -Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake -ninja -C build-arm64 -``` - -Полученный двоичный файл будет работать только в Linux с архитектурой процессора AARCH64. diff --git a/docs/ru/development/build_cross_arm.md b/docs/ru/development/build_cross_arm.md new file mode 120000 index 00000000000..ea33bb61837 --- /dev/null +++ b/docs/ru/development/build_cross_arm.md @@ -0,0 +1 @@ +en/development/build_cross_arm.md \ No newline at end of file diff --git a/docs/ru/development/build_cross_osx.md b/docs/ru/development/build_cross_osx.md deleted file mode 100644 index 04d505f1a83..00000000000 --- a/docs/ru/development/build_cross_osx.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 ---- - -# Как построить ClickHouse на Linux для Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x} - -Это для случая, когда у вас есть Linux-машина и вы хотите использовать ее для сборки `clickhouse` двоичный файл, который будет работать на OS X. Это предназначено для непрерывной проверки интеграции, которая выполняется на серверах Linux. Если вы хотите построить ClickHouse непосредственно на Mac OS X, то продолжайте [еще одна инструкция](build_osx.md). - -Кросс-сборка для Mac OS X основана на следующих принципах: [Инструкции по сборке](build.md)- сначала следуйте за ними. - -# Установка Clang-8 {#install-clang-8} - -Следуйте инструкциям от https://apt.llvm.org/ для вашей установки Ubuntu или Debian. -Например команды для Bionic выглядят так: - -``` bash -sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list -sudo apt-get install clang-8 -``` - -# Установка Набора Инструментов Перекрестной Компиляции {#install-cross-compilation-toolset} - -Давайте вспомним путь, по которому мы устанавливаем `cctools` как ${CCTOOLS} - -``` bash -mkdir ${CCTOOLS} - -git clone https://github.com/tpoechtrager/apple-libtapi.git -cd apple-libtapi -INSTALLPREFIX=${CCTOOLS} ./build.sh -./install.sh -cd .. 
- -git clone https://github.com/tpoechtrager/cctools-port.git -cd cctools-port/cctools -./configure --prefix=${CCTOOLS} --with-libtapi=${CCTOOLS} --target=x86_64-apple-darwin -make install -``` - -Кроме того, нам нужно загрузить MacOS X SDK в рабочее дерево. - -``` bash -cd ClickHouse -wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz' -mkdir -p build-darwin/cmake/toolchain/darwin-x86_64 -tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1 -``` - -# Построить ClickHouse {#build-clickhouse} - -``` bash -cd ClickHouse -mkdir build-osx -CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake \ - -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \ - -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \ - -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -ninja -C build-osx -``` - -Полученный двоичный файл будет иметь исполняемый формат Mach-O и не может быть запущен в Linux. diff --git a/docs/ru/development/build_cross_osx.md b/docs/ru/development/build_cross_osx.md new file mode 120000 index 00000000000..d4dc16f2fbc --- /dev/null +++ b/docs/ru/development/build_cross_osx.md @@ -0,0 +1 @@ +en/development/build_cross_osx.md \ No newline at end of file diff --git a/docs/ru/development/build_osx.md b/docs/ru/development/build_osx.md deleted file mode 100644 index b218304d9d1..00000000000 --- a/docs/ru/development/build_osx.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 ---- - -# Как построить ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x} - -Сборка должна работать на Mac OS X 10.15 (Catalina) - -## Установите Homebrew {#install-homebrew} - -``` bash -$ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" -``` - -## Установите необходимые компиляторы, инструменты и библиотеки {#install-required-compilers-tools-and-libraries} - -``` bash -$ brew install cmake ninja libtool gettext -``` - -## Проверка Источников ClickHouse {#checkout-clickhouse-sources} - -``` bash -$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git -``` - -или - -``` bash -$ git clone --recursive https://github.com/ClickHouse/ClickHouse.git - -$ cd ClickHouse -``` - -## Построить ClickHouse {#build-clickhouse} - -``` bash -$ mkdir build -$ cd build -$ cmake .. -DCMAKE_CXX_COMPILER=`which clang++` -DCMAKE_C_COMPILER=`which clang` -$ ninja -$ cd .. -``` - -## Предостережения {#caveats} - -Если вы собираетесь запустить clickhouse-сервер, убедитесь в том, чтобы увеличить параметром maxfiles системная переменная. - -!!! info "Примечание" - Вам нужно будет использовать sudo. - -Для этого создайте следующий файл: - -/Библиотека / LaunchDaemons / limit.параметром maxfiles.файл plist: - -``` xml - - - - - Label - limit.maxfiles - ProgramArguments - - launchctl - limit - maxfiles - 524288 - 524288 - - RunAtLoad - - ServiceIPC - - - -``` - -Выполните следующую команду: - -``` bash -$ sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist -``` - -Перезагрузить. - -Чтобы проверить, работает ли он, вы можете использовать `ulimit -n` команда. 
- -[Оригинальная статья](https://clickhouse.tech/docs/en/development/build_osx/) diff --git a/docs/ru/development/build_osx.md b/docs/ru/development/build_osx.md new file mode 120000 index 00000000000..5c38a2b001a --- /dev/null +++ b/docs/ru/development/build_osx.md @@ -0,0 +1 @@ +en/development/build_osx.md \ No newline at end of file diff --git a/docs/ru/development/index.md b/docs/ru/development/index.md deleted file mode 100644 index 8bf31ed0d3f..00000000000 --- a/docs/ru/development/index.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 ---- - -# Разработка ClickHouse {#clickhouse-development} - -[Оригинальная статья](https://clickhouse.tech/docs/en/development/) diff --git a/docs/ru/development/index.md b/docs/ru/development/index.md new file mode 120000 index 00000000000..754385a9f4b --- /dev/null +++ b/docs/ru/development/index.md @@ -0,0 +1 @@ +en/development/index.md \ No newline at end of file diff --git a/docs/ru/development/tests.md b/docs/ru/development/tests.md deleted file mode 100644 index 1dfcdfdfe6f..00000000000 --- a/docs/ru/development/tests.md +++ /dev/null @@ -1,250 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 ---- - -# Тестирование ClickHouse {#clickhouse-testing} - -## Функциональные пробы {#functional-tests} - -Функциональные тесты являются наиболее простыми и удобными в использовании. Большинство функций ClickHouse можно протестировать с помощью функциональных тестов, и они обязательны для использования при каждом изменении кода ClickHouse, которое может быть протестировано таким образом. - -Каждый функциональный тест отправляет один или несколько запросов на запущенный сервер ClickHouse и сравнивает результат со ссылкой. - -Тесты расположены в `queries` каталог. Существует два подкаталога: `stateless` и `stateful`. Тесты без состояния выполняют запросы без каких - либо предварительно загруженных тестовых данных-они часто создают небольшие синтетические наборы данных на лету, в самом тесте. Статусные тесты требуют предварительно загруженных тестовых данных от Яндекса.Метрика и не доступна широкой публике. Мы склонны использовать только `stateless` тесты и избегайте добавления новых `stateful` тесты. - -Каждый тест может быть одного из двух типов: `.sql` и `.sh`. `.sql` тест - это простой SQL-скрипт, который передается по конвейеру в `clickhouse-client --multiquery --testmode`. `.sh` тест - это скрипт, который запускается сам по себе. - -Чтобы выполнить все тесты, используйте `testskhouse-test` инструмент. Смотри `--help` для списка возможных вариантов. Вы можете просто запустить все тесты или запустить подмножество тестов, отфильтрованных по подстроке в имени теста: `./clickhouse-test substring`. - -Самый простой способ вызвать функциональные тесты-это скопировать `clickhouse-client` к `/usr/bin/`, бежать `clickhouse-server` а потом бежать `./clickhouse-test` из собственного каталога. - -Чтобы добавить новый тест, создайте `.sql` или `.sh` файл в `queries/0_stateless` каталог, проверьте его вручную, а затем сгенерируйте `.reference` файл создается следующим образом: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` или `./00000_test.sh > ./00000_test.reference`. - -Тесты должны использовать (создавать, отбрасывать и т. д.) Только таблицы в `test` предполагается, что база данных создается заранее; также тесты могут использовать временные таблицы. 
- -Если вы хотите использовать распределенные запросы в функциональных тестах, вы можете использовать их в качестве рычагов `remote` функция таблицы с `127.0.0.{1..2}` адреса для запроса самого сервера; или вы можете использовать предопределенные тестовые кластеры в файле конфигурации сервера, например `test_shard_localhost`. - -Некоторые тесты помечены знаком `zookeeper`, `shard` или `long` в своем названии. -`zookeeper` это для тестов, которые используют ZooKeeper. `shard` это для тестов, что -требуется сервер для прослушивания `127.0.0.*`; `distributed` или `global` есть то же самое -значение. `long` это для тестов, которые работают немного дольше, чем одна секунда. Ты можешь -отключите эти группы тестов с помощью `--no-zookeeper`, `--no-shard` и -`--no-long` варианты, соответственно. - -## Известная ошибка {#known-bugs} - -Если мы знаем некоторые ошибки, которые могут быть легко воспроизведены функциональными тестами, мы помещаем подготовленные функциональные тесты в `queries/bugs` каталог. Эти тесты будут перенесены в `teststests_stateless` когда ошибки будут исправлены. - -## Интеграционные Тесты {#integration-tests} - -Интеграционные тесты позволяют тестировать ClickHouse в кластерной конфигурации и взаимодействие ClickHouse с другими серверами, такими как MySQL, Postgres, MongoDB. Они полезны для эмуляции сетевых разбиений, отбрасывания пакетов и т. д. Эти тесты выполняются в Docker и создают несколько контейнеров с различным программным обеспечением. - -Видеть `testsgration/README.md` о том, как проводить эти тесты. - -Обратите внимание, что интеграция ClickHouse со сторонними драйверами не тестируется. Кроме того, в настоящее время у нас нет интеграционных тестов с нашими драйверами JDBC и ODBC. - -## Модульное тестирование {#unit-tests} - -Модульные тесты полезны, если вы хотите протестировать не весь ClickHouse в целом, а одну изолированную библиотеку или класс. Вы можете включить или отключить сборку тестов с помощью `ENABLE_TESTS` Вариант CMake. Модульные тесты (и другие тестовые программы) расположены в `tests` подкаталоги по всему коду. Чтобы запустить модульные тесты, введите `ninja test`. Некоторые тесты используют `gtest`, но некоторые из них-это просто программы, которые возвращают ненулевой код выхода при сбое теста. - -Не обязательно иметь модульные тесты, Если код уже охвачен функциональными тестами (а функциональные тесты обычно гораздо более просты в использовании). - -## Эксплуатационное испытание {#performance-tests} - -Тесты производительности позволяют измерять и сравнивать производительность некоторой изолированной части ClickHouse по синтетическим запросам. Тесты расположены по адресу `tests/performance`. Каждый тест представлен следующим образом `.xml` файл с описанием тестового случая. Тесты выполняются с помощью `clickhouse performance-test` инструмент (который встроен в `clickhouse` двоичный). Видеть `--help` для призыва. - -Каждый тест запускает один или несколько запросов (возможно, с комбинациями параметров) в цикле с некоторыми условиями остановки (например «maximum execution speed is not changing in three seconds») и измерьте некоторые показатели производительности запросов (например, «maximum execution speed»). Некоторые тесты могут содержать предварительные условия для предварительно загруженного тестового набора данных. - -Если вы хотите улучшить производительность ClickHouse в каком-то сценарии, и если улучшения могут наблюдаться в простых запросах, настоятельно рекомендуется написать тест производительности. 
Это всегда имеет смысл использовать `perf top` или другие инструменты perf во время ваших тестов. - -## Инструменты И Сценарии Тестирования {#test-tools-and-scripts} - -Некоторые программы в `tests` каталог-это не подготовленные тесты, а инструменты тестирования. Например, для `Lexer` есть такой инструмент `dbms/Parsers/tests/lexer` это просто делает токенизацию stdin и записывает раскрашенный результат в stdout. Вы можете использовать эти инструменты в качестве примеров кода, а также для исследования и ручного тестирования. - -Вы также можете разместить пару файлов `.sh` и `.reference` вместе с инструментом нужно запустить его на каком - то заранее заданном входе- тогда результат скрипта можно сравнить с `.reference` файл. Такого рода тесты не автоматизированы. - -## Различные Тесты {#miscellanous-tests} - -Существуют тесты для внешних словарей, расположенных по адресу `tests/external_dictionaries` и для машинно-обученных моделей в `tests/external_models`. Эти тесты не обновляются и должны быть перенесены в интеграционные тесты. - -Существует отдельный тест для вставки кворума. Этот тест запускает кластер ClickHouse на отдельных серверах и эмулирует различные случаи сбоя: разделение сети, отбрасывание пакетов (между узлами ClickHouse, между ClickHouse и ZooKeeper, между сервером ClickHouse и клиентом и т. д.), `kill -9`, `kill -STOP` и `kill -CONT` , любить [Джепсен](https://aphyr.com/tags/Jepsen). Затем тест проверяет, что все признанные вставки были записаны, а все отклоненные вставки-нет. - -Тест кворума был написан отдельной командой еще до того, как ClickHouse стал открытым исходным кодом. Эта команда больше не работает с ClickHouse. Тест был случайно написан на Java. По этим причинам тест кворума должен быть переписан и перенесен в интеграционные тесты. - -## Ручное тестирование {#manual-testing} - -Когда вы разрабатываете новую функцию, разумно также протестировать ее вручную. Вы можете сделать это с помощью следующих шагов: - -Постройте ClickHouse. Запустите ClickHouse из терминала: измените каталог на `programs/clickhouse-server` и запустить его с помощью `./clickhouse-server`. Он будет использовать конфигурацию (`config.xml`, `users.xml` и файлы внутри `config.d` и `users.d` каталоги) из текущего каталога по умолчанию. Чтобы подключиться к серверу ClickHouse, выполните команду `programs/clickhouse-client/clickhouse-client`. - -Обратите внимание, что все инструменты clickhouse (сервер, клиент и т. д.) являются просто символическими ссылками на один двоичный файл с именем `clickhouse`. Вы можете найти этот двоичный файл по адресу `programs/clickhouse`. Все инструменты также могут быть вызваны как `clickhouse tool` вместо `clickhouse-tool`. - -В качестве альтернативы вы можете установить пакет ClickHouse: либо стабильный релиз из репозитория Яндекса, либо вы можете построить пакет для себя с помощью `./release` в корне источников ClickHouse. Затем запустите сервер с помощью `sudo service clickhouse-server start` (или остановить, чтобы остановить сервер). Ищите журналы по адресу `/etc/clickhouse-server/clickhouse-server.log`. 
-
-When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing one:
-
-``` bash
-$ sudo service clickhouse-server stop
-$ sudo cp ./clickhouse /usr/bin/
-$ sudo service clickhouse-server start
-```
-
-Also, you can stop the system clickhouse-server and run your own with the same configuration but with logging to the terminal:
-
-``` bash
-$ sudo service clickhouse-server stop
-$ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
-```
-
-Example with gdb:
-
-``` bash
-$ sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
-```
-
-If the system clickhouse-server is already running and you don't want to stop it, you can change the port numbers in your `config.xml` (or override them in a file in the `config.d` directory), provide an appropriate data path, and run it.
-
-The `clickhouse` binary has almost no dependencies and works across a wide range of Linux distributions. To quickly and dirtily test your changes on a server, you can simply `scp` your freshly built `clickhouse` binary to your server and then run it as in the examples above.
-
-## Testing Environment {#testing-environment}
-
-Before publishing a release as stable, we deploy it on a testing environment. The testing environment is a cluster that processes 1/39 of the [Yandex.Metrica](https://metrica.yandex.com/) data. We share our testing environment with the Yandex.Metrica team. ClickHouse is upgraded without downtime on top of existing data. We look first at whether the data is processed successfully without lagging behind realtime, replication continues to work, and there are no issues visible to the Yandex.Metrica team. The first check can be done in the following way:
-
-``` sql
-SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
-```
-
-In some cases, we also deploy to the testing environments of our friend teams in Yandex: Market, Cloud, etc. Also, we have some hardware servers that are used for development purposes.
-
-## Load Testing {#load-testing}
-
-After deploying to the testing environment, we run load testing with queries from the production cluster. This is done manually.
-
-Make sure you have enabled `query_log` on your production cluster.
-
-Collect the query log for a day or more:
-
-``` bash
-$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
-```
-
-This is a fairly complicated example. `type = 2` filters queries that were executed successfully. `query LIKE '%ym:%'` selects relevant queries from Yandex.Metrica. `is_initial_query` selects only queries that were initiated by the client, not by ClickHouse itself (as parts of distributed query processing).
-
-`scp` this log to your testing cluster and run it as follows:
-
-``` bash
-$ clickhouse benchmark --concurrency 16 < queries.tsv
-```
-
-(probably you also want to specify a `--user`)
-
-Then leave it for a night or a weekend and go take a rest.
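-
-A couple of illustrative post-run sanity checks (paths assume the default package layout; adjust as needed):
-
-``` bash
-# the server should still answer queries after the load
-clickhouse-client --query "SELECT uptime()"
-
-# and the error log should not have accumulated anything alarming
-sudo tail -n 50 /var/log/clickhouse-server/clickhouse-server.err.log
-```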
-
-You should check that `clickhouse-server` doesn't crash, the memory footprint is bounded, and performance is not degrading over time.
-
-Precise query execution timings are not recorded and not compared due to the high variability of queries and environment.
-
-## Build Tests {#build-tests}
-
-Build tests allow checking that the build is not broken on various alternative configurations and on some foreign systems. Tests are located in the `ci` directory. They run the build from source inside Docker, Vagrant, and sometimes with `qemu-user-static` inside Docker. These tests are under development, and test runs are not automated.
-
-Motivation:
-
-Normally we release and run all tests on a single variant of the ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
-
-- build on FreeBSD;
-- build on Debian with libraries from system packages;
-- build with shared linking of libraries;
-- build on the AArch64 platform;
-- build on the PowerPC platform.
-
-For example, the build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason, we at least have to support this variant of the build. Another example: shared linking is a common source of trouble, but it is needed by some enthusiasts.
-
-Though we cannot run all tests on all variants of builds, we want to check at least that the various build variants are not broken. For this purpose, we use build tests.
-
-## Testing for Protocol Compatibility {#testing-for-protocol-compatibility}
-
-When we extend the ClickHouse network protocol, we test manually that the old clickhouse-client works with the new clickhouse-server and that the new clickhouse-client works with the old clickhouse-server (simply by running binaries from the corresponding packages).
-
-## Help from the Compiler {#help-from-the-compiler}
-
-The main ClickHouse code (located in the `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional warnings enabled. These options are not enabled for third-party libraries, though.
-
-Clang has even more useful warnings - you can look for them with `-Weverything` and pick something for the default build.
-
-For production builds, gcc is used (it still generates slightly more efficient code than clang). For development, clang is usually more convenient to use. You can build on your own machine in debug mode (to save the battery of your laptop), but please note that the compiler is able to generate more warnings with `-O3` due to better control flow and inter-procedural analysis. When building with clang, `libc++` is used instead of `libstdc++`, and when building in debug mode, the debug version of `libc++` is used, which allows catching more errors at runtime.
-
-## Sanitizers {#sanitizers}
-
-**Address sanitizer**.
-We run functional and integration tests under ASan on a per-commit basis.
-
-**Valgrind (Memcheck)**.
-We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in the `re2` library, see [this article](https://research.swtch.com/sparse).
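-
-For local experimentation, a sanitized build can be produced via ClickHouse's dedicated CMake option (a sketch based on the `SANITIZE` CMake variable; consult the build documentation for the authoritative flags):
-
-``` bash
-# configure a build with Address Sanitizer; SANITIZE also accepts
-# memory, thread and undefined
-mkdir build_asan && cd build_asan
-cmake -G Ninja -DSANITIZE=address ..
-ninja
-```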
-
-**Undefined behaviour sanitizer.**
-We run functional and integration tests under UBSan on a per-commit basis.
-
-**Thread sanitizer**.
-We run functional tests under TSan on a per-commit basis. We still don't run integration tests under TSan on a per-commit basis.
-
-**Memory sanitizer**.
-Currently we still don't use MSan.
-
-**Debug allocator.**
-The debug version of `jemalloc` is used for the debug build.
-
-## Fuzzing {#fuzzing}
-
-We use a simple fuzz test to generate random SQL queries and to check that the server doesn't die. Fuzz testing is performed with the Address sanitizer. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
-
-As of December 2018, we still don't use isolated fuzz testing of library code.
-
-## Security Audit {#security-audit}
-
-People from the Yandex Cloud department do some basic overview of ClickHouse capabilities from the security standpoint.
-
-## Static Analyzers {#static-analyzers}
-
-We run `PVS-Studio` on a per-commit basis. We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`. You will find instructions for usage in the `tests/instructions/` directory. Also, you can read [the article in Russian](https://habr.com/company/yandex/blog/342018/).
-
-If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.
-
-## Hardening {#hardening}
-
-`FORTIFY_SOURCE` is used by default. It is almost useless, but it still makes sense in rare cases, and we don't disable it.
-
-## Code Style {#code-style}
-
-Code style rules are described [here](https://clickhouse.tech/docs/en/development/style/).
-
-To check for some common style violations, you can use the `utils/check-style` script.
-
-To force the proper style of your code, you can use `clang-format`. The file `.clang-format` is located at the sources root. It mostly corresponds to our actual code style. But it's not recommended to apply `clang-format` to existing files, because it makes the formatting worse. You can use the `clang-format-diff` tool that you can find in the clang source repository.
-
-Alternatively, you can try the `uncrustify` tool to reformat your code. The configuration is in `uncrustify.cfg` in the sources root. It is less tested than `clang-format`.
-
-`CLion` has its own code formatter that has to be tuned for our code style.
-
-## Metrica B2B Tests {#metrica-b2b-tests}
-
-Each ClickHouse release is tested with the Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of the Metrica engine that processes a fixed sample of input data. Then the results of the two instances of the Metrica engine are compared together.
-
-These tests are automated by a separate team. Due to the high number of moving parts, the tests fail most of the time for completely unrelated reasons that are very difficult to figure out. Most likely these tests have negative value for us. Nevertheless, these tests have proved to be useful in about one or two cases out of hundreds.
-
-## Test Coverage {#test-coverage}
-
-As of July 2018, we don't track test coverage.
-
-## Test Automation {#test-automation}
-
-We run tests with the Yandex internal CI and job automation system named "Sandbox".
-
-Build jobs and tests are run in Sandbox on a per-commit basis. The resulting packages and test results are published on GitHub and can be downloaded by direct links. Artifacts are stored eternally. When you send a pull request on GitHub, we tag it as "can be tested" and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc.) for you.
-
-We don't use Travis CI due to the limit on time and computational power.
-We don't use Jenkins. It was used before, and now we are happy we are not using Jenkins.
-
-[Original article](https://clickhouse.tech/docs/en/development/tests/)
diff --git a/docs/ru/development/tests.md b/docs/ru/development/tests.md
new file mode 120000
index 00000000000..ce23c881f32
--- /dev/null
+++ b/docs/ru/development/tests.md
@@ -0,0 +1 @@
+en/development/tests.md
\ No newline at end of file
diff --git a/docs/ru/engines/table_engines/special/generate.md b/docs/ru/engines/table_engines/special/generate.md
deleted file mode 100644
index 87004bfe5b1..00000000000
--- a/docs/ru/engines/table_engines/special/generate.md
+++ /dev/null
@@ -1,59 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-# GenerateRandom {#table_engines-generate}
-
-The GenerateRandom table engine produces random data for the given table schema.
-
-Usage examples:
-
-- Use in tests to populate a reproducible large table.
-- Generate random input for fuzzing tests.
-
-## Usage in ClickHouse Server {#usage-in-clickhouse-server}
-
-``` sql
-ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length)
-```
-
-The `max_array_length` and `max_string_length` parameters specify the maximum length of all
-array columns and strings correspondingly in the generated data.
-
-The Generate table engine supports only `SELECT` queries.
-
-It supports all [data types](../../../engines/table_engines/special/generate.md) that can be stored in a table, except `LowCardinality` and `AggregateFunction`.
-
-**Example:**
-
-**1.** Set up the `generate_engine_table` table:
-
-``` sql
-CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3)
-```
-
-**2.** Query the data:
-
-``` sql
-SELECT * FROM generate_engine_table LIMIT 3
-```
-
-``` text
-┌─name─┬──────value─┐
-│ c4xJ │ 1412771199 │
-│ r    │ 1791099446 │
-│ 7#$  │  124312908 │
-└──────┴────────────┘
-```
-
-## Details of Implementation {#details-of-implementation}
-
-- Not supported:
-    - `ALTER`
-    - `SELECT ... SAMPLE`
-    - `INSERT`
-    - Indices
-    - Replication
-
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/generate/)
diff --git a/docs/ru/engines/table_engines/special/generate.md b/docs/ru/engines/table_engines/special/generate.md
new file mode 120000
index 00000000000..631f9bbba66
--- /dev/null
+++ b/docs/ru/engines/table_engines/special/generate.md
@@ -0,0 +1 @@
+en/engines/table_engines/special/generate.md
\ No newline at end of file
diff --git a/docs/ru/getting_started/tutorial.md b/docs/ru/getting_started/tutorial.md
deleted file mode 100644
index 69cdeac8387..00000000000
--- a/docs/ru/getting_started/tutorial.md
+++ /dev/null
@@ -1,669 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-# ClickHouse Tutorial {#clickhouse-tutorial}
-
-## What to Expect from This Tutorial? {#what-to-expect-from-this-tutorial}
-
-By going through this tutorial, you'll learn how to set up a simple ClickHouse cluster. It'll be small, but fault-tolerant and scalable. Then we will use one of the example datasets to fill it with data and execute some demo queries.
-
-## Single Node Setup {#single-node-setup}
-
-To avoid the complexities of a distributed environment, we'll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](install.md#install-from-deb-packages) or [rpm](install.md#from-rpm-packages) packages, but there are also [alternatives](install.md#from-docker-image) for the operating systems that do not support them.
-
-For example, say you have chosen `deb` packages and executed:
-
-``` bash
-sudo apt-get install dirmngr
-sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4
-
-echo "deb http://repo.clickhouse.tech/deb/stable/ main/" | sudo tee /etc/apt/sources.list.d/clickhouse.list
-sudo apt-get update
-
-sudo apt-get install -y clickhouse-server clickhouse-client
-```
-
-What do we have in the packages that got installed:
-
-- The `clickhouse-client` package contains the [clickhouse-client](../interfaces/cli.md) application, an interactive ClickHouse console client.
-- The `clickhouse-common` package contains the ClickHouse executable file.
-- The `clickhouse-server` package contains configuration files to run ClickHouse as a server.
-
-Server config files are located in `/etc/clickhouse-server/`. Before going further, please notice the `<path>` element in `config.xml`. The path determines the location for data storage, so it should be located on a volume with large disk capacity; the default value is `/var/lib/clickhouse/`. If you want to adjust the configuration, it's not convenient to directly edit the `config.xml` file, considering it might get rewritten on future package updates. The recommended way to override the config elements is to create [files in the config.d directory](../operations/configuration_files.md) which serve as "patches" to config.xml.
-
-As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won't be automatically restarted after updates, either. The way you start the server depends on your init system; usually, it is:
-
-``` bash
-sudo service clickhouse-server start
-```
-
-or
-
-``` bash
-sudo /etc/init.d/clickhouse-server start
-```
-
-The default location for server logs is `/var/log/clickhouse-server/`.
-The server is ready to handle client connections once it logs the `Ready for connections` message.
-
-Once the `clickhouse-server` is up and running, we can use `clickhouse-client` to connect to the server and run some test queries like `SELECT "Hello, world!";`.
-
-<details markdown="1">
-
-<summary>Quick tips for clickhouse-client</summary>
-
-Interactive mode:
-
-``` bash
-clickhouse-client
-clickhouse-client --host=... --port=... --user=... --password=...
-```
-
-Enable multiline queries:
-
-``` bash
-clickhouse-client -m
-clickhouse-client --multiline
-```
-
-Run queries in batch mode:
-
-``` bash
-clickhouse-client --query='SELECT 1'
-echo 'SELECT 1' | clickhouse-client
-clickhouse-client <<< 'SELECT 1'
-```
-
-Insert data from a file in the specified format:
-
-``` bash
-clickhouse-client --query='INSERT INTO table VALUES' < data.txt
-clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
-```
-
-</details>
-
-## Import Sample Dataset {#import-sample-dataset}
-
-Now it's time to fill our ClickHouse server with some sample data. In this tutorial, we'll use the anonymized data of Yandex.Metrica, the first service to run ClickHouse in production, before it became open-source (more on that in the [history section](../introduction/history.md)). There are [multiple ways to import the Yandex.Metrica dataset](example_datasets/metrica.md), and for the sake of the tutorial, we'll go with the most realistic one.
-
-### Download and Extract Table Data {#download-and-extract-table-data}
-
-``` bash
-curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
-curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
-```
-
-The extracted files are about 10 GB in size.
-
-### Create Tables {#create-tables}
-
-As in most database management systems, ClickHouse logically groups tables into "databases". There is a `default` database, but we'll create a new one named `tutorial`:
-
-``` bash
-clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
-```
-
-The syntax for creating tables is way more complicated compared to databases (see the [reference](../sql_reference/statements/create.md)). In general, a `CREATE TABLE` statement has to specify three key things:
-
-1. The name of the table to create.
-2. Table schema, i.e. list of columns and their [data types](../sql_reference/data_types/index.md).
-3. The [table engine](../engines/table_engines/index.md) and its settings, which determine all the details of how queries to this table will be physically executed.
-
-Yandex.Metrica is a web analytics service, and the sample dataset doesn't cover its full functionality, so there are only two tables to create:
-
-- `hits` is a table with each action performed by all users on all websites covered by the service.
-- `visits` is a table that contains pre-built sessions instead of individual actions.
-
-Let's see and execute the real create table queries for these tables:
-
-``` sql
-CREATE TABLE tutorial.hits_v1
-(
-    `WatchID` UInt64,
-    `JavaEnable` UInt8,
-    `Title` String,
-    `GoodEvent` Int16,
-    `EventTime` DateTime,
-    `EventDate` Date,
-    `CounterID` UInt32,
-    `ClientIP` UInt32,
-    `ClientIP6` FixedString(16),
-    `RegionID` UInt32,
-    `UserID` UInt64,
-    `CounterClass` Int8,
-    `OS` UInt8,
-    `UserAgent` UInt8,
-    `URL` String,
-    `Referer` String,
-    `URLDomain` String,
-    `RefererDomain` String,
-    `Refresh` UInt8,
-    `IsRobot` UInt8,
-    `RefererCategories` Array(UInt16),
-    `URLCategories` Array(UInt16),
-    `URLRegions` Array(UInt32),
-    `RefererRegions` Array(UInt32),
-    `ResolutionWidth` UInt16,
-    `ResolutionHeight` UInt16,
-    `ResolutionDepth` UInt8,
-    `FlashMajor` UInt8,
-    `FlashMinor` UInt8,
-    `FlashMinor2` String,
-    `NetMajor` UInt8,
-    `NetMinor` UInt8,
-    `UserAgentMajor` UInt16,
-    `UserAgentMinor` FixedString(2),
-    `CookieEnable` UInt8,
-    `JavascriptEnable` UInt8,
-    `IsMobile` UInt8,
-    `MobilePhone` UInt8,
-    `MobilePhoneModel` String,
-    `Params` String,
-    `IPNetworkID` UInt32,
-    `TraficSourceID` Int8,
-    `SearchEngineID` UInt16,
-    `SearchPhrase` String,
-    `AdvEngineID` UInt8,
-    `IsArtifical` UInt8,
-    `WindowClientWidth` UInt16,
-    `WindowClientHeight` UInt16,
-    `ClientTimeZone` Int16,
-    `ClientEventTime` DateTime,
-    `SilverlightVersion1` UInt8,
-    `SilverlightVersion2` UInt8,
-    `SilverlightVersion3` UInt32,
-    `SilverlightVersion4` UInt16,
-    `PageCharset` String,
-    `CodeVersion` UInt32,
-    `IsLink` UInt8,
-    `IsDownload` UInt8,
-    `IsNotBounce` UInt8,
-    `FUniqID` UInt64,
-    `HID` UInt32,
-    `IsOldCounter` UInt8,
-    `IsEvent` UInt8,
-    `IsParameter` UInt8,
-    `DontCountHits` UInt8,
-    `WithHash` UInt8,
-    `HitColor` FixedString(1),
-    `UTCEventTime` DateTime,
-    `Age` UInt8,
-    `Sex` UInt8,
-    `Income` UInt8,
-    `Interests` UInt16,
-    `Robotness` UInt8,
-    `GeneralInterests` Array(UInt16),
-    `RemoteIP` UInt32,
-    `RemoteIP6` FixedString(16),
-    `WindowName` Int32,
-    `OpenerName` Int32,
-    `HistoryLength` Int16,
-    `BrowserLanguage` FixedString(2),
-    `BrowserCountry` FixedString(2),
-    `SocialNetwork` String,
-    `SocialAction` String,
-    `HTTPError` UInt16,
-    `SendTiming` Int32,
-    `DNSTiming` Int32,
-    `ConnectTiming` Int32,
-    `ResponseStartTiming` Int32,
-    `ResponseEndTiming` Int32,
-    `FetchTiming` Int32,
-    `RedirectTiming` Int32,
-    `DOMInteractiveTiming` Int32,
-    `DOMContentLoadedTiming` Int32,
-    `DOMCompleteTiming` Int32,
-    `LoadEventStartTiming` Int32,
-    `LoadEventEndTiming` Int32,
-    `NSToDOMContentLoadedTiming` Int32,
-    `FirstPaintTiming` Int32,
-    `RedirectCount` Int8,
-    `SocialSourceNetworkID` UInt8,
-    `SocialSourcePage` String,
-    `ParamPrice` Int64,
-    `ParamOrderID` String,
-    `ParamCurrency` FixedString(3),
-    `ParamCurrencyID` UInt16,
-    `GoalsReached` Array(UInt32),
-    `OpenstatServiceName` String,
-    `OpenstatCampaignID` String,
-    `OpenstatAdID` String,
-    `OpenstatSourceID` String,
-    `UTMSource` String,
-    `UTMMedium` String,
-    `UTMCampaign` String,
-    `UTMContent` String,
-    `UTMTerm` String,
-    `FromTag` String,
-    `HasGCLID` UInt8,
-    `RefererHash` UInt64,
-    `URLHash` UInt64,
-    `CLID` UInt32,
-    `YCLID` UInt64,
-    `ShareService` String,
-    `ShareURL` String,
-    `ShareTitle` String,
-    `ParsedParams` Nested(
-        Key1 String,
-        Key2 String,
-        Key3 String,
-        Key4 String,
-        Key5 String,
-        ValueDouble Float64),
-    `IslandID` FixedString(16),
-    `RequestNum` UInt32,
-    `RequestTry` UInt8
-)
-ENGINE = MergeTree()
-PARTITION BY toYYYYMM(EventDate)
-ORDER BY (CounterID, EventDate, intHash32(UserID))
-SAMPLE BY intHash32(UserID)
-SETTINGS index_granularity = 8192 -``` - -``` sql -CREATE TABLE tutorial.visits_v1 -( - `CounterID` UInt32, - `StartDate` Date, - `Sign` Int8, - `IsNew` UInt8, - `VisitID` UInt64, - `UserID` UInt64, - `StartTime` DateTime, - `Duration` UInt32, - `UTCStartTime` DateTime, - `PageViews` Int32, - `Hits` Int32, - `IsBounce` UInt8, - `Referer` String, - `StartURL` String, - `RefererDomain` String, - `StartURLDomain` String, - `EndURL` String, - `LinkURL` String, - `IsDownload` UInt8, - `TraficSourceID` Int8, - `SearchEngineID` UInt16, - `SearchPhrase` String, - `AdvEngineID` UInt8, - `PlaceID` Int32, - `RefererCategories` Array(UInt16), - `URLCategories` Array(UInt16), - `URLRegions` Array(UInt32), - `RefererRegions` Array(UInt32), - `IsYandex` UInt8, - `GoalReachesDepth` Int32, - `GoalReachesURL` Int32, - `GoalReachesAny` Int32, - `SocialSourceNetworkID` UInt8, - `SocialSourcePage` String, - `MobilePhoneModel` String, - `ClientEventTime` DateTime, - `RegionID` UInt32, - `ClientIP` UInt32, - `ClientIP6` FixedString(16), - `RemoteIP` UInt32, - `RemoteIP6` FixedString(16), - `IPNetworkID` UInt32, - `SilverlightVersion3` UInt32, - `CodeVersion` UInt32, - `ResolutionWidth` UInt16, - `ResolutionHeight` UInt16, - `UserAgentMajor` UInt16, - `UserAgentMinor` UInt16, - `WindowClientWidth` UInt16, - `WindowClientHeight` UInt16, - `SilverlightVersion2` UInt8, - `SilverlightVersion4` UInt16, - `FlashVersion3` UInt16, - `FlashVersion4` UInt16, - `ClientTimeZone` Int16, - `OS` UInt8, - `UserAgent` UInt8, - `ResolutionDepth` UInt8, - `FlashMajor` UInt8, - `FlashMinor` UInt8, - `NetMajor` UInt8, - `NetMinor` UInt8, - `MobilePhone` UInt8, - `SilverlightVersion1` UInt8, - `Age` UInt8, - `Sex` UInt8, - `Income` UInt8, - `JavaEnable` UInt8, - `CookieEnable` UInt8, - `JavascriptEnable` UInt8, - `IsMobile` UInt8, - `BrowserLanguage` UInt16, - `BrowserCountry` UInt16, - `Interests` UInt16, - `Robotness` UInt8, - `GeneralInterests` Array(UInt16), - `Params` Array(String), - `Goals` Nested( - ID UInt32, - Serial UInt32, - EventTime DateTime, - Price Int64, - OrderID String, - CurrencyID UInt32), - `WatchIDs` Array(UInt64), - `ParamSumPrice` Int64, - `ParamCurrency` FixedString(3), - `ParamCurrencyID` UInt16, - `ClickLogID` UInt64, - `ClickEventID` Int32, - `ClickGoodEvent` Int32, - `ClickEventTime` DateTime, - `ClickPriorityID` Int32, - `ClickPhraseID` Int32, - `ClickPageID` Int32, - `ClickPlaceID` Int32, - `ClickTypeID` Int32, - `ClickResourceID` Int32, - `ClickCost` UInt32, - `ClickClientIP` UInt32, - `ClickDomainID` UInt32, - `ClickURL` String, - `ClickAttempt` UInt8, - `ClickOrderID` UInt32, - `ClickBannerID` UInt32, - `ClickMarketCategoryID` UInt32, - `ClickMarketPP` UInt32, - `ClickMarketCategoryName` String, - `ClickMarketPPName` String, - `ClickAWAPSCampaignName` String, - `ClickPageName` String, - `ClickTargetType` UInt16, - `ClickTargetPhraseID` UInt64, - `ClickContextType` UInt8, - `ClickSelectType` Int8, - `ClickOptions` String, - `ClickGroupBannerID` Int32, - `OpenstatServiceName` String, - `OpenstatCampaignID` String, - `OpenstatAdID` String, - `OpenstatSourceID` String, - `UTMSource` String, - `UTMMedium` String, - `UTMCampaign` String, - `UTMContent` String, - `UTMTerm` String, - `FromTag` String, - `HasGCLID` UInt8, - `FirstVisit` DateTime, - `PredLastVisit` Date, - `LastVisit` Date, - `TotalVisits` UInt32, - `TraficSource` Nested( - ID Int8, - SearchEngineID UInt16, - AdvEngineID UInt8, - PlaceID UInt16, - SocialSourceNetworkID UInt8, - Domain String, - SearchPhrase String, - SocialSourcePage String), 
-    `Attendance` FixedString(16),
-    `CLID` UInt32,
-    `YCLID` UInt64,
-    `NormalizedRefererHash` UInt64,
-    `SearchPhraseHash` UInt64,
-    `RefererDomainHash` UInt64,
-    `NormalizedStartURLHash` UInt64,
-    `StartURLDomainHash` UInt64,
-    `NormalizedEndURLHash` UInt64,
-    `TopLevelDomain` UInt64,
-    `URLScheme` UInt64,
-    `OpenstatServiceNameHash` UInt64,
-    `OpenstatCampaignIDHash` UInt64,
-    `OpenstatAdIDHash` UInt64,
-    `OpenstatSourceIDHash` UInt64,
-    `UTMSourceHash` UInt64,
-    `UTMMediumHash` UInt64,
-    `UTMCampaignHash` UInt64,
-    `UTMContentHash` UInt64,
-    `UTMTermHash` UInt64,
-    `FromHash` UInt64,
-    `WebVisorEnabled` UInt8,
-    `WebVisorActivity` UInt32,
-    `ParsedParams` Nested(
-        Key1 String,
-        Key2 String,
-        Key3 String,
-        Key4 String,
-        Key5 String,
-        ValueDouble Float64),
-    `Market` Nested(
-        Type UInt8,
-        GoalID UInt32,
-        OrderID String,
-        OrderPrice Int64,
-        PP UInt32,
-        DirectPlaceID UInt32,
-        DirectOrderID UInt32,
-        DirectBannerID UInt32,
-        GoodID String,
-        GoodName String,
-        GoodQuantity Int32,
-        GoodPrice Int64),
-    `IslandID` FixedString(16)
-)
-ENGINE = CollapsingMergeTree(Sign)
-PARTITION BY toYYYYMM(StartDate)
-ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
-SAMPLE BY intHash32(UserID)
-SETTINGS index_granularity = 8192
-```
-
-You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want.
-
-As we can see, `hits_v1` uses the [basic MergeTree engine](../engines/table_engines/mergetree_family/mergetree.md), while `visits_v1` uses the [Collapsing](../engines/table_engines/mergetree_family/collapsingmergetree.md) variant.
-
-### Import Data {#import-data}
-
-Data import into ClickHouse is done via an [INSERT INTO](../sql_reference/statements/insert_into.md) query, as in many other SQL databases. However, data is usually provided in one of the [supported serialization formats](../interfaces/formats.md) instead of a `VALUES` clause (which is also supported).
-
-The files we downloaded earlier are in tab-separated format, so here's how to import them via the console client:
-
-``` bash
-clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv
-clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
-```
-
-ClickHouse has a lot of [settings to tune](../operations/settings/index.md), and one way to specify them in the console client is via arguments, as we can see with `--max_insert_block_size`. The easiest way to figure out what settings are available, what they mean and what the defaults are is to query the `system.settings` table:
-
-``` sql
-SELECT name, value, changed, description
-FROM system.settings
-WHERE name LIKE '%max_insert_b%'
-FORMAT TSV
-
-max_insert_block_size    1048576    0    "The maximum block size for insertion, if we control the creation of blocks for insertion."
-```
-
-Optionally, you can [OPTIMIZE](../sql_reference/misc/#misc_operations-optimize) the tables after import. Tables that are configured with an engine from the MergeTree family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense).
-These queries force the table engine to do storage optimization right now instead of some time later:
-
-``` bash
-clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
-clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL"
-```
-
-These queries start an I/O- and CPU-intensive operation, so if the table constantly receives new data, it's better to leave it alone and let merges run in the background.
-
-Now we can check if the table import was successful:
-
-``` bash
-clickhouse-client --query "SELECT COUNT(*) FROM tutorial.hits_v1"
-clickhouse-client --query "SELECT COUNT(*) FROM tutorial.visits_v1"
-```
-
-## Example Queries {#example-queries}
-
-``` sql
-SELECT
-    StartURL AS URL,
-    AVG(Duration) AS AvgDuration
-FROM tutorial.visits_v1
-WHERE StartDate BETWEEN '2014-03-23' AND '2014-03-30'
-GROUP BY URL
-ORDER BY AvgDuration DESC
-LIMIT 10
-```
-
-``` sql
-SELECT
-    sum(Sign) AS visits,
-    sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
-    (100. * goal_visits) / visits AS goal_percent
-FROM tutorial.visits_v1
-WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
-```
-
-## Cluster Deployment {#cluster-deployment}
-
-A ClickHouse cluster is a homogenous cluster. Steps to set one up:
-
-1. Install the ClickHouse server on all machines of the cluster
-2. Set up cluster configs in the configuration files
-3. Create local tables on each instance
-4. Create a [Distributed table](../engines/table_engines/special/distributed.md)
-
-A [Distributed table](../engines/table_engines/special/distributed.md) is actually a kind of "view" into the local tables of a ClickHouse cluster. A SELECT query from a distributed table executes using the resources of all the cluster's shards. You may specify configs for multiple clusters and create multiple distributed tables providing views into different clusters.
-
-Example config for a cluster with three shards, one replica each:
-
-``` xml
-<remote_servers>
-    <perftest_3shards_1replicas>
-        <shard>
-            <replica>
-                <host>example-perftest01j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <replica>
-                <host>example-perftest02j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-        <shard>
-            <replica>
-                <host>example-perftest03j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-    </perftest_3shards_1replicas>
-</remote_servers>
-```
-
-For further demonstration, let's create a new local table with the same `CREATE TABLE` query that we used for `hits_v1`, but a different table name:
-
-``` sql
-CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
-```
-
-Creating a distributed table providing a view into the local tables of the cluster:
-
-``` sql
-CREATE TABLE tutorial.hits_all AS tutorial.hits_local
-ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
-```
-
-A common practice is to create similar distributed tables on all machines of the cluster. This allows running distributed queries on any machine of the cluster. There is also an alternative option to create a temporary distributed table for a given SELECT query using the [remote](../sql_reference/table_functions/remote.md) table function.
-
-Let's run [INSERT SELECT](../sql_reference/statements/insert_into.md) into the distributed table to spread the table to multiple servers.
-
-``` sql
-INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
-```
-
-!!! warning "Notice"
-    This approach is not suitable for the sharding of large tables.
-    There is a separate tool, [clickhouse-copier](../operations/utilities/clickhouse-copier.md), that can re-shard arbitrarily large tables.
-
-As you could expect, computationally heavy queries run N times faster when they use 3 servers instead of one.
-
-In this case, we have used a cluster with 3 shards, each containing a single replica.
-
-To provide resilience in a production environment, we recommend that each shard contain 2-3 replicas spread between multiple availability zones or datacenters (or at least racks). Note that ClickHouse supports an unlimited number of replicas.
-
-Example config for a cluster of one shard containing three replicas:
-
-``` xml
-<remote_servers>
-    ...
-    <perftest_1shards_3replicas>
-        <shard>
-            <replica>
-                <host>example-perftest01j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>example-perftest02j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>example-perftest03j.yandex.ru</host>
-                <port>9000</port>
-            </replica>
-        </shard>
-    </perftest_1shards_3replicas>
-</remote_servers>
-```
-
-To enable native replication, [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs a restore procedure after failure automatically. It's recommended to deploy the ZooKeeper cluster on separate servers (where no other processes, including ClickHouse, are running).
-
-!!! note "Note"
-    ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended because, in this case, ClickHouse won't be able to guarantee data consistency on all replicas. It thus becomes the responsibility of your application.
-
-ZooKeeper locations are specified in the configuration file:
-
-``` xml
-<zookeeper>
-    <node>
-        <host>zoo01.yandex.ru</host>
-        <port>2181</port>
-    </node>
-    <node>
-        <host>zoo02.yandex.ru</host>
-        <port>2181</port>
-    </node>
-    <node>
-        <host>zoo03.yandex.ru</host>
-        <port>2181</port>
-    </node>
-</zookeeper>
-```
-
-Also, we need to set macros for identifying each shard and replica, which are used on table creation:
-
-``` xml
-<macros>
-    <shard>01</shard>
-    <replica>01</replica>
-</macros>
-```
-
-If there are no replicas at the moment of replicated table creation, a new first replica is instantiated. If there are already live replicas, the new replica clones the data from the existing ones. You have the option to create all replicated tables first and then insert data into them. Another option is to create some replicas and add the others after or during data insertion.
-
-``` sql
-CREATE TABLE tutorial.hits_replica (...)
-ENGINE = ReplicatedMergeTree(
-    '/clickhouse_perftest/tables/{shard}/hits',
-    '{replica}'
-)
-...
-```
-
-Here we use the [ReplicatedMergeTree](../engines/table_engines/mergetree_family/replication.md) table engine. In the parameters, we specify the ZooKeeper path containing the shard and replica identifiers.
-
-``` sql
-INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local;
-```
-
-Replication works in multi-master mode. Data can be loaded into any replica, and the system then syncs it with the other instances automatically. Replication is asynchronous, so at a given moment not all replicas may contain recently inserted data. At least one replica should be up to allow data ingestion. The others will sync up the data and repair consistency once they become active again. Note that this approach allows for a low possibility of losing recently inserted data.
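-
-To see whether the replicas are healthy and in sync, one simple check is to query the `system.replicas` table (a sketch; the table exposes many more columns than shown here):
-
-``` sql
-SELECT database, table, is_readonly, absolute_delay, queue_size
-FROM system.replicas
-```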
-
-[Original article](https://clickhouse.tech/docs/en/getting_started/tutorial/)
diff --git a/docs/ru/getting_started/tutorial.md b/docs/ru/getting_started/tutorial.md
new file mode 120000
index 00000000000..18b86bb2e9c
--- /dev/null
+++ b/docs/ru/getting_started/tutorial.md
@@ -0,0 +1 @@
+en/getting_started/tutorial.md
\ No newline at end of file
diff --git a/docs/ru/introduction/adopters.md b/docs/ru/introduction/adopters.md
deleted file mode 100644
index 5f8b825353c..00000000000
--- a/docs/ru/introduction/adopters.md
+++ /dev/null
@@ -1,80 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-# ClickHouse Adopters {#clickhouse-adopters}
-
-!!! warning "Disclaimer"
-    The following list of companies using ClickHouse and their success stories is assembled from public sources and thus might differ from current reality. We'd appreciate it if you shared the story of adopting ClickHouse in your company and [added it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won't have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
-
-| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference |
-|---------|----------|---------|--------------|----------------------------|-----------|
-| [2GIS](https://2gis.ru) | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
-| [Aloha Browser](https://alohabrowser.com/) | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) |
-| [Amadeus](https://amadeus.com/) | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
-| [Appsflyer](https://www.appsflyer.com) | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
-| [ArenaData](https://arenadata.tech/) | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
-| [Badoo](https://badoo.com) | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
-| [Benocs](https://www.benocs.com/) | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
-| [Bloomberg](https://www.bloomberg.com/) | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
-| [Bloxy](https://bloxy.info) | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
-| `Dataliance/UltraPower` | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
-| [CARTO](https://carto.com/) | Business Intelligence | Geo analytics | — | — | [Geospatial processing with Clickhouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
-| [CERN](http://public.web.cern.ch/public/) | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
-| [Cisco](http://cisco.com/) | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
-| [Citadel Securities](https://www.citadelsecurities.com/) | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
-| [Citymobil](https://city-mobil.ru) | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
-| [ContentSquare](https://contentsquare.com) | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
-| [Cloudflare](https://cloudflare.com) | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
-| [Corunet](https://coru.net/) | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
-| [CraiditX 氪信](https://creditx.com) | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
-| [Criteo/Storetail](https://www.criteo.com/) | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
-| [Deutsche Bank](https://db.com) | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
-| [Diva-e](https://www.diva-e.com) | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
-| [Exness](https://www.exness.com) | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
-| [Geniee](https://geniee.co.jp) | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
-| [HUYA](https://www.huya.com/) | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
-| [Idealista](https://www.idealista.com) | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
-| [Infovista](https://www.infovista.com/) | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
-| [InnoGames](https://www.innogames.com) | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
-| [Integros](https://integros.com) | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
-| [Kodiak Data](https://www.kodiakdata.com/) | Clouds | Main product | — | — | [Slides in English, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
-| [Kontur](https://kontur.ru) | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
-| [LifeStreet](https://lifestreet.com/) | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
-| [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Cloud services | Main product | — | — | [Running a ClickHouse instance, in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
-| [MessageBird](https://www.messagebird.com) | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
-| [MGID](https://www.mgid.com/) | Ad network | Web analytics | — | — | [Our experience in implementing the analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
-| [OneAPM](https://www.oneapm.com/) | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
-| [Pragma Innovation](http://www.pragma-innovation.fr/) | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
-| [QINGCLOUD](https://www.qingcloud.com/) | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
-| [Qrator](https://qrator.net) | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
-| [Beijing PERCENT Information Technology Co., Ltd.](https://www.percent.cn/) | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
-| [Rambler](https://rambler.ru) | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
-| [Tencent](https://www.tencent.com) | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
-| [Traffic Stars](https://trafficstars.com/) | Ad network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
-| [S7 Airlines](https://www.s7.ru) | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
-| [SEMrush](https://www.semrush.com/) | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
-| [scireum GmbH](https://www.scireum.de/) | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
-| [Sentry](https://sentry.io/) | Software Development | Backend for product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
-| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr) | Government Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
-| [seo.do](https://seo.do/) | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
-| [Sina](http://english.sina.com/index.html) | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
-| [SMI2](https://smi2.ru/) | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
-| [Splunk](https://www.splunk.com/) | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
-| [Spotify](https://www.spotify.com) | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
-| [Tencent](https://www.tencent.com) | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
-| [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
-| [VKontakte](https://vk.com) | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
-| [Wisebits](https://wisebits.com/) | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
-| [Xiaoxin Tech.](https://www.xiaoheiban.cn/) | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
-| [Ximalaya](https://www.ximalaya.com/) | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
-| [Yandex Cloud](https://cloud.yandex.ru/services/managed-clickhouse) | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
-| [Yandex DataLens](https://cloud.yandex.ru/services/datalens) | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
-| [Yandex Market](https://market.yandex.ru/) | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
-| [Yandex Metrica](https://metrica.yandex.com) | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
-| [ЦВТ](https://htc-cs.ru/) | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
-| [МКБ](https://mkb.ru/) | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
-| [金数据](https://jinshuju.net) | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
-
-[Original article](https://clickhouse.tech/docs/en/introduction/adopters/)
diff --git a/docs/ru/introduction/adopters.md b/docs/ru/introduction/adopters.md
new file mode 120000
index 00000000000..b9b77a27eb9
--- /dev/null
+++ b/docs/ru/introduction/adopters.md
@@ -0,0 +1 @@
+en/introduction/adopters.md
\ No newline at end of file
diff --git a/docs/ru/operations/optimizing_performance/sampling_query_profiler.md b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md
deleted file mode 100644
index d2cc9738749..00000000000
--- a/docs/ru/operations/optimizing_performance/sampling_query_profiler.md
+++ /dev/null
@@ -1,62 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-# Sampling Query Profiler {#sampling-query-profiler}
-
-ClickHouse runs a sampling profiler that allows analyzing query execution. Using the profiler, you can find the source code routines used most frequently during query execution. You can trace CPU time and wall-clock time spent, including idle time.
-
-To use the profiler:
-
-- Set up the [trace_log](../server_configuration_parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
-
-    This section configures the [trace_log](../../operations/optimizing_performance/sampling_query_profiler.md#system_tables-trace_log) system table containing the results of the profiler's functioning. It is configured by default. Remember that the data in this table is valid only for a running server. After a server restart, ClickHouse doesn't clean up the table, and all the stored virtual memory addresses may become invalid.
-
-- Set up the [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
-
-    These settings allow you to configure the profiler timers. As these are session settings, you can get a different sampling frequency for the whole server, for individual users or user profiles, for your interactive session, and for each individual query.
-
-The default sampling frequency is one sample per second, and both CPU and real timers are enabled. This frequency allows collecting enough information about a ClickHouse cluster. At the same time, working at this frequency, the profiler doesn't affect the ClickHouse server's performance. If you need to profile each individual query, try to use a higher sampling frequency.
-
-To analyze the `trace_log` system table:
-
-- Install the `clickhouse-common-static-dbg` package. See [Install from DEB packages](../../getting_started/install.md#install-from-deb-packages).
-
-- Allow introspection functions via the [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions) setting.
-
-    For security reasons, introspection functions are disabled by default.
-
-- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../operations/optimizing_performance/sampling_query_profiler.md) to get function names and their positions in the ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by whole stack traces.
-
-If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).
-
-## Example {#example}
-
-In this example we:
-
-- Filter `trace_log` data by a query identifier and the current date.
-
-- Aggregate by stack trace.
-
-- Using introspection functions, we get a report of:
-
-    - Names of symbols and corresponding source code functions.
-    - Source code locations of these functions.
-
-## Example {#example}
-
-In this example we:
-
-- Filter `trace_log` data by a query identifier and the current date.
-
-- Aggregate by stack trace.
-
-- Using introspection functions, we get a report of:
-
-    - Names of symbols and the corresponding source code functions.
-    - Source code locations of these functions.
-
-``` sql
-SELECT
-    count(),
-    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n ', addressToLine(x)), trace), '\n') AS sym
-FROM system.trace_log
-WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
-GROUP BY trace
-ORDER BY count() DESC
-LIMIT 10
-```
-
-``` text
-{% include "operations/performance/sampling_query_profiler_example_result.txt" %}
-```
diff --git a/docs/ru/operations/optimizing_performance/sampling_query_profiler.md b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md
new file mode 120000
index 00000000000..565f39130fb
--- /dev/null
+++ b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md
@@ -0,0 +1 @@
+en/operations/optimizing_performance/sampling_query_profiler.md
\ No newline at end of file
diff --git a/docs/ru/operations/performance_test.md b/docs/ru/operations/performance_test.md
deleted file mode 100644
index 9b5c6f4fed3..00000000000
--- a/docs/ru/operations/performance_test.md
+++ /dev/null
@@ -1,80 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-# How to Test Your Hardware with ClickHouse {#how-to-test-your-hardware-with-clickhouse}
-
-With this instruction you can run a basic ClickHouse performance test on any server without installing ClickHouse packages.
-
-1. Go to the "commits" page: https://github.com/ClickHouse/ClickHouse/commits/master
-
-2. Click on the first green check mark or red cross with the green "ClickHouse Build Check" and click on the "Details" link near "ClickHouse Build Check".
-
-3. Copy the link to the "clickhouse" binary for amd64 or aarch64.
-
-4. ssh to the server and download it with wget:
-
-
-
-      # For amd64:
-      wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578163263_binary/clickhouse
-      # For aarch64:
-      wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578161264_binary/clickhouse
-      # Then do:
-      chmod a+x clickhouse
-
-1. Download the configs:
-
-
-
-      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.xml
-      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/users.xml
-      mkdir config.d
-      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
-      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml
-
-1. Download the benchmark files:
-
-
-
-      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/benchmark-new.sh
-      chmod a+x benchmark-new.sh
-      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
-
-1. Download the test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction (the "hits" table containing 100 million rows).
-
-
-
-      wget https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz
-      tar xvf hits_100m_obfuscated_v1.tar.xz -C .
-      mv hits_100m_obfuscated_v1/* .
-
-1. Run the server:
-
-
-
-      ./clickhouse server
-
-1. Check the data: ssh to the server in another terminal
-
-
-
-      ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
-      100000000
-1. Edit benchmark-new.sh, change "clickhouse-client" to "./clickhouse client" and add the "--max_memory_usage 100000000000" parameter.
-
-
-
-      mcedit benchmark-new.sh
-
-1. Run the benchmark:
-
-
-
-      ./benchmark-new.sh hits_100m_obfuscated
-
-1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
-
-All the results are published here: https://clickhouse.tech/benchmark_hardware.html
diff --git a/docs/ru/operations/performance_test.md b/docs/ru/operations/performance_test.md
new file mode 120000
index 00000000000..3787adb92bd
--- /dev/null
+++ b/docs/ru/operations/performance_test.md
@@ -0,0 +1 @@
+en/operations/performance_test.md
\ No newline at end of file
diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md
deleted file mode 100644
index 5467a58676e..00000000000
--- a/docs/ru/operations/utilities/clickhouse-benchmark.md
+++ /dev/null
@@ -1,154 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-# clickhouse-benchmark {#clickhouse-benchmark}
-
-Connects to a ClickHouse server and repeatedly sends the specified queries.
-
-Syntax:
-
-``` bash
-$ echo "single query" | clickhouse-benchmark [keys]
-```
-
-or
-
-``` bash
-$ clickhouse-benchmark [keys] <<< "single query"
-```
-
-If you want to send a set of queries, create a text file and place each query on an individual line in this file. For example:
-
-``` sql
-SELECT * FROM system.numbers LIMIT 10000000
-SELECT 1
-```
-
-Then pass this file to the standard input of `clickhouse-benchmark`.
-
-``` bash
-clickhouse-benchmark [keys] < queries_file
-```
-
-## Keys {#clickhouse-benchmark-keys}
-
-- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
-- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1.
-- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
-- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys.
-- `-i N`, `--iterations=N` — Total number of queries. Default value: 0.
-- `-r`, `--randomize` — Random order of queries execution if there is more than one input query.
-- `-s`, `--secure` — Using a TLS connection.
-- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled).
-- `--confidence=N` — Level of confidence for the T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren't different with the selected level of confidence.
-- `--cumulative` — Printing cumulative data instead of data per interval.
-- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`.
-- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON file.
-- `--user=USERNAME` — ClickHouse user name. Default value: `default`.
-- `--password=PSWD` — ClickHouse user password. Default value: empty string.
-- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-benchmark` outputs stack traces of exceptions.
-- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns the answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`.
-- `--help` — Shows the help message.
-
-If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--<session setting name>= SETTING_VALUE`. For example, `--max_memory_usage=1048576`.
-
-## Output {#clickhouse-benchmark-output}
-
-By default, `clickhouse-benchmark` reports for each `--delay` interval.
-
-Example of the report:
-
-``` text
-Queries executed: 10.
-
-localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, result RPS: 67721584.984, result MiB/s: 516.675.
-
-0.000% 0.145 sec.
-10.000% 0.146 sec.
-20.000% 0.146 sec.
-30.000% 0.146 sec.
-40.000% 0.147 sec.
-50.000% 0.148 sec.
-60.000% 0.148 sec.
-70.000% 0.148 sec.
-80.000% 0.149 sec.
-90.000% 0.150 sec.
-95.000% 0.150 sec.
-99.000% 0.150 sec.
-99.900% 0.150 sec.
-99.990% 0.150 sec.
-```
-
-In the report you can find:
-
-- Number of queries in the `Queries executed:` field.
-
-- Status string containing (in order):
-
-    - Endpoint of the ClickHouse server.
-    - Number of processed queries.
-    - QPS: how many queries the server performed per second during a period specified in the `--delay` argument.
-    - RPS: how many rows the server read per second during a period specified in the `--delay` argument.
-    - MiB/s: how many mebibytes the server read per second during a period specified in the `--delay` argument.
-    - result RPS: how many rows placed by the server into the result of a query per second during a period specified in the `--delay` argument.
-    - result MiB/s: how many mebibytes placed by the server into the result of a query per second during a period specified in the `--delay` argument.
-
-- Percentiles of query execution time.
-
-## Comparison Mode {#clickhouse-benchmark-comparison-mode}
-
-`clickhouse-benchmark` can compare performance for two running ClickHouse servers.
-
-To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys are matched together by position in the argument list: the first `--host` is matched with the first `--port`, and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query is addressed to a randomly selected server. The results are shown for each server separately.
-
-## Example {#clickhouse-benchmark-example}
-
-``` bash
-$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10
-```
-
-``` text
-Loaded 1 queries.
-
-Queries executed: 6.
-
-localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459.
-
-0.000% 0.159 sec.
-10.000% 0.159 sec.
-20.000% 0.159 sec.
-30.000% 0.160 sec.
-40.000% 0.160 sec.
-50.000% 0.162 sec.
-60.000% 0.164 sec.
-70.000% 0.165 sec.
-80.000% 0.166 sec.
-90.000% 0.166 sec.
-95.000% 0.167 sec.
-99.000% 0.167 sec.
-99.900% 0.167 sec.
-99.990% 0.167 sec.
-
-
-
-Queries executed: 10.
-
-localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986.
-
-0.000% 0.159 sec.
-10.000% 0.159 sec.
-20.000% 0.160 sec.
-30.000% 0.163 sec.
-40.000% 0.164 sec.
-50.000% 0.165 sec.
-60.000% 0.166 sec.
-70.000% 0.166 sec.
-80.000% 0.167 sec.
-90.000% 0.167 sec.
-95.000% 0.170 sec.
-99.000% 0.172 sec.
-99.900% 0.172 sec.
-99.990% 0.172 sec.
-```
diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md
new file mode 120000
index 00000000000..fda8b1a50c7
--- /dev/null
+++ b/docs/ru/operations/utilities/clickhouse-benchmark.md
@@ -0,0 +1 @@
+en/operations/utilities/clickhouse-benchmark.md
\ No newline at end of file
diff --git a/docs/ru/whats_new/changelog/2017.md b/docs/ru/whats_new/changelog/2017.md
deleted file mode 100644
index 1c820453901..00000000000
--- a/docs/ru/whats_new/changelog/2017.md
+++ /dev/null
@@ -1,266 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-### ClickHouse release 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21}
-
-This release contains bug fixes for the previous release 1.1.54318:
-
-- Fixed a bug with a possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows in the logs in warning messages like `Part ... from own log doesn't exist.` The issue is relevant even if you don't see these messages in the logs.
-
-### ClickHouse release 1.1.54318, 2017-11-30 {#clickhouse-release-1-1-54318-2017-11-30}
-
-This release contains bug fixes for the previous release 1.1.54310:
-
-- Fixed incorrect row deletions during merges in the SummingMergeTree engine
-- Fixed a memory leak in unreplicated MergeTree engines
-- Fixed performance degradation with frequent inserts in MergeTree engines
-- Fixed an issue that was causing the replication queue to stop running
-- Fixed rotation and archiving of server logs
-
-### ClickHouse release 1.1.54310, 2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01}
-
-#### New features: {#new-features}
-
-- Custom partitioning key for the MergeTree family of table engines.
-- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine.
-- Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse.
-- Added support for time zones with non-integer offsets from UTC.
-- Added support for arithmetic operations with time intervals.
-- The range of values for the Date and DateTime types is extended to the year 2105.
-- Added the `CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view; see the sketch after this list).
-- Added the `ATTACH TABLE` query without arguments.
-- The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted into the sumMap aggregate function. You can now specify such columns explicitly.
-- The maximum size of the IP trie dictionary is increased to 128 million entries.
-- Added the getSizeOfEnumType function.
-- Added the sumWithOverflow aggregate function.
-- Added support for the Cap'n Proto input format.
-- You can now customize the compression level when using the zstd algorithm.
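-
-As a quick illustration of the `CREATE MATERIALIZED VIEW ... TO ...` form mentioned above, a minimal sketch using the era's engine syntax (the table and view names here are hypothetical, not from the original changelog):
-
-``` sql
-CREATE TABLE daily_hits (day Date, hits UInt64) ENGINE = SummingMergeTree(day, (day), 8192);
-
--- The view stores its results in the existing daily_hits table:
-CREATE MATERIALIZED VIEW hits_rollup TO daily_hits
-AS SELECT toDate(event_time) AS day, count() AS hits
-FROM events GROUP BY day;
-```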
-
-#### Backward incompatible changes: {#backward-incompatible-changes}
-
-- Creating temporary tables with an engine other than Memory is not allowed.
-- Explicit creation of tables with the View or MaterializedView engine is not allowed.
-- During table creation, a new check verifies that the sampling key expression is included in the primary key.
-
-#### Bug fixes: {#bug-fixes}
-
-- Fixed hangups when synchronously inserting into a Distributed table.
-- Fixed nonatomic adding and removing of parts in Replicated tables.
-- Data inserted into a materialized view is not subjected to unnecessary deduplication.
-- Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable does not result in an error anymore.
-- Users don't need access permissions to the `default` database to create temporary tables anymore.
-- Fixed crashing when specifying the Array type without arguments.
-- Fixed hangups when the disk volume containing server logs is full.
-- Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch.
-
-#### Build improvements: {#build-improvements}
-
-- Several third-party libraries (notably Poco) were updated and converted to git submodules.
-
-### ClickHouse release 1.1.54304, 2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19}
-
-#### New features: {#new-features-1}
-
-- TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml`).
-
-#### Bug fixes: {#bug-fixes-1}
-
-- `ALTER` for replicated tables now tries to start running as soon as possible.
-- Fixed crashing when reading data with the setting `preferred_block_size_bytes=0.`
-- Fixed crashes of `clickhouse-client` when pressing `Page Down`
-- Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL`
-- `FREEZE PARTITION` always works atomically now.
-- Empty POST requests now return a response with code 411.
-- Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8)).`
-- Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables.
-- Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b`
-- Users are updated correctly with invalid `users.xml`
-- Correct handling when an executable dictionary returns a non-zero response code.
-
-### ClickHouse release 1.1.54292, 2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20}
-
-#### New features: {#new-features-2}
-
-- Added the `pointInPolygon` function for working with coordinates on a coordinate plane (see the sketch after this list).
-- Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree`.
-- Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers.
-- The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting `compile = 1`, which is not used by default).
-- Reduced the time needed for dynamic compilation of queries.
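-
-A small sketch of the two new functions named above (the polygon and the map values are made up for illustration):
-
-``` sql
--- Is the point (2.5, 1.0) inside the square (0,0)-(5,5)?
-SELECT pointInPolygon((2.5, 1.0), [(0., 0.), (5., 0.), (5., 5.), (0., 5.)]) AS inside;
-
--- sumMap sums the values arrays grouped by matching keys:
-SELECT sumMap(keys, values) FROM (
-    SELECT [1, 2] AS keys, [10, 20] AS values
-    UNION ALL
-    SELECT [2, 3] AS keys, [30, 40] AS values
-);
-```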
-
-#### Bug fixes: {#bug-fixes-2}
-
-- Fixed a bug that sometimes produced `part ... intersects previous part` messages and weakened replica consistency.
-- Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown.
-- Removed excessive logging when restoring replicas.
-- Fixed an error in the UNION ALL implementation.
-- Fixed an error in the concat function that occurred if the first column in a block has the Array type.
-- Progress is now displayed correctly in the system.merges table.
-
-### ClickHouse release 1.1.54289, 2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13}
-
-#### New features: {#new-features-3}
-
-- `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`.
-- Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront` (see the sketch at the end of these notes).
-- Added `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster.
-- Added the aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`).
-- External dictionaries can be loaded from MySQL by specifying a socket in the filesystem.
-- External dictionaries can be loaded from MySQL over SSL (the `ssl_cert`, `ssl_key`, `ssl_ca` parameters).
-- Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth use for queries per user.
-- Support for `DROP TABLE` for temporary tables.
-- Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats.
-- Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes).
-- FIFO locking is used during ALTER: an ALTER query isn't blocked indefinitely for continuously running queries.
-- Option to set `umask` in the config file.
-- Improved performance for queries with `DISTINCT`.
-
-#### Bug fixes: {#bug-fixes-3}
-
-- Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn't get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things.
-- Fixed randomization when choosing hosts for the connection to ZooKeeper.
-- Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost.
-- Fixed an error where a data part in a `ReplicatedMergeTree` table could be broken after running `ALTER MODIFY` on an element in a `Nested` structure.
-- Fixed an error that could cause SELECT queries to "hang".
-- Improvements to distributed DDL queries.
-- Fixed the query `CREATE TABLE ... AS <materialized view>`.
-- Fixed a deadlock in the `ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables.
-- Fixed the invalid default value for `Enum`s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats.
-- Resolved the appearance of zombie processes when using a dictionary with an `executable` source.
-- Fixed segfault for the HEAD query.
-
-#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse}
-
-- You can use `pbuilder` to build ClickHouse.
-- You can use `libc++` instead of `libstdc++` for builds on Linux.
-- Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`.
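-
-As a quick taste of the array functions introduced in this release (the values are illustrative):
-
-``` sql
-SELECT
-    arrayPushBack([1, 2], 3)       AS pushed,   -- [1, 2, 3]
-    arrayPopFront([1, 2, 3])       AS popped,   -- [2, 3]
-    arraySlice([1, 2, 3, 4], 2, 2) AS sliced;   -- [2, 3]
-```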
-#### Please note when upgrading: {#please-note-when-upgrading}
-
-- There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 GiB to 150 GiB. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts. As a result, INSERT queries will fail with the message "Merges are processing significantly slower than inserts." Use the `SELECT * FROM system.merges` query to monitor the situation. You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don't need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the `<merge_tree>` section in config.xml, set ``` <max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool> ``` and restart the server.
-
-### ClickHouse release 1.1.54284, 2017-08-29 {#clickhouse-release-1-1-54284-2017-08-29}
-
-- This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper.
-
-### ClickHouse release 1.1.54282, 2017-08-23 {#clickhouse-release-1-1-54282-2017-08-23}
-
-This release contains bug fixes for the previous release 1.1.54276:
-
-- Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table.
-- Fixed parsing when inserting in RowBinary format if the input data starts with ';'.
-- Errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`).
-
-### ClickHouse release 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16}
-
-#### New features: {#new-features-4}
-
-- Added an optional WITH section for a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a`
-- INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert\_distributed\_sync=1.
-- Added the UUID data type for working with 16-byte identifiers.
-- Added aliases of CHAR, FLOAT and other types for compatibility with the Tableau.
-- Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers (see the sketch after this list).
-- You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries.
-- Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len).`
-- Added the max\_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance.
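-
-A short sketch of the new date-to-number conversion functions (the output values in the comments are examples):
-
-``` sql
-SELECT
-    toYYYYMM(now())         AS ym,    -- e.g. 201708
-    toYYYYMMDD(now())       AS ymd,   -- e.g. 20170816
-    toYYYYMMDDhhmmss(now()) AS full;  -- e.g. 20170816120000
-```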
-
-#### Main changes: {#main-changes}
-
-- Security improvements: all server files are created with 0640 permissions (can be changed via the `<umask>` config parameter).
-- Improved error messages for queries with invalid syntax.
-- Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data.
-- Significantly increased the performance of data merges for the ReplacingMergeTree engine.
-- Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed\_directory\_monitor\_batch\_inserts=1.
-
-#### Backward incompatible changes: {#backward-incompatible-changes-1}
-
-- Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays.
-
-#### Complete list of changes: {#complete-list-of-changes}
-
-- Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format.
-- Optimized stream allocation when reading from a Distributed table.
-- Settings can be configured in readonly mode if the value doesn't change.
-- Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred\_block\_size\_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns.
-- Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op constexpr.`
-- Added new settings for MergeTree engines (the merge\_tree section in config.xml):
-  - replicated\_deduplication\_window\_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables.
-  - cleanup\_delay\_period sets how often to start cleanup to remove outdated data.
-  - replicated\_can\_become\_leader can prevent a replica from becoming the leader (and assigning merges).
-- Accelerated cleanup to remove outdated data from ZooKeeper.
-- Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed\_ddl\_task\_timeout, which limits the time to wait for a response from the servers in the cluster. If a DDL request has not been performed on all hosts, a response will contain a timeout error, and the request will be executed in an asynchronous mode.
-- Improved display of stack traces in the server logs.
-- Added the "none" value for the compression method.
-- You can use multiple dictionaries\_config sections in config.xml.
-- It is possible to connect to MySQL through a socket in the file system.
-- The system.parts table has a new column with information about the size of marks, in bytes.
-
-#### Bug fixes: {#bug-fixes-4}
-
-- Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field.
-- Fixed a rare race condition in ReplicatedMergeTree when checking data parts.
-- Fixed possible freezing on "leader election" when starting a server.
-- The max\_replica\_delay\_for\_distributed\_queries setting was ignored when using a local replica of the data source. This has been fixed.
-- Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column.
-- Fixed an exception in the multiIf function when using empty arrays or strings.
-- Fixed excessive memory allocations when deserializing Native format.
-- Fixed incorrect auto-update of Trie dictionaries.
-- Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE.
-- Fixed a crash of GROUP BY when using distributed\_aggregation\_memory\_efficient=1.
-- Now you can specify the database.table in the right side of IN and JOIN.
-- Too many threads were used for parallel aggregation. This has been fixed.
-- Fixed how the "if" function works with FixedString arguments.
-- SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed.
-- Running `CREATE VIEW IF EXISTS` no longer causes crashes.
-- Fixed incorrect behavior when input\_format\_skip\_unknown\_fields=1 is set and there are negative numbers.
-- Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary.
-- Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables.
-- Fixed the incorrect interpretation of a SELECT query from Dictionary tables.
-- Fixed the "Cannot mremap" error when using arrays in IN and JOIN clauses with more than 2 billion elements.
-- Fixed the failover for dictionaries with MySQL as the source.
-
-#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse-1}
-
-- Builds can be assembled in Arcadia.
-- You can use gcc 7 to compile ClickHouse.
-- Parallel builds using ccache+distcc are faster now.
-
-### ClickHouse release 1.1.54245, 2017-07-04 {#clickhouse-release-1-1-54245-2017-07-04}
-
-#### New features: {#new-features-5}
-
-- Distributed DDL (for example, `CREATE TABLE ON CLUSTER`; see the sketch at the end of these notes)
-- The replicated query `ALTER TABLE CLEAR COLUMN IN PARTITION.`
-- The engine for Dictionary tables (access to dictionary data in the form of a table).
-- Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries).
-- You can check for updates to the dictionary by sending a request to the source.
-- Qualified column names
-- Quoting identifiers using double quotation marks.
-- Sessions in the HTTP interface.
-- The OPTIMIZE query for a Replicated table can run not only on the leader.
-
-#### Backward incompatible changes: {#backward-incompatible-changes-2}
-
-- Removed SET GLOBAL.
-
-#### Minor changes: {#minor-changes}
-
-- Now after an alert is triggered, the log prints the full stack trace.
-- Relaxed the verification of the number of damaged/extra data parts at startup (there were too many false positives).
-
-#### Bug fixes: {#bug-fixes-5}
-
-- Fixed a bad connection "sticking" when inserting into a Distributed table.
-- GLOBAL IN now works for a query from a Merge table that looks at a Distributed table.
-- The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed.
-- Changes in how an executable source of cached external dictionaries works.
-- Fixed the comparison of strings containing null characters.
-- Fixed the comparison of Float32 primary key fields with constants.
-- Previously, an incorrect estimate of the size of a field could lead to overly large allocations.
-- Fixed a crash when querying a Nullable column added to a table using ALTER.
-- Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT.
-- Fixed an ORDER BY subquery consisting of only constant values.
-- Previously, a Replicated table could remain in the invalid state after a failed DROP TABLE.
-- Aliases for scalar subqueries with empty results are no longer lost.
-- Now a query that used compilation does not fail with an error if the .so file gets damaged.
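-
-To close with an illustration of the distributed DDL feature from 1.1.54245 above, a minimal sketch using the era's engine syntax (the cluster name and the table are hypothetical):
-
-``` sql
-CREATE TABLE default.events ON CLUSTER my_cluster
-(
-    event_date Date,
-    user_id    UInt64
-)
-ENGINE = MergeTree(event_date, (event_date, user_id), 8192);
-```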
diff --git a/docs/ru/whats_new/changelog/2017.md b/docs/ru/whats_new/changelog/2017.md
new file mode 120000
index 00000000000..f278c42f170
--- /dev/null
+++ b/docs/ru/whats_new/changelog/2017.md
@@ -0,0 +1 @@
+en/whats_new/changelog/2017.md
\ No newline at end of file
diff --git a/docs/ru/whats_new/changelog/2018.md b/docs/ru/whats_new/changelog/2018.md
deleted file mode 100644
index 5de3ba68437..00000000000
--- a/docs/ru/whats_new/changelog/2018.md
+++ /dev/null
@@ -1,1061 +0,0 @@
----
-machine_translated: true
-machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
----
-
-## ClickHouse release 18.16 {#clickhouse-release-18-16}
-
-### ClickHouse release 18.16.1, 2018-12-21 {#clickhouse-release-18-16-1-2018-12-21}
-
-#### Bug fixes: {#bug-fixes}
-
-- Fixed an error that led to problems with updating dictionaries with the ODBC source. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829)
-- JIT compilation of aggregate functions now works with LowCardinality columns. [\#3838](https://github.com/ClickHouse/ClickHouse/issues/3838)
-
-#### Improvements: {#improvements}
-
-- Added the `low_cardinality_allow_in_native_format` setting (enabled by default). If this setting is disabled, LowCardinality columns will be converted to ordinary columns for SELECT queries, and ordinary columns will be expected for INSERT queries. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879)
-
-#### Build improvements: {#build-improvements}
-
-- Fixes for builds on macOS and ARM.
-
-### ClickHouse release 18.16.0, 2018-12-14 {#clickhouse-release-18-16-0-2018-12-14}
-
-#### New features: {#new-features}
-
-- `DEFAULT` expressions are evaluated for missing fields when loading data into semi-structured input formats (`JSONEachRow`, `TSKV`). The feature is enabled with the `insert_sample_with_metadata` setting. [\#3555](https://github.com/ClickHouse/ClickHouse/pull/3555)
-- The `ALTER TABLE` query now has the `MODIFY ORDER BY` action for changing the sorting key when adding or removing a table column. This is useful for tables in the `MergeTree` family that perform additional tasks when merging based on this sorting key, such as `SummingMergeTree`, `AggregatingMergeTree`, and so on. [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) [\#3755](https://github.com/ClickHouse/ClickHouse/pull/3755)
-- For tables in the `MergeTree` family, you can now specify a different sorting key (`ORDER BY`) and index (`PRIMARY KEY`). The sorting key can be longer than the index (see the sketch after this list). [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581)
-- Added the `hdfs` table function and the `HDFS` table engine for importing and exporting data to HDFS. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/3617)
-- Added functions for working with base64: `base64Encode`, `base64Decode`, `tryBase64Decode`. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3350)
-- Now you can use a parameter to configure the precision of the `uniqCombined` aggregate function (select the number of HyperLogLog cells). [\#3406](https://github.com/ClickHouse/ClickHouse/pull/3406)
-- Added the `system.contributors` table that contains the names of everyone who made commits in ClickHouse. [\#3452](https://github.com/ClickHouse/ClickHouse/pull/3452)
-- Added the ability to omit the partition for the `ALTER TABLE ... FREEZE` query in order to back up all partitions at once. [\#3514](https://github.com/ClickHouse/ClickHouse/pull/3514)
-- Added `dictGet` and `dictGetOrDefault` functions that don't require specifying the type of the return value. The type is determined automatically from the dictionary description. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3564)
-- Now you can specify comments for a column in the table description and change them using `ALTER`. [\#3377](https://github.com/ClickHouse/ClickHouse/pull/3377)
-- Reading is supported for `Join` type tables with simple keys. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728)
-- Now you can specify the options `join_use_nulls`, `max_rows_in_join`, `max_bytes_in_join`, and `join_overflow_mode` when creating a `Join` type table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728)
-- Added the `joinGet` function that allows you to use a `Join` type table like a dictionary. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728)
-- Added the `partition_key`, `sorting_key`, `primary_key`, and `sampling_key` columns to the `system.tables` table in order to provide information about table keys. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609)
-- Added the `is_in_partition_key`, `is_in_sorting_key`, `is_in_primary_key`, and `is_in_sampling_key` columns to the `system.columns` table. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609)
-- Added the `min_time` and `max_time` columns to the `system.parts` table. These columns are populated when the partitioning key is an expression consisting of `DateTime` columns. [Emmanuel Donin de Rosière](https://github.com/ClickHouse/ClickHouse/pull/3800)
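-
-A minimal sketch of the new separate sorting key and index (the table and column names are hypothetical):
-
-``` sql
-CREATE TABLE metrics
-(
-    date  Date,
-    key   UInt64,
-    value Float64
-)
-ENGINE = MergeTree
-PARTITION BY toYYYYMM(date)
-PRIMARY KEY (date)     -- the index
-ORDER BY (date, key);  -- the sorting key may be longer than the index
-```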
-
-#### Bug fixes: {#bug-fixes-1}
-
-- Fixes and performance improvements for the `LowCardinality` data type. `GROUP BY` using `LowCardinality(Nullable(...))`. Getting the values of `extremes`. Processing high-order functions. `LEFT ARRAY JOIN`. Distributed `GROUP BY`. Functions that return `Array`. Execution of `ORDER BY`. Writing to `Distributed` tables (nicelulu). Backward compatibility for `INSERT` queries from old clients that implement the `Native` protocol. Support for `LowCardinality` for `JOIN`. Improved performance when working in a single stream. [\#3823](https://github.com/ClickHouse/ClickHouse/pull/3823) [\#3803](https://github.com/ClickHouse/ClickHouse/pull/3803) [\#3799](https://github.com/ClickHouse/ClickHouse/pull/3799) [\#3769](https://github.com/ClickHouse/ClickHouse/pull/3769) [\#3744](https://github.com/ClickHouse/ClickHouse/pull/3744) [\#3681](https://github.com/ClickHouse/ClickHouse/pull/3681) [\#3651](https://github.com/ClickHouse/ClickHouse/pull/3651) [\#3649](https://github.com/ClickHouse/ClickHouse/pull/3649) [\#3641](https://github.com/ClickHouse/ClickHouse/pull/3641) [\#3632](https://github.com/ClickHouse/ClickHouse/pull/3632) [\#3568](https://github.com/ClickHouse/ClickHouse/pull/3568) [\#3523](https://github.com/ClickHouse/ClickHouse/pull/3523) [\#3518](https://github.com/ClickHouse/ClickHouse/pull/3518)
-- Fixed how the `select_sequential_consistency` option works. Previously, when this setting was enabled, an incomplete result was sometimes returned after beginning to write to a new partition. [\#2863](https://github.com/ClickHouse/ClickHouse/pull/2863)
-- Databases are correctly specified when executing DDL `ON CLUSTER` queries and `ALTER UPDATE/DELETE`. [\#3772](https://github.com/ClickHouse/ClickHouse/pull/3772) [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460)
-- Databases are correctly specified for subqueries inside a VIEW. [\#3521](https://github.com/ClickHouse/ClickHouse/pull/3521)
-- Fixed an error in `PREWHERE` with `FINAL` for `VersionedCollapsingMergeTree`. [7167bfd7](https://github.com/ClickHouse/ClickHouse/commit/7167bfd7b365538f7a91c4307ad77e552ab4e8c1)
-- Now you can use `KILL QUERY` to cancel queries that have not started yet because they are waiting for the table to be locked. [\#3517](https://github.com/ClickHouse/ClickHouse/pull/3517)
-- Corrected date and time calculations if the clocks were moved back at midnight (this happens in Iran, and happened in Moscow from 1981 to 1983). Previously, this led to the time being reset a day earlier than necessary, and also caused incorrect formatting of the date and time in text format. [\#3819](https://github.com/ClickHouse/ClickHouse/pull/3819)
-- Fixed bugs in some cases of `VIEW` and subqueries that omit the database. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3521)
-- Fixed a race condition when simultaneously reading from a `MATERIALIZED VIEW` and deleting a `MATERIALIZED VIEW` due to not locking the internal `MATERIALIZED VIEW`. [\#3404](https://github.com/ClickHouse/ClickHouse/pull/3404) [\#3694](https://github.com/ClickHouse/ClickHouse/pull/3694)
-- Fixed the error `Lock handler cannot be nullptr.` [\#3689](https://github.com/ClickHouse/ClickHouse/pull/3689)
-- Fixed query processing when the `compile_expressions` option is enabled (it's enabled by default). Nondeterministic constant expressions like the `now` function are no longer unfolded. [\#3457](https://github.com/ClickHouse/ClickHouse/pull/3457)
-- Fixed a crash when specifying a non-constant scale argument in `toDecimal32/64/128` functions.
-- Fixed an error when trying to insert an array with `NULL` elements in the `Values` format into a column of type `Array` without `Nullable` (if `input_format_values_interpret_expressions` = 1). [\#3487](https://github.com/ClickHouse/ClickHouse/pull/3487) [\#3503](https://github.com/ClickHouse/ClickHouse/pull/3503)
-- Fixed continuous error logging in `DDLWorker` if ZooKeeper is not available. [8f50c620](https://github.com/ClickHouse/ClickHouse/commit/8f50c620334988b28018213ec0092fe6423847e2)
-- Fixed the return type for `quantile*` functions from `Date` and `DateTime` types of arguments. [\#3580](https://github.com/ClickHouse/ClickHouse/pull/3580)
-- Fixed the `WITH` clause if it specifies a simple alias without expressions. [\#3570](https://github.com/ClickHouse/ClickHouse/pull/3570)
-- Fixed processing of queries with named subqueries and qualified column names when `enable_optimize_predicate_expression` is enabled. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3588)
-- Fixed the error `Attempt to attach to nullptr thread group` when working with materialized views. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3623)
-- Fixed a crash when passing certain incorrect arguments to the `arrayReverse` function. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871)
-- Fixed the buffer overflow in the `extractURLParameter` function. Improved performance. Added correct processing of strings containing zero bytes. [141e9799](https://github.com/ClickHouse/ClickHouse/commit/141e9799e49201d84ea8e951d1bed4fb6d3dacb5)
-- Fixed buffer overflow in the `lowerUTF8` and `upperUTF8` functions. Removed the ability to execute these functions over `FixedString` type arguments. [\#3662](https://github.com/ClickHouse/ClickHouse/pull/3662)
-- Fixed a rare race condition when deleting `MergeTree` tables. [\#3680](https://github.com/ClickHouse/ClickHouse/pull/3680)
-- Fixed a race condition when reading from `Buffer` tables and simultaneously performing `ALTER` or `DROP` on the target tables. [\#3719](https://github.com/ClickHouse/ClickHouse/pull/3719)
-- Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788)
-
-#### Improvements: {#improvements-1}
-
-- The server does not write the processed configuration files to the `/etc/clickhouse-server/` directory. Instead, it saves them in the `preprocessed_configs` directory inside `path`. This means that the `/etc/clickhouse-server/` directory doesn't have write access for the `clickhouse` user, which improves security. [\#2443](https://github.com/ClickHouse/ClickHouse/pull/2443)
-- The `min_merge_bytes_to_use_direct_io` option is set to 10 GiB by default. A merge that forms large parts of tables from the MergeTree family will be performed in `O_DIRECT` mode, which prevents excessive page cache eviction. [\#3504](https://github.com/ClickHouse/ClickHouse/pull/3504)
-- Accelerated server start when there is a very large number of tables. [\#3398](https://github.com/ClickHouse/ClickHouse/pull/3398)
-- Added a connection pool and HTTP `Keep-Alive` for connections between replicas. [\#3594](https://github.com/ClickHouse/ClickHouse/pull/3594)
-- If the query syntax is invalid, the `400 Bad Request` code is returned in the `HTTP` interface (500 was returned previously). [31bc680a](https://github.com/ClickHouse/ClickHouse/commit/31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab)
-- The `join_default_strictness` option is set to `ALL` by default for compatibility. [120e2cbe](https://github.com/ClickHouse/ClickHouse/commit/120e2cbe2ff4fbad626c28042d9b28781c805afe)
-- Removed logging to `stderr` from the `re2` library for invalid or complex regular expressions. [\#3723](https://github.com/ClickHouse/ClickHouse/pull/3723)
-- Added to the `Kafka` table engine: checks for subscriptions before beginning to read from Kafka; the kafka\_max\_block\_size setting for the table. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3396)
-- The `cityHash64`, `farmHash64`, `metroHash64`, `sipHash64`, `halfMD5`, `murmurHash2_32`, `murmurHash2_64`, `murmurHash3_32`, and `murmurHash3_64` functions now work for any number of arguments and for arguments in the form of tuples. [\#3451](https://github.com/ClickHouse/ClickHouse/pull/3451) [\#3519](https://github.com/ClickHouse/ClickHouse/pull/3519)
-- The `arrayReverse` function now works with any types of arrays. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871)
-- Added an optional parameter: the slot size for the `timeSlots` function. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3724)
-- For `FULL` and `RIGHT JOIN`, the `max_block_size` setting is used for a stream of non-joined data from the right table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3699)
-- Added the `--secure` command line parameter in `clickhouse-benchmark` and `clickhouse-performance-test` to enable TLS. [\#3688](https://github.com/ClickHouse/ClickHouse/pull/3688) [\#3690](https://github.com/ClickHouse/ClickHouse/pull/3690)
-- Type conversion when the structure of a `Buffer` type table does not match the structure of the destination table. [Vitaly Baranov](https://github.com/ClickHouse/ClickHouse/pull/3603)
-- Added the `tcp_keep_alive_timeout` option to enable keep-alive packets after inactivity for the specified time interval. [\#3441](https://github.com/ClickHouse/ClickHouse/pull/3441)
-- Removed unnecessary quoting of values for the partition key in the `system.parts` table if it consists of a single column. [\#3652](https://github.com/ClickHouse/ClickHouse/pull/3652)
-- The modulo function works for the `Date` and `DateTime` data types. [\#3385](https://github.com/ClickHouse/ClickHouse/pull/3385)
-- Added synonyms for the `POWER`, `LN`, `LCASE`, `UCASE`, `REPLACE`, `LOCATE`, `SUBSTR`, and `MID` functions. [\#3774](https://github.com/ClickHouse/ClickHouse/pull/3774) [\#3763](https://github.com/ClickHouse/ClickHouse/pull/3763) Some function names are case-insensitive for compatibility with the SQL standard. Added syntactic sugar `SUBSTRING(expr FROM start FOR length)` for compatibility with SQL (see the sketch after this list). [\#3804](https://github.com/ClickHouse/ClickHouse/pull/3804)
-- Added the ability to `mlock` memory pages corresponding to `clickhouse-server` executable code to prevent it from being forced out of memory. This feature is disabled by default. [\#3553](https://github.com/ClickHouse/ClickHouse/pull/3553)
-- Improved performance when reading with `O_DIRECT` (with the `min_bytes_to_use_direct_io` option enabled). [\#3405](https://github.com/ClickHouse/ClickHouse/pull/3405)
-- Improved performance of the `dictGet...OrDefault` function for a constant key argument and a non-constant default argument. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3563)
-- The `firstSignificantSubdomain` function now processes the domains `gov`, `mil`, and `edu`. [Igor Hatarist](https://github.com/ClickHouse/ClickHouse/pull/3601) Improved performance. [\#3628](https://github.com/ClickHouse/ClickHouse/pull/3628)
-- Ability to specify custom environment variables for starting `clickhouse-server` using the `SYS-V init.d` script by defining `CLICKHOUSE_PROGRAM_ENV` in `/etc/default/clickhouse`.
-  [Pavlo Bashynskyi](https://github.com/ClickHouse/ClickHouse/pull/3612)
-- Correct return code for the clickhouse-server init script. [\#3516](https://github.com/ClickHouse/ClickHouse/pull/3516)
-- The `system.metrics` table now has the `VersionInteger` metric, and `system.build_options` has the added line `VERSION_INTEGER`, which contains the numeric form of the ClickHouse version, such as `18016000`. [\#3644](https://github.com/ClickHouse/ClickHouse/pull/3644)
-- Removed the ability to compare the `Date` type with a number to avoid potential errors like `date = 2018-12-17`, where quotes around the date are omitted by mistake. [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687)
-- Fixed the behavior of stateful functions like `rowNumberInAllBlocks`. They previously output a result that was one number larger due to starting during query analysis. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3729)
-- If the `force_restore_data` file can't be deleted, an error message is displayed. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3794)
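-
-A small sketch of two of the conveniences mentioned above, the SQL-style `SUBSTRING` sugar and modulo on dates:
-
-``` sql
-SELECT
-    SUBSTRING('ClickHouse' FROM 1 FOR 5) AS prefix,   -- 'Click'
-    toDate('2018-12-18') % 7             AS day_mod;  -- modulo now works for Date
-```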
-
-#### Build improvements: {#build-improvements-1}
-
-- Updated the `jemalloc` library, which fixes a potential memory leak. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3557)
-- Profiling with `jemalloc` is enabled by default for debug builds. [2cc82f5c](https://github.com/ClickHouse/ClickHouse/commit/2cc82f5cbe266421cd4c1165286c2c47e5ffcb15)
-- Added the ability to run integration tests when only `Docker` is installed on the system. [\#3650](https://github.com/ClickHouse/ClickHouse/pull/3650)
-- Added the fuzz expression test in SELECT queries. [\#3442](https://github.com/ClickHouse/ClickHouse/pull/3442)
-- Added a stress test for commits, which performs functional tests in parallel and in random order to detect more race conditions. [\#3438](https://github.com/ClickHouse/ClickHouse/pull/3438)
-- Improved the method for starting clickhouse-server in a Docker image. [Elghazal Ahmed](https://github.com/ClickHouse/ClickHouse/pull/3663)
-- For a Docker image, added support for initializing databases using files in the `/docker-entrypoint-initdb.d` directory. [Konstantin Lebedev](https://github.com/ClickHouse/ClickHouse/pull/3695)
-- Fixes for builds on ARM. [\#3709](https://github.com/ClickHouse/ClickHouse/pull/3709)
-
-#### Backward incompatible changes: {#backward-incompatible-changes}
-
-- Removed the ability to compare the `Date` type with a number. Instead of `toDate('2018-12-18') = 17883`, you must use explicit type conversion `= toDate(17883)` [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687)
-
-## ClickHouse release 18.14 {#clickhouse-release-18-14}
-
-### ClickHouse release 18.14.19, 2018-12-19 {#clickhouse-release-18-14-19-2018-12-19}
-
-#### Bug fixes: {#bug-fixes-2}
-
-- Fixed an error that led to problems with updating dictionaries with the ODBC source. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829)
-- Databases are correctly specified when executing DDL `ON CLUSTER` queries. [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460)
-- Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788)
-
-#### Build improvements: {#build-improvements-2}
-
-- Fixes for builds on ARM.
-
-### ClickHouse release 18.14.18, 2018-12-04 {#clickhouse-release-18-14-18-2018-12-04}
-
-#### Bug fixes: {#bug-fixes-3}
-
-- Fixed an error in the `dictGet...` function for dictionaries of type `range`, if one of the arguments is constant and the other is not. [\#3751](https://github.com/ClickHouse/ClickHouse/pull/3751)
-- Fixed an error that caused messages `netlink: '...': attribute type 1 has an invalid length` to be printed in the Linux kernel log; that was happening only on fresh enough versions of the Linux kernel. [\#3749](https://github.com/ClickHouse/ClickHouse/pull/3749)
-- Fixed a segfault in the function `empty` for arguments of the `FixedString` type. [Daniel Dao Quang Minh](https://github.com/ClickHouse/ClickHouse/pull/3703)
-- Fixed excessive memory allocation when using a large value of the `max_query_size` setting (a memory chunk of `max_query_size` bytes was preallocated at once). [\#3720](https://github.com/ClickHouse/ClickHouse/pull/3720)
-
-#### Build changes: {#build-changes}
-
-- Fixed the build with LLVM/Clang libraries of version 7 from the OS packages (these libraries are used for runtime query compilation). [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582)
-
-### ClickHouse release 18.14.17, 2018-11-30 {#clickhouse-release-18-14-17-2018-11-30}
-
-#### Bug fixes: {#bug-fixes-4}
-
-- Fixed cases when the ODBC bridge process did not terminate with the main server process. [\#3642](https://github.com/ClickHouse/ClickHouse/pull/3642)
-- Fixed synchronous insertion into a `Distributed` table with a columns list that differs from the column list of the remote table. [\#3673](https://github.com/ClickHouse/ClickHouse/pull/3673)
-- Fixed a rare race condition that could lead to a crash when dropping a MergeTree table. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643)
-- Fixed a query deadlock in the case when query thread creation fails with the `Resource temporarily unavailable` error. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643)
-- Fixed parsing of the `ENGINE` clause when the `CREATE AS table` syntax was used and the `ENGINE` clause was specified before the `AS table` (the error resulted in ignoring the specified engine). [\#3692](https://github.com/ClickHouse/ClickHouse/pull/3692)
-
-### ClickHouse release 18.14.15, 2018-11-21 {#clickhouse-release-18-14-15-2018-11-21}
-
-#### Bug fixes: {#bug-fixes-5}
-
-- The size of a memory chunk was overestimated while deserializing a column of type `Array(String)`, which led to "Memory limit exceeded" errors. The issue appeared in version 18.12.13. [\#3589](https://github.com/ClickHouse/ClickHouse/issues/3589)
-
-### ClickHouse release 18.14.14, 2018-11-20 {#clickhouse-release-18-14-14-2018-11-20}
-
-#### Bug fixes: {#bug-fixes-6}
-
-- Fixed `ON CLUSTER` queries when the cluster is configured as secure (the `<secure>` flag). [\#3599](https://github.com/ClickHouse/ClickHouse/pull/3599)
-
-#### Build changes: {#build-changes-1}
-
-- Fixed problems (llvm-7 from system, macos) [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582)
-
-### ClickHouse release 18.14.13, 2018-11-08 {#clickhouse-release-18-14-13-2018-11-08}
-
-#### Bug fixes: {#bug-fixes-7}
-
-- Fixed the `Block structure mismatch in MergingSorted stream` error. [\#3162](https://github.com/ClickHouse/ClickHouse/issues/3162)
-- Fixed `ON CLUSTER` queries in case when secure connections were turned on in the cluster config (the `<secure>` flag). [\#3465](https://github.com/ClickHouse/ClickHouse/pull/3465)
-- Fixed an error in queries that used `SAMPLE`, `PREWHERE` and alias columns. [\#3543](https://github.com/ClickHouse/ClickHouse/pull/3543)
-- Fixed a rare `unknown compression method` error when the `min_bytes_to_use_direct_io` setting was enabled. [3544](https://github.com/ClickHouse/ClickHouse/pull/3544)
-
-#### Performance improvements: {#performance-improvements}
-
-- Fixed the performance regression of queries with `GROUP BY` of columns of UInt16 or Date type when executing on AMD EPYC processors. [Igor Lapko](https://github.com/ClickHouse/ClickHouse/pull/3512)
-- Fixed the performance regression of queries that process long strings. [\#3530](https://github.com/ClickHouse/ClickHouse/pull/3530)
-
-#### Build improvements: {#build-improvements-3}
-
-- Improvements for simplifying the Arcadia build. [\#3475](https://github.com/ClickHouse/ClickHouse/pull/3475), [\#3535](https://github.com/ClickHouse/ClickHouse/pull/3535)
-
-### ClickHouse release 18.14.12, 2018-11-02 {#clickhouse-release-18-14-12-2018-11-02}
-
-#### Bug fixes: {#bug-fixes-8}
-
-- Fixed a crash on joining two unnamed subqueries. [\#3505](https://github.com/ClickHouse/ClickHouse/pull/3505)
-- Fixed generating incorrect queries (with an empty `WHERE` clause) when querying external databases. [hotid](https://github.com/ClickHouse/ClickHouse/pull/3477)
-- Fixed using an incorrect timeout value in ODBC dictionaries. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3511)
-
-### ClickHouse release 18.14.11, 2018-10-29 {#clickhouse-release-18-14-11-2018-10-29}
-
-#### Bug fixes: {#bug-fixes-9}
-
-- Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [\#2156](https://github.com/ClickHouse/ClickHouse/issues/2156)
-- Fixed errors when merging data in tables containing arrays inside Nested structures. [\#3397](https://github.com/ClickHouse/ClickHouse/pull/3397)
-- Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [\#3429](https://github.com/ClickHouse/ClickHouse/pull/3429)
-- Fixed an error on inserts to a Distributed table in Native format. [\#3411](https://github.com/ClickHouse/ClickHouse/issues/3411)
-
-### ClickHouse release 18.14.10, 2018-10-23 {#clickhouse-release-18-14-10-2018-10-23}
-
-- The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [\#3410](https://github.com/ClickHouse/ClickHouse/pull/3410)
-- The `enable_optimize_predicate_expression` setting is disabled by default.
-
-### ClickHouse release 18.14.9, 2018-10-16 {#clickhouse-release-18-14-9-2018-10-16}
-
-#### New features: {#new-features-1}
-
-- The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available; see the sketch after this list). [\#3172](https://github.com/ClickHouse/ClickHouse/pull/3172)
-- Added the `formatDateTime` function. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2770)
-- Added the `JDBC` table engine and the `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210)
-- Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [\#3146](https://github.com/ClickHouse/ClickHouse/pull/3146)
-- Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362)
-- Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/ClickHouse/ClickHouse/pull/3144)
-- Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3208)
#### Experimental features: {#experimental-features}

- Optimization of the `GROUP BY` clause for `LowCardinality` data types. [\#3138](https://github.com/ClickHouse/ClickHouse/pull/3138)
- Optimized calculation of expressions for `LowCardinality` data types. [\#3200](https://github.com/ClickHouse/ClickHouse/pull/3200)

#### Improvements: {#improvements-2}

- Significantly reduced memory consumption for queries with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205)
- In the absence of a `JOIN` kind (`LEFT`, `INNER`, …), `INNER JOIN` is assumed. [\#3147](https://github.com/ClickHouse/ClickHouse/pull/3147)
- Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3202)
- The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of the remote database. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210)
- The `compile_expressions` setting (JIT compilation of expressions) is enabled by default.
- Fixed behavior for simultaneously dropping a database/table if it exists and creating a database/table if it does not exist. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message "File ... already exists", and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [\#3101](https://github.com/ClickHouse/ClickHouse/pull/3101)
- LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182)
- Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182)
- Correct calculation of row width in the terminal for `Pretty` formats, including strings with hieroglyphs. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3257).
- `ON CLUSTER` can be specified for `ALTER UPDATE` queries.
- Improved performance of reading data in the `JSONEachRow` format. [\#3332](https://github.com/ClickHouse/ClickHouse/pull/3332)
- Added the `LENGTH` and `CHARACTER_LENGTH` function synonyms for compatibility. The `CONCAT` function is no longer case-sensitive. [\#3306](https://github.com/ClickHouse/ClickHouse/pull/3306)
- Added the `TIMESTAMP` synonym for the `DateTime` type. [\#3390](https://github.com/ClickHouse/ClickHouse/pull/3390)
- There is always space reserved for query\_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools.
- Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205)
- Added a compatibility mode for the case when a client library that uses the Native protocol sends fewer columns by mistake than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [\#3171](https://github.com/ClickHouse/ClickHouse/pull/3171)
- In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by the source table partition). This is useful if the partitioning scheme changes during copying, but only changes slightly. [\#3166](https://github.com/ClickHouse/ClickHouse/pull/3166)
- The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215).
- Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto` format. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3216)
- The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/ClickHouse/ClickHouse/pull/3219)
- In the configuration of external dictionaries, `null_value` is interpreted as the default value of the data type. [\#3330](https://github.com/ClickHouse/ClickHouse/pull/3330)
- Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/ClickHouse/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264)
- Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [\#3281](https://github.com/ClickHouse/ClickHouse/pull/3281)
- Support for the `Decimal` data type in external dictionaries. [\#3324](https://github.com/ClickHouse/ClickHouse/pull/3324)
- Support for the `Decimal` data type in `SummingMergeTree` tables. [\#3348](https://github.com/ClickHouse/ClickHouse/pull/3348)
- Added specializations for `UUID` in `if`. [\#3366](https://github.com/ClickHouse/ClickHouse/pull/3366)
- Reduced the number of `open` and `close` system calls when reading from a `MergeTree table`. [\#3283](https://github.com/ClickHouse/ClickHouse/pull/3283)
- A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3375)

#### Bug fixes: {#bug-fixes-10}

- Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [\#1702](https://github.com/ClickHouse/ClickHouse/pull/1702)
- Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362)
- Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [\#3331](https://github.com/ClickHouse/ClickHouse/pull/3331) [\#3341](https://github.com/ClickHouse/ClickHouse/pull/3341)
- If after merging data parts the checksum for the resulting part differs from the result of the same merge in another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn't be added to the working set because of an error that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [\#3194](https://github.com/ClickHouse/ClickHouse/pull/3194)
- Fixed incorrect calculation of total memory consumption by queries (because of the incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3344)
- Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...` This error occurred in version 18.12.13. [\#3247](https://github.com/ClickHouse/ClickHouse/pull/3247)
- Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the query if the `JOIN` is only performed on remote servers. [\#3340](https://github.com/ClickHouse/ClickHouse/pull/3340)
- Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215).
- For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/ClickHouse/ClickHouse/pull/3150)
- If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses another server to connect. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn't start. [8218cf3a](https://github.com/ClickHouse/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9)
- If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/ClickHouse/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29)
- Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numeric value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results. [\#3229](https://github.com/ClickHouse/ClickHouse/pull/3229)
- Fixed type conversion between `Decimal` and integer numbers. [\#3211](https://github.com/ClickHouse/ClickHouse/pull/3211)
- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3231)
- Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator such as `;` is used. [\#3155](https://github.com/ClickHouse/ClickHouse/pull/3155)
- Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero); see the sketch after this list. [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/3163)
- Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [\#3165](https://github.com/ClickHouse/ClickHouse/pull/3165)
- Fixed bugs in the `ALTER UPDATE` query.
- Fixed bugs in the `odbc` table function that appeared in version 18.12. [\#3197](https://github.com/ClickHouse/ClickHouse/pull/3197)
- Fixed the operation of aggregate functions with `StateArray` combinators. [\#3188](https://github.com/ClickHouse/ClickHouse/pull/3188)
- Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/ClickHouse/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179)
- Fixed the output of types for operations with `Decimal` and integer arguments. [\#3224](https://github.com/ClickHouse/ClickHouse/pull/3224)
- Fixed a segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/ClickHouse/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a)
- The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [\#3241](https://github.com/ClickHouse/ClickHouse/pull/3241)
- Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/ClickHouse/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664)
- Fixed a compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [\#3254](https://github.com/ClickHouse/ClickHouse/pull/3254)
- Fixed handling of substitutions in `clickhouse-performance-test` if the query contains only part of the substitutions declared in the test. [\#3263](https://github.com/ClickHouse/ClickHouse/pull/3263)
- Fixed an error when using `FINAL` with `PREWHERE`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
- Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
- Added a check for the absence of `arrayJoin` in `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [\#3337](https://github.com/ClickHouse/ClickHouse/pull/3337)
- Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [\#3357](https://github.com/ClickHouse/ClickHouse/pull/3357)
- Fixed a segfault that could occur in rare cases after an optimization that replaced AND chains of equality evaluations with the corresponding IN expression. [liuyimin-bytedance](https://github.com/ClickHouse/ClickHouse/pull/3339)
- Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of executed queries is calculated more accurately when shutting down and for limiting the number of iterations. [\#3351](https://github.com/ClickHouse/ClickHouse/pull/3351) [\#3352](https://github.com/ClickHouse/ClickHouse/pull/3352)
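A quick sketch of the fixed `arrayCumSumNonNegative` behavior described in the list above (the expected result follows the function's documented semantics):

```sql
SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res;
-- res = [1, 2, 0, 1]: the running sum is clamped at zero
-- instead of becoming negative.
```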
#### Backward incompatible changes: {#backward-incompatible-changes-1}

- Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available for use by default. [\#3329](https://github.com/ClickHouse/ClickHouse/pull/3329)

## ClickHouse release 18.12 {#clickhouse-release-18-12}

### ClickHouse release 18.12.17, 2018-09-16 {#clickhouse-release-18-12-17-2018-09-16}

#### New features: {#new-features-2}

- `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [\#3126](https://github.com/ClickHouse/ClickHouse/pull/3126)
- Added the ability to use the `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/ClickHouse/ClickHouse/pull/3123)
- The `Decimal` type now supports the `var*` and `stddev*` aggregate functions. [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
- The `Decimal` type now supports mathematical functions (`exp`, `sin`, and so on). [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
- The `system.part_log` table now has the `partition_id` column. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)

#### Bug fixes: {#bug-fixes-11}

- `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3159)
- Fixed an incompatibility (an unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [\#3130](https://github.com/ClickHouse/ClickHouse/pull/3130)
- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)
- Fixed a minor issue with backward compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn't happen). [\#3122](https://github.com/ClickHouse/ClickHouse/pull/3122)

#### Backward incompatible changes: {#backward-incompatible-changes-2}

- The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)

### ClickHouse release 18.12.14, 2018-09-13 {#clickhouse-release-18-12-14-2018-09-13}

#### New features: {#new-features-3}

- Added support for `ALTER UPDATE` queries. [\#3035](https://github.com/ClickHouse/ClickHouse/pull/3035)
- Added the `allow_ddl` option, which restricts the user's access to DDL queries. [\#3104](https://github.com/ClickHouse/ClickHouse/pull/3104)
- Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of a merge (when above the threshold, data part files will be handled using O\_DIRECT). [\#3117](https://github.com/ClickHouse/ClickHouse/pull/3117)
- The `system.merges` system table now contains the `partition_id` column. [\#3099](https://github.com/ClickHouse/ClickHouse/pull/3099)

#### Improvements {#improvements-3}

- If a data part remains unchanged during a mutation, it is not downloaded by replicas. [\#3103](https://github.com/ClickHouse/ClickHouse/pull/3103)
- Autocomplete is available for names of settings when working with `clickhouse-client`. [\#3106](https://github.com/ClickHouse/ClickHouse/pull/3106)

#### Bug fixes: {#bug-fixes-12}

- Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [\#3118](https://github.com/ClickHouse/ClickHouse/pull/3118)
- Fixed an error when updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13.
- Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3098)
- Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3097)

### ClickHouse release 18.12.13, 2018-09-10 {#clickhouse-release-18-12-13-2018-09-10}

#### New features: {#new-features-4}

- Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the `allow_experimental_decimal_type` setting. [\#2846](https://github.com/ClickHouse/ClickHouse/pull/2846) [\#2970](https://github.com/ClickHouse/ClickHouse/pull/2970) [\#3008](https://github.com/ClickHouse/ClickHouse/pull/3008) [\#3047](https://github.com/ClickHouse/ClickHouse/pull/3047)
- New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`); see the sketch after this list. [\#2948](https://github.com/ClickHouse/ClickHouse/pull/2948)
- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2787)
- Added support for JOIN with table functions. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
- Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/ClickHouse/ClickHouse/pull/2447)
- Ctrl+C in clickhouse-client clears the query that was entered. [\#2877](https://github.com/ClickHouse/ClickHouse/pull/2877)
- Added the `join_default_strictness` setting (values: `"`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [\#2982](https://github.com/ClickHouse/ClickHouse/pull/2982)
- Each line of the server log related to query processing shows the query ID. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added `ProfileEvents` counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and written, both with and without the page cache). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added the `system.query_thread_log` table, which contains information about each query execution thread. Added the `log_query_threads` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.metrics` and `system.events` tables now have built-in documentation. [\#3016](https://github.com/ClickHouse/ClickHouse/pull/3016)
- Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2975)
- Added the `arrayCumSumNonNegative` and `arrayDifference` functions. [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/2942)
- Added the `retention` aggregate function. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2887)
- Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [\#3062](https://github.com/ClickHouse/ClickHouse/pull/3062) [\#3034](https://github.com/ClickHouse/ClickHouse/pull/3034)
- Tables in the MergeTree family now have the virtual column `_partition_id`. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)
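A minimal sketch of the new `WITH ROLLUP` modifier from the list above (the `sales` table and its columns are hypothetical):

```sql
-- In addition to the per-(year, month) rows, produces subtotal
-- rows per year and a grand total row.
SELECT year, month, sum(amount) AS total
FROM sales
GROUP BY year, month WITH ROLLUP;

-- Equivalent alternative syntax:
-- GROUP BY ROLLUP(year, month)
```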
#### Experimental features: {#experimental-features-1}

- Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary. [\#2830](https://github.com/ClickHouse/ClickHouse/pull/2830)
- Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT compile expressions, enable the `compile_expressions` setting. [\#2990](https://github.com/ClickHouse/ClickHouse/pull/2990) [\#3077](https://github.com/ClickHouse/ClickHouse/pull/3077)

#### Improvements: {#improvements-4}

- Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag.
- Improved performance of `GROUP BY` with multiple aggregation fields when one of them is a string and the others are of fixed length.
- Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`.
- Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2977) [\#2980](https://github.com/ClickHouse/ClickHouse/pull/2980)
- Improved performance of reading strings and arrays in binary formats. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2955)
- Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [\#2953](https://github.com/ClickHouse/ClickHouse/pull/2953)
- Fixed a performance problem in the case of a large stream of queries that result in an error (the `_dl_addr` function is visible in `perf top`, but the server isn't using much CPU). [\#2938](https://github.com/ClickHouse/ClickHouse/pull/2938)
- Conditions are pushed down into the view (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
- Improvements to the functionality of the `UUID` data type. [\#3074](https://github.com/ClickHouse/ClickHouse/pull/3074) [\#2985](https://github.com/ClickHouse/ClickHouse/pull/2985)
- The `UUID` data type is supported in The-Alchemist dictionaries. [\#2822](https://github.com/ClickHouse/ClickHouse/pull/2822)
- The `visitParamExtractRaw` function works correctly with nested structures. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2974)
- When the `input_format_skip_unknown_fields` setting is enabled, object fields in the `JSONEachRow` format are skipped correctly. [BlahGeek](https://github.com/ClickHouse/ClickHouse/pull/2958)
- For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [\#2920](https://github.com/ClickHouse/ClickHouse/pull/2920)
- The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/ClickHouse/ClickHouse/pull/2971)
- You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m`; see the sketch after this list. [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
- You can use the `SELECT TOP n` syntax as an alternative to `LIMIT`. [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
- Increased the size of the queue for writing to system tables, so the `SystemLog parameter queue is full` error doesn't happen as often.
- The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2801)
- Duplicate columns can be used in a `USING` clause for `JOIN`. [\#3006](https://github.com/ClickHouse/ClickHouse/pull/3006)
- `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [\#3003](https://github.com/ClickHouse/ClickHouse/pull/3003)
- The `odbc` table function now allows you to specify the database/schema name. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2885)
- Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/ClickHouse/ClickHouse/pull/2909)
- The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`.
- `ALTER DELETE` queries work for materialized views.
- Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there is a very large number of `ReplicatedMergeTree` tables.
- Support for `ATTACH TABLE ... ON CLUSTER` queries. [\#3025](https://github.com/ClickHouse/ClickHouse/pull/3025)
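A sketch of the two LIMIT-related syntax additions mentioned above (the `events` table and its `ts` column are hypothetical):

```sql
-- The classic form: skip 5 rows, return 10.
SELECT * FROM events ORDER BY ts LIMIT 5, 10;

-- The equivalent new spelling:
SELECT * FROM events ORDER BY ts LIMIT 10 OFFSET 5;

-- SELECT TOP n as an alternative to LIMIT:
SELECT TOP 10 * FROM events ORDER BY ts;
```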
#### Bug fixes: {#bug-fixes-13}

- Fixed an issue with `Dictionary` tables (the `Size of offsets doesn't match size of column` or `Unknown compression method` exception is thrown). This bug appeared in version 18.10.3. [\#2913](https://github.com/ClickHouse/ClickHouse/issues/2913)
- Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during a merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [\#3049](https://github.com/ClickHouse/ClickHouse/pull/3049)
- Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [\#3038](https://github.com/ClickHouse/ClickHouse/pull/3038)
- Fixed the possibility of data loss when inserting into `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [\#2939](https://github.com/ClickHouse/ClickHouse/pull/2939) [\#2949](https://github.com/ClickHouse/ClickHouse/pull/2949) [\#2964](https://github.com/ClickHouse/ClickHouse/pull/2964)
- Fixed a segfault during `JOIN ... ON`. [\#3000](https://github.com/ClickHouse/ClickHouse/pull/3000)
- Fixed an error when searching column names if the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [\#2994](https://github.com/ClickHouse/ClickHouse/pull/2994)
- Fixed the "Not found column" error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. [\#3087](https://github.com/ClickHouse/ClickHouse/pull/3087)
- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and optimization of the move to `PREWHERE` is triggered. [\#2226](https://github.com/ClickHouse/ClickHouse/pull/2226) [\#3037](https://github.com/ClickHouse/ClickHouse/pull/3037) [\#3055](https://github.com/ClickHouse/ClickHouse/pull/3055) [\#3065](https://github.com/ClickHouse/ClickHouse/pull/3065) [\#3073](https://github.com/ClickHouse/ClickHouse/pull/3073) [\#3090](https://github.com/ClickHouse/ClickHouse/pull/3090) [\#3093](https://github.com/ClickHouse/ClickHouse/pull/3093)
- Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [\#2910](https://github.com/ClickHouse/ClickHouse/pull/2910)
- Fixed an incorrect result when comparing `nan` with integers. [\#3024](https://github.com/ClickHouse/ClickHouse/pull/3024)
- Fixed an error in the `zlib-ng` library that could lead to a segfault in rare cases. [\#2854](https://github.com/ClickHouse/ClickHouse/pull/2854)
- Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insertion request results in multiple small blocks. [\#3084](https://github.com/ClickHouse/ClickHouse/pull/3084)
- Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously.
- Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [\#2989](https://github.com/ClickHouse/ClickHouse/pull/2989)
- Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2960)
- Fixed an error in the `arrayDistinct` function for `Nullable` array elements. [\#2845](https://github.com/ClickHouse/ClickHouse/pull/2845) [\#2937](https://github.com/ClickHouse/ClickHouse/pull/2937)
- The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2929)
- Fixed a segfault when re-initializing the ZooKeeper session. [\#2917](https://github.com/ClickHouse/ClickHouse/pull/2917)
- Fixed potential blocking when working with ZooKeeper.
- Fixed incorrect code for adding nested data structures in a `SummingMergeTree`.
- When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2808)

#### Security fix: {#security-fix}

- Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. [\#2828](https://github.com/ClickHouse/ClickHouse/pull/2828) [\#2879](https://github.com/ClickHouse/ClickHouse/pull/2879) [\#2886](https://github.com/ClickHouse/ClickHouse/pull/2886) [\#2893](https://github.com/ClickHouse/ClickHouse/pull/2893) [\#2921](https://github.com/ClickHouse/ClickHouse/pull/2921)
- Fixed incorrect validation of the file path in the `catBoostPool` table function. [\#2894](https://github.com/ClickHouse/ClickHouse/pull/2894)
- The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user's configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2856)

#### Backward incompatible changes: {#backward-incompatible-changes-3}

- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level; see the sketch below.
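A sketch of the asterisk expansion change described in the last item (the `t1` and `t2` tables are hypothetical):

```sql
SELECT *
FROM t1
ALL INNER JOIN t2 USING (id);
-- Now expands to all columns of both t1 and t2 (SQL standard).
-- To restore the old behavior (columns of the left table only),
-- set asterisk_left_columns_only = 1 at the user configuration level.
```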
#### Build changes: {#build-changes-2}

- Most integration tests can now be run by commit.
- Code style checks can also be run by commit.
- The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/ClickHouse/ClickHouse/pull/2912)
- When using clang for the build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall -Wextra -Werror`. [\#2957](https://github.com/ClickHouse/ClickHouse/pull/2957)
- Debug builds use the `jemalloc` debug option.
- The interface of the library for interacting with ZooKeeper is declared abstract. [\#2950](https://github.com/ClickHouse/ClickHouse/pull/2950)

## ClickHouse release 18.10 {#clickhouse-release-18-10}

### ClickHouse release 18.10.3, 2018-08-13 {#clickhouse-release-18-10-3-2018-08-13}

#### New features: {#new-features-5}

- HTTPS can be used for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
- Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32`. [\#2791](https://github.com/ClickHouse/ClickHouse/pull/2791)
- Support for Nullable types in the ClickHouse ODBC driver (the `ODBCDriver2` output format). [\#2834](https://github.com/ClickHouse/ClickHouse/pull/2834)
- Support for `UUID` in the key columns.

#### Improvements: {#improvements-5}

- Clusters can be removed without restarting the server when they are deleted from the config files. [\#2777](https://github.com/ClickHouse/ClickHouse/pull/2777)
- External dictionaries can be removed without restarting the server when they are deleted from the config files. [\#2779](https://github.com/ClickHouse/ClickHouse/pull/2779)
- Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/ClickHouse/ClickHouse/pull/2781)
- Improvements for the `UUID` data type (not yet complete). [\#2618](https://github.com/ClickHouse/ClickHouse/pull/2618)
- Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [\#2815](https://github.com/ClickHouse/ClickHouse/pull/2815)
- Old records of completed mutations are deleted (`ALTER DELETE`). [\#2784](https://github.com/ClickHouse/ClickHouse/pull/2784)
- Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/2841)
- The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2851)
- Added the `max_partition_size_to_drop` config option. [\#2782](https://github.com/ClickHouse/ClickHouse/pull/2782)
- Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2812)
- Added the `max_fetch_partition_retries_count` setting. [\#2831](https://github.com/ClickHouse/ClickHouse/pull/2831)
- Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [\#2832](https://github.com/ClickHouse/ClickHouse/pull/2832)
- The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set; see the sketch after this list. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2855)
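A quick sketch of the `quantileExact` change from the last item above (no tables needed; the subquery just produces an empty `Float64` set):

```sql
SELECT quantileExact(x)
FROM (SELECT toFloat64(number) AS x FROM numbers(10) WHERE 0);
-- Returns nan for the empty set.
```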
#### Bug fixes: {#bug-fixes-14}

- Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0.
- Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [\#2814](https://github.com/ClickHouse/ClickHouse/pull/2814)
- Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [\#2930](https://github.com/ClickHouse/ClickHouse/pull/2930)
- Fixed an error for concurrent use of `Set` or `Join`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2823)
- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a subquery if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2094)
- Fixed a memory leak if an exception occurred when connecting to a MySQL server.
- Fixed an incorrect clickhouse-client response code in the case of a query error.
- Fixed incorrect behavior of materialized views containing DISTINCT. [\#2795](https://github.com/ClickHouse/ClickHouse/issues/2795)

#### Backward incompatible changes {#backward-incompatible-changes-4}

- Removed support for CHECK TABLE queries for Distributed tables.

#### Build changes: {#build-changes-3}

- The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed up to 20%. However, there are queries that have slowed down by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. Under highly concurrent loads, CPU usage in userspace and in the system shows just a slight increase. [\#2773](https://github.com/ClickHouse/ClickHouse/pull/2773)
- Use of libressl from a submodule. [\#1983](https://github.com/ClickHouse/ClickHouse/pull/1983) [\#2807](https://github.com/ClickHouse/ClickHouse/pull/2807)
- Use of unixodbc from a submodule. [\#2789](https://github.com/ClickHouse/ClickHouse/pull/2789)
- Use of mariadb-connector-c from a submodule. [\#2785](https://github.com/ClickHouse/ClickHouse/pull/2785)
- Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself).

## ClickHouse release 18.6 {#clickhouse-release-18-6}

### ClickHouse release 18.6.0, 2018-08-02 {#clickhouse-release-18-6-0-2018-08-02}

#### New features: {#new-features-6}

- Added support for ON expressions for the JOIN ON syntax (see the sketch after this list):
  `JOIN ON Expr([table.]column ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]`
  The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [\#2742](https://github.com/ClickHouse/ClickHouse/pull/2742)
- HTTPS can be enabled for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
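A sketch of the new `JOIN ON` expression syntax (the `orders` and `customers` tables and their columns are hypothetical):

```sql
SELECT *
FROM orders AS o
ALL INNER JOIN customers AS c
    ON o.customer_id = c.id AND lower(o.region) = lower(c.region);
-- Each side of an equality is an arbitrary expression over the
-- columns of one table; equalities are chained with AND.
```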
#### Improvements: {#improvements-6}

- The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [\#2646](https://github.com/ClickHouse/ClickHouse/pull/2646)

## ClickHouse release 18.5 {#clickhouse-release-18-5}

### ClickHouse release 18.5.1, 2018-07-31 {#clickhouse-release-18-5-1-2018-07-31}

#### New features: {#new-features-7}

- Added the `murmurHash2_32` hash function [\#2756](https://github.com/ClickHouse/ClickHouse/pull/2756).

#### Improvements: {#improvements-7}

- Now you can use the `from_env` [\#2741](https://github.com/ClickHouse/ClickHouse/pull/2741) attribute to set values in config files from environment variables.
- Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions [\#2752](https://github.com/ClickHouse/ClickHouse/pull/2752).

#### Bug fixes: {#bug-fixes-15}

- Fixed a possible bug when starting a replica [\#2759](https://github.com/ClickHouse/ClickHouse/pull/2759).

## ClickHouse release 18.4 {#clickhouse-release-18-4}

### ClickHouse release 18.4.0, 2018-07-28 {#clickhouse-release-18-4-0-2018-07-28}

#### New features: {#new-features-8}

- Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations` [\#2721](https://github.com/ClickHouse/ClickHouse/pull/2721).
- Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function [\#2708](https://github.com/ClickHouse/ClickHouse/pull/2708).
- Support for `HTTP Basic` authentication in the replication protocol [\#2727](https://github.com/ClickHouse/ClickHouse/pull/2727).
- The `has` function now allows searching for a numeric value in an array of `Enum` values [Maxim Khrisanfov](https://github.com/ClickHouse/ClickHouse/pull/2699).
- Support for adding arbitrary message separators when reading from `Kafka` [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2701).

#### Improvements: {#improvements-8}

- The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition [\#2694](https://github.com/ClickHouse/ClickHouse/pull/2694).
- The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed.
- Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER` [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2689).

#### Bug fixes: {#bug-fixes-16}

- Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/ClickHouse/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2))
- Fixed a bug in the `windowFunnel` aggregate function [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2735).
- Fixed a bug in the `anyHeavy` aggregate function ([a2101df2](https://github.com/ClickHouse/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee))
- Fixed a server crash when using the `countArray()` aggregate function.

#### Backward incompatible changes: {#backward-incompatible-changes-5}

- Parameters for the `Kafka` engine were changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use the `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add the `kafka_row_delimiter` parameter with the `''` value.

## ClickHouse release 18.1 {#clickhouse-release-18-1}

### ClickHouse release 18.1.0, 2018-07-23 {#clickhouse-release-18-1-0-2018-07-23}

#### New features: {#new-features-9}

- Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([\#2634](https://github.com/ClickHouse/ClickHouse/pull/2634)).
- Support for arbitrary types for the `uniq*` family of aggregate functions ([\#2010](https://github.com/ClickHouse/ClickHouse/issues/2010)).
- Support for arbitrary types in comparison operators ([\#2026](https://github.com/ClickHouse/ClickHouse/issues/2026)).
- The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([\#2637](https://github.com/ClickHouse/ClickHouse/pull/2637)).
- Added the `arrayDistinct` function ([\#2670](https://github.com/ClickHouse/ClickHouse/pull/2670)).
- The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/ClickHouse/ClickHouse/pull/2566)).

#### Improvements: {#improvements-9}

- Changed the numbering scheme for release versions. Now the first part contains the year of the release (A.D., Moscow time zone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backward compatible, unless otherwise stated in the changelog.
- Faster conversion of floating-point numbers to strings ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2664)).
- If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log ([Leonardo Cecchi](https://github.com/ClickHouse/ClickHouse/pull/2669)).

#### Bug fixes: {#bug-fixes-17}

- Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2624)).
- Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/ClickHouse/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)).
- Fixed an error during a CAST to Nullable types ([\#1322](https://github.com/ClickHouse/ClickHouse/issues/1322)).
- Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2657)).
- Fixed incorrect transformation of an OR expression chain in a function argument ([chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2663)).
- Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery ([\#2571](https://github.com/ClickHouse/ClickHouse/issues/2571)).
- Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn't written in uppercase letters ([fe8c4d6](https://github.com/ClickHouse/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)).
- Added missing quoting of identifiers for queries to an external DBMS ([\#2635](https://github.com/ClickHouse/ClickHouse/issues/2635)).

#### Backward incompatible changes: {#backward-incompatible-changes-6}

- Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`; see the sketch below.
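A quick sketch of the incompatibility and its workaround:

```sql
-- No longer works:
SELECT toDateTime('0');

-- Use an explicit zero date-time instead:
SELECT toDateTime('0000-00-00 00:00:00');
```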
## ClickHouse release 1.1 {#clickhouse-release-1-1}

### ClickHouse release 1.1.54394, 2018-07-12 {#clickhouse-release-1-1-54394-2018-07-12}

#### New features: {#new-features-10}

- Added the `histogram` aggregate function ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2521)).
- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2600)).

#### Bug fixes: {#bug-fixes-18}

- Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388.
- Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks into a table.
- The `has` function now works correctly for an array with Nullable elements ([\#2115](https://github.com/ClickHouse/ClickHouse/issues/2115)).
- The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were queried from the table.
- Fixed how an empty `TinyLog` table works after inserting an empty data block ([\#2563](https://github.com/ClickHouse/ClickHouse/issues/2563)).
- The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL.

### ClickHouse release 1.1.54390, 2018-07-06 {#clickhouse-release-1-1-54390-2018-07-06}

#### New features: {#new-features-11}

- Queries can be sent in the `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Khvostikova](https://github.com/ClickHouse/ClickHouse/pull/2490)).
- Added the ability to enable or disable processing of single or double quotation marks when reading data in CSV format. You can configure this with the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2574)).
- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2599)).

#### Improvements: {#improvements-10}

- Improved performance, reduced memory consumption, and correct tracking of memory consumption with use of the IN operator when a table index could be used ([\#2584](https://github.com/ClickHouse/ClickHouse/pull/2584)).
- Removed redundant checking of checksums when adding a data part. This is important when there is a large number of replicas, because in these cases the total number of checks was equal to N^2.
- Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function ([\#2573](https://github.com/ClickHouse/ClickHouse/pull/2573)).
- Added `Nullable` support for the `runningDifference` function ([\#2594](https://github.com/ClickHouse/ClickHouse/pull/2594)).
- Improved query analysis performance when there is a very large number of expressions ([\#2572](https://github.com/ClickHouse/ClickHouse/pull/2572)).
- Faster selection of data parts for merging in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session ([\#2597](https://github.com/ClickHouse/ClickHouse/pull/2597)).
- The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/ClickHouse/ClickHouse/pull/2593)).

#### Bug fixes: {#bug-fixes-19}

- Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and read-only states of tables before restarting the server.
- Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted.
- Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2553)).
- Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size ([\#2603](https://github.com/ClickHouse/ClickHouse/pull/2603)).
- Fixed a segfault if `macros` are used but they aren't in the config file ([\#2570](https://github.com/ClickHouse/ClickHouse/pull/2570)).
- Fixed switching to the default database when reconnecting the client ([\#2583](https://github.com/ClickHouse/ClickHouse/pull/2583)).
- Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled.

#### Security fix: {#security-fix-1}

- Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`).

### ClickHouse release 1.1.54388, 2018-06-28 {#clickhouse-release-1-1-54388-2018-06-28}

#### New features: {#new-features-12}

- Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track the progress of this type of queries; see the sketch after this list.
- Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables.
- Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2260))
- Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`).
- Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2294)).
- Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2501)).
- Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2352)).
- New `startsWith` and `endsWith` functions for strings ([Vadim Plakhtinsky](https://github.com/ClickHouse/ClickHouse/pull/2429)).
- The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2535)).
- The password for `clickhouse-client` can be entered interactively.
- Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2459)).
- Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2472)).
- Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/ClickHouse/ClickHouse/pull/2263))
- Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats.
- Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests.
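A sketch of the new mutation workflow from the list above (the `hits` table and its `EventDate` column are hypothetical):

```sql
ALTER TABLE hits DELETE WHERE EventDate < '2018-01-01';

-- Mutations run asynchronously; track their progress here:
SELECT * FROM system.mutations WHERE table = 'hits';
```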
#### Experimental features: {#experimental-features-2}

- Added the ability to calculate `and` arguments only where they are needed ([Anastasia Tsarkova](https://github.com/ClickHouse/ClickHouse/pull/2272))
- JIT compilation to native code is now available for some expressions ([pyos](https://github.com/ClickHouse/ClickHouse/pull/2277)).

#### Bug fixes: {#bug-fixes-20}

- Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`.
- Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result.
- Fixed an error when reading an array column from a Nested structure ([\#2066](https://github.com/ClickHouse/ClickHouse/issues/2066)).
- Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`.
- Fixed an error when analyzing queries with recursive aliases.
- Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([\#2525](https://github.com/ClickHouse/ClickHouse/issues/2525)).
- User profile settings were not applied when using sessions in the HTTP interface.
- Fixed how settings are applied from the command line parameters in clickhouse-local.
- The ZooKeeper client library now uses the session timeout received from the server.
- Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout.
- Fixed pruning of parts for queries with conditions on partition key columns ([\#2342](https://github.com/ClickHouse/ClickHouse/issues/2342)).
- Merges are now possible after `CLEAR COLUMN IN PARTITION` ([\#2315](https://github.com/ClickHouse/ClickHouse/issues/2315)).
- Fixed type mapping in the ODBC table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2268)).
- Type comparisons have been fixed for `DateTime` with and without a time zone ([Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2400)).
- Fixed syntactic parsing and formatting of the `CAST` operator.
- Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2411)).
- Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/ClickHouse/ClickHouse/pull/2448)).
- Fixed an SSRF vulnerability in the remote() table function.
- Fixed exit behavior of `clickhouse-client` in multiline mode ([\#2510](https://github.com/ClickHouse/ClickHouse/issues/2510)).

#### Improvements: {#improvements-11}

- Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/ClickHouse/ClickHouse/pull/1722)).
- Improved LZ4 compression performance.
- Faster analysis of queries with a large number of JOINs and subqueries.
- The DNS cache is now updated automatically when there are too many network errors.
- Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts.
- Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`.
- Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match.
- A server with replicated tables can start even if you haven't configured ZooKeeper.
- When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/ClickHouse/ClickHouse/pull/2325)).
- Added configuration of directories in the systemd config file ([Mikhail Shiryaev](https://github.com/ClickHouse/ClickHouse/pull/2421)).

#### Build changes: {#build-changes-4}

- The gcc8 compiler can be used for builds.
- Added the ability to build llvm from a submodule.
- The version of the librdkafka library has been updated to v0.11.4.
- Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0.
- Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2274)).
- CMake now generates files for ninja by default (as when using `-G Ninja`).
- Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/ClickHouse/ClickHouse/pull/2519)).
- Fixed a header file conflict in Fedora Rawhide ([\#2520](https://github.com/ClickHouse/ClickHouse/issues/2520)).

#### Backward incompatible changes: {#backward-incompatible-changes-7}

- Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format.
- If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query, and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn't have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: update the server on the entire cluster.
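For reference, a minimal illustration of the expression shape the compatibility note above refers to; on a single up-to-date server both spellings below return the same value:

```sql
-- The lowercase function form without AS, as described in the note above:
SELECT cast(0, 'UInt8');

-- The standard spelling, which is not affected by the mixed-version issue:
SELECT CAST(0 AS UInt8);
```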

### ClickHouse release 1.1.54385, 2018-06-01 {#clickhouse-release-1-1-54385-2018-06-01}

#### Bug fixes: {#bug-fixes-21}

- Fixed an error that in some cases caused ZooKeeper operations to block.

### ClickHouse release 1.1.54383, 2018-05-22 {#clickhouse-release-1-1-54383-2018-05-22}

#### Bug fixes: {#bug-fixes-22}

- Fixed a slowdown of the replication queue if a table has many replicas.

### ClickHouse release 1.1.54381, 2018-05-14 {#clickhouse-release-1-1-54381-2018-05-14}

#### Bug fixes: {#bug-fixes-23}

- Fixed a node leak in ZooKeeper when ClickHouse loses its connection to the ZooKeeper server.

### ClickHouse release 1.1.54380, 2018-04-21 {#clickhouse-release-1-1-54380-2018-04-21}

#### New features: {#new-features-13}

- Added the table function `file(path, format, structure)`. An example of reading bytes from `/dev/urandom`: first run `ln -s /dev/urandom /var/lib/clickhouse/user_files/random`, then `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`.

#### Improvements: {#improvements-12}

- Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`.
- Simple `SELECT` queries from the `system.processes` table are not counted toward the `max_concurrent_queries` limit.

#### Bug fixes: {#bug-fixes-24}

- Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`.
- Fixed incorrect filtering by the partition index in expressions like `partition_key_column IN (...)`.
- Fixed the inability to execute an `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table.
- Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica.
- Fixed hanging of `KILL QUERY`.
- Fixed an error in the ZooKeeper client library that led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration.

#### Backward incompatible changes: {#backward-incompatible-changes-8}

- Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors.

### ClickHouse release 1.1.54378, 2018-04-16 {#clickhouse-release-1-1-54378-2018-04-16}

#### New features: {#new-features-14}

- The logging level can be changed without restarting the server.
- Added the `SHOW CREATE DATABASE` query.
- The `query_id` can be passed to `clickhouse-client` (elBroom).
- New setting: `max_network_bandwidth_for_all_users`.
- Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW` (see the sketch below).
- Added information about the size of data parts in uncompressed form to the system table.
- Support for inter-server encryption for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`).
- Table-level configuration for the `ReplicatedMergeTree` family to minimize the amount of data stored in ZooKeeper: `use_minimalistic_checksums_in_zookeeper = 1`
- Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed; it is also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov).
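Picking up the `ALTER TABLE ... PARTITION` support for `MATERIALIZED VIEW` listed above, a minimal hedged sketch; the view name and partition id are hypothetical:

```sql
-- Hypothetical materialized view partitioned by month; partition
-- manipulation can now target the view itself rather than its inner
-- storage table.
ALTER TABLE mv DROP PARTITION 201804;
```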
- Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson)
- When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was canceled` exception instead of an incomplete result.

#### Improvements: {#improvements-13}

- `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue.
- `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part.
- A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov).
- The `lengthUTF8` function runs faster (zhang2014).
- Improved performance of synchronous inserts into `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards.
- The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa).
- More robust crash recovery for asynchronous inserts into `Distributed` tables.
- The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊).

#### Bug fixes: {#bug-fixes-25}

- Fixed an error with `IN` when the left side of the expression is `Nullable`.
- Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index.
- The `max_execution_time` limit now works correctly with distributed queries.
- Fixed errors when calculating the size of composite columns in the `system.columns` table.
- Fixed an error when creating a temporary table `CREATE TEMPORARY TABLE IF NOT EXISTS.`
- Fixed errors in `StorageKafka` (\#\#2075)
- Fixed server crashes from invalid arguments of certain aggregate functions.
- Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables.
- The `Too many parts` state is less likely to happen when inserting into aggregating materialized views (\#\#2084).
- Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level.
- Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`.
- `SummingMergeTree` now works correctly for summation of nested data structures with a composite key.
- Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables.

#### Build changes: {#build-changes-5}

- The build supports `ninja` instead of `make` and uses `ninja` by default for building releases.
- Renamed packages: `clickhouse-server-base` is now `clickhouse-common-static`; `clickhouse-server-common` is now `clickhouse-server`; `clickhouse-common-dbg` is now `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still be loaded into the repositories for backward compatibility.

#### Backward incompatible changes: {#backward-incompatible-changes-9}

- Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`".
  To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`.
- Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `<listen_reuse_port>1</listen_reuse_port>` in the config.

### ClickHouse release 1.1.54370, 2018-03-16 {#clickhouse-release-1-1-54370-2018-03-16}

#### New features: {#new-features-15}

- Added the `system.macros` table and auto-updating of macros when the config file is changed.
- Added the `SYSTEM RELOAD CONFIG` query.
- Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the "maximum" interval. ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2012)).

#### Improvements: {#improvements-14}

- When inserting data into a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log).
- Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`.

#### Bug fixes: {#bug-fixes-26}

- Fixed the `Illegal PREWHERE` error when reading from Merge tables over `Distributed` tables.
- Added fixes that allow running clickhouse-server in IPv4-only Docker containers.
- Fixed a race condition when reading from the `system.parts_columns` table.
- Removed double buffering during a synchronous insert into a `Distributed` table, which could have caused the connection to time out.
- Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query.
- Fixed incorrect dates in the `system.parts` table.
- Fixed a bug that made it impossible to insert data into a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster.
- Fixed the vertical merging algorithm for an empty `ORDER BY` table.
- Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requestor server. This functionality was lost in release 1.1.54362.
- Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of `IN` should use a remote `default.table` instead of a local one. This behavior was broken in version 1.1.54358.
- Removed extraneous error-level logging of `Not found column ... in block`.

### ClickHouse release 1.1.54362, 2018-03-11 {#clickhouse-release-1-1-54362-2018-03-11}

#### New features: {#new-features-16}

- Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard. To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1.
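A quick sketch of the new default described in the item above, using the `numbers` table function; the `SET` shows how to opt back into the old behaviour:

```sql
-- New standard-compliant default: one row with zero/default aggregate values.
SELECT count(*), sum(number) FROM numbers(10) WHERE 0;

-- Restore the previous behaviour (an empty result set):
SET empty_result_for_aggregation_by_empty_set = 1;
SELECT count(*), sum(number) FROM numbers(10) WHERE 0;
```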
- Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard.
- Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`.
- An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova).
- Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta).
- Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings.
- Added the functions `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`.
- Added the `arrayCumSum` function (Javi Santana).
- Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read a DateTime from a string containing text in a wide variety of possible formats (see the sketch below).
- Data can be partially reloaded from external dictionaries during updating (loading only the records in which the value of the specified field is greater than in the previous download) (Arsen Hakobyan).
- Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier.
- The `remote` and `cluster` table functions can be used in `INSERT` queries.
- Added the `create_table_query` and `engine_full` virtual columns to the `system.tables` table. The `metadata_modification_time` column is virtual.
- Added the `data_path` and `metadata_path` columns to the `system.tables` and `system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables.
- Added additional information about merges in the `system.part_log` table.
- An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov).
- The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014).
- Added the `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014).
- Support for `SHOW CREATE TABLE` for temporary tables (zhang2014).
- Added the `system_profile` configuration parameter for the settings used by internal processes.
- Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko).
- Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko).
- Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes.
- Failover is supported in `remote` table functions for cases when some of the replicas are missing the requested table.
- Configuration settings can be overridden on the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`.
- Implemented the `empty` function for a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014).
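Two of the additions above in action: a minimal sketch of the `parseDateTimeBestEffort` family and of `empty` over `FixedString`; the input strings are arbitrary examples.

```sql
-- Reads DateTime values from a variety of textual layouts; the OrNull
-- variant returns NULL instead of throwing on unparsable input.
SELECT
    parseDateTimeBestEffort('2018-03-11 10:20:30'),
    parseDateTimeBestEffortOrNull('not a date');

-- toFixedString pads the value with null bytes, so the result is 1 ("empty").
SELECT empty(toFixedString('', 3));
```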
- Added the `listen_try` configuration parameter for listening on at least one of the listen addresses without quitting if some of the addresses can't be listened on (useful for systems with disabled support for IPv4 or IPv6).
- Added the `VersionedCollapsingMergeTree` table engine.
- Support for strings and arbitrary numeric types for the `library` dictionary source.
- `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`).
- A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`.
- `RENAME TABLE` can be performed for `VIEW`.
- Added the `throwIf` function.
- Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024).
- The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns.

#### Improvements: {#improvements-15}

- Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries.
- Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts.
- Added the `allow_distributed_ddl` option.
- Nondeterministic functions are not allowed in expressions for `MergeTree` table keys.
- Files with substitutions from the `config.d` directories are loaded in alphabetical order.
- Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`.
- The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks).
- When running a query, table-valued functions run once. Previously, the `remote` and `mysql` table-valued functions performed the same query twice to retrieve the table structure from a remote server.
- The `MkDocs` documentation generator is used.
- When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014).
- Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342.
- `Enum` values can be used in `min`, `max`, `sum` and some other functions. In these cases, the corresponding numeric values are used. This feature was previously available but was lost in release 1.1.54337.
- Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases.

#### Bug fixes: {#bug-fixes-27}

- Fixed cases when unnecessary columns were removed from subqueries by mistake, or were not removed from subqueries containing `UNION ALL`.
- Fixed a bug in merges for `ReplacingMergeTree` tables.
- Fixed synchronous insertions into `Distributed` tables (`insert_distributed_sync = 1`).
- Fixed a segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries.
- Fixed a segfault for certain uses of `replace_running_query` and `KILL QUERY`.
- Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table.
- Fixed a bug where the `DROP DATABASE` query did not delete the file with metadata.
- Fixed the `DROP DATABASE` query for `Dictionary` databases.
- Fixed the low precision of the `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alexey Bocharov).
- Fixed the calculation of implicit default values when needed to simultaneously calculate explicit default expressions in `INSERT` queries (zhang2014).
- Fixed a rare case when a query to a `MergeTree` table couldn't finish (chenxing-xc).
- Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc).
- Fixed a slight performance regression with functions that use regular expressions.
- Fixed a performance regression when creating multidimensional arrays from complex expressions.
- Fixed a bug that could cause an extra `FORMAT` section to appear in an `.sql` file with metadata.
- Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table.
- Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand).
- Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but that are empty for old partitions, when the conditions for these columns moved to `PREWHERE`.
- Fixed a bug when filtering tables by the virtual `_table` column in queries to `Merge` tables.
- Fixed a bug when using `ALIAS` columns in `Distributed` tables.
- Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family.
- Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries.
- Fixed a crash when passing arrays of different sizes to the `arrayReduce` function when using aggregate functions with multiple arguments.
- Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`.
- Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled).

#### Backward incompatible changes: {#backward-incompatible-changes-10}

- Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default.
- Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`.
- Removed the `UnsortedMergeTree` engine.

### ClickHouse release 1.1.54343, 2018-02-05 {#clickhouse-release-1-1-54343-2018-02-05}

- Added macro support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`.
- Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index.
- Improved processing of duplicates when inserting into replicated tables, so they no longer slow down execution of the replication queue.
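A short sketch tying together the first two 1.1.54343 items above. The cluster, database, and table names are placeholders, `'{cluster}'` is expanded from the macros configuration, and `x` is assumed to be in the table's primary key:

```sql
-- Macros in distributed DDL and in the Distributed engine parameters:
CREATE TABLE distr ON CLUSTER '{cluster}' (x UInt64)
ENGINE = Distributed('{cluster}', 'db', 'events');

-- IN (subquery) over a primary-key column can now be served by the index:
SELECT count() FROM events WHERE x IN (SELECT x FROM events WHERE x < 100);
```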

### ClickHouse release 1.1.54342, 2018-01-22 {#clickhouse-release-1-1-54342-2018-01-22}

This release contains bug fixes for the previous release 1.1.54337:

- Fixed a regression in 1.1.54337: if the default user has readonly access, the server refuses to start up with the message `Cannot create database in readonly mode`.
- Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d.
- Fixed a regression in 1.1.54337: wrong default configuration in the Docker image.
- Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`).
- Fixed a bug that could lead to inconsistent merges after an OPTIMIZE query to replicated tables (you may see it in log messages `Part ... intersects the previous part`).
- Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014).
- Fixed a bug in the implementation of NULL.

### ClickHouse release 1.1.54337, 2018-01-18 {#clickhouse-release-1-1-54337-2018-01-18}

#### New features: {#new-features-17}

- Added support for storage of multidimensional arrays and tuples (the `Tuple` data type) in tables.
- Support for table functions in `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`.
- Improved support for time zones. The `DateTime` data type can be annotated with the time zone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When time zones are specified in functions for `DateTime` arguments, the return type will track the time zone, and the value will be displayed as expected.
- Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive.
- Added the `toStartOfFifteenMinutes` function (Kirill Shvakov).
- Added the `clickhouse format` tool for formatting queries.
- Added the `format_schema_path` configuration parameter (Marek Vavruša). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory.
- Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin).
- Added a column with documentation for the `system.settings` table (Kirill Shvakov).
- Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables.
- Added the `system.models` table with information about loaded `CatBoost` machine learning models.
- Added the `mysql` and `odbc` table functions and the corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage.
- Added the ability to pass an argument of type `AggregateFunction` to the `groupArray` aggregate function (so you can create an array of states of some aggregate function).
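A minimal sketch of the last item above: collecting aggregate-function states into an array. `uniqState` is used here as the state-producing function, and all names are illustrative:

```sql
-- uniqState yields AggregateFunction(uniq, UInt64) values, one per group;
-- groupArray can now collect such states into an array.
SELECT length(groupArray(s)) AS state_count
FROM
(
    SELECT uniqState(number) AS s
    FROM numbers(100)
    GROUP BY number % 10
);
-- state_count = 10: an array of ten uniq() states was built.
```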
- Removed restrictions on various combinations of aggregate function combinators. For example, you can use both the `avgForEachIf` and `avgIfForEach` aggregate functions, which have different behaviors.
- The `-ForEach` aggregate function combinator is extended to the case of aggregate functions with multiple arguments.
- Added support for aggregate functions of `Nullable` arguments even in cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Examples: `groupArray`, `groupUniqArray`, `topK`.
- Added the `max_client_network_bandwidth` option for `clickhouse-client` (Kirill Shvakov).
- Users with the `readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT…) (Kirill Shvakov).
- Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša).
- Added the `intExp3` and `intExp4` functions.
- Added the `sumKahan` aggregate function.
- Added the to\*Number\*OrNull functions, where \*Number\* is a numeric type.
- Added support for the `WITH` clause for an `INSERT SELECT` query (author: zhang2014).
- Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded.
- Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova).
- The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory.
- Added the `--silent` option for the `clickhouse-local` tool. It suppresses printing query execution info to stderr.
- Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird).

#### Performance optimizations: {#performance-optimizations}

- Improved performance of the aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, `argMax` for string arguments.
- Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`.
- Improved performance of parsing and formatting `Date` and `DateTime` type values in text format.
- Improved performance and precision of parsing floating-point numbers.
- Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING`.
- Improved performance of the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr` by reducing computational stability. The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable`.

#### Bug fixes: {#bug-fixes-28}

- Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again was not working because inserted blocks were considered duplicates.
- Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for `CREATE MATERIALIZED VIEW` queries with `POPULATE`.
- Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration.
- Fixed unexpected results of passing a `Date` argument to `toStartOfDay`.
- Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for `INTERVAL n MONTH` in cases when the result has the previous year.
- Added missing support for the `UUID` data type for `DISTINCT`, `JOIN`, the `uniq` aggregate functions, and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete.
- Fixed `SummingMergeTree` behavior in cases when the rows sum to zero.
- Various fixes for the `Kafka` engine (Marek Vavruša).
- Fixed incorrect behavior of the `Join` table engine (Amos Bird).
- Fixed incorrect allocator behavior under FreeBSD and OS X.
- The `extractAll` function now supports empty matches.
- Fixed an error that blocked the use of `libressl` instead of `openssl`.
- Fixed the `CREATE TABLE AS SELECT` query from temporary tables.
- Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts.
- Fixed a possible overflow in `gcd`, `lcm` and `modulo` (the `%` operator) (Maks Skorokhod).
- `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config).
- Fixed a bug in the background check of parts (`MergeTreePartChecker`) when using a custom partition key.
- Fixed parsing of tuples (values of the `Tuple` data type) in text formats.
- Improved error messages about incompatible types passed to `multiIf`, `array` and some other functions.
- Redesigned support for `Nullable` types. Fixed bugs that could lead to a server crash. Fixed almost all other bugs related to `NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, the `join_use_nulls` mode, Nullable types as arguments of the `OR` operator, etc.
- Fixed various bugs related to the internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree`; alignment of `Enum` types in `Pretty` formats, etc.
- Stricter checks for allowed combinations of composite columns.
- Fixed overflow when specifying a very large parameter for the `FixedString` data type.
- Fixed a bug in the `topK` aggregate function in a generic case.
- Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with the `-Array` combinator.
- Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322).
- Fixed the precision of the `exp10` function.
- Fixed the behavior of the `visitParamExtract` function for better compliance with the documentation.
- Fixed a crash when incorrect data types are specified.
- Fixed the behavior of `DISTINCT` in the case when all columns are constants.
- Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index.
- Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries.
- Fixed a bug that led to excessive rows in the result of `FULL` and `RIGHT JOIN` (Amos Bird).
- Fixed a server crash when creating and removing temporary files in `config.d` directories during config reload.
- Fixed the `SYSTEM DROP DNS CACHE` query: the cache was flushed, but the addresses of cluster nodes were not updated.
- Fixed the behavior of `MATERIALIZED VIEW` after executing `DETACH TABLE` for the table under the view (Marek Vavruša).

#### Build improvements: {#build-improvements-4}

- The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment.
- A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems.
- Added the `clickhouse-test` package. It can be used to run functional tests.
- The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub.
- Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run.
- Added support for `Cap'n'Proto` in the default build.
- Changed the format of documentation sources from `Restricted Text` to `Markdown`.
- Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually.
- For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as `clickhouse clang` and `clickhouse lld`.
- Removed usage of GNU extensions from the code and enabled the `-Wextra` option. When building with `clang`, the default is `libc++` instead of `libstdc++`.
- Extracted the `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools.

#### Backward incompatible changes: {#backward-incompatible-changes-11}

- The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory. If your table doesn't have `Nullable` columns or if the type of your table is not `Log`, then you don't need to do anything.
- Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default.
- The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstValue` to avoid confusion.
- Removed the `FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird).
- Removed the `BlockTabSeparated` format that was used solely for demonstration purposes.
- Changed the state format for the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with the corresponding states), please write to clickhouse-feedback@yandex-team.com.
- In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you could still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost.
  Although it was undocumented, we plan to support it again in future releases.
- Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release.

#### Please note when upgrading: {#please-note-when-upgrading}

- When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message `unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated.
- If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes.

## [Changelog for 2017](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2017.md) {#changelog-for-2017}
diff --git a/docs/ru/whats_new/changelog/2018.md b/docs/ru/whats_new/changelog/2018.md
new file mode 120000
index 00000000000..675c07e8bbb
--- /dev/null
+++ b/docs/ru/whats_new/changelog/2018.md
@@ -0,0 +1 @@
+en/whats_new/changelog/2018.md
\ No newline at end of file
diff --git a/docs/ru/whats_new/changelog/2019.md b/docs/ru/whats_new/changelog/2019.md
deleted file mode 100644
index ea5bffd74c9..00000000000
--- a/docs/ru/whats_new/changelog/2019.md
+++ /dev/null
@@ -1,2072 +0,0 @@
---
machine_translated: true
machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8
---

## ClickHouse release v19.17 {#clickhouse-release-v19-17}

### ClickHouse release v19.17.6.36, 2019-12-27 {#clickhouse-release-v19-17-6-36-2019-12-27}

#### Bug Fix {#bug-fix}

- Fixed a potential buffer overflow in decompression. A malicious user could pass fabricated compressed data that could cause a read after the buffer. This issue was found by Eldar Zaitov from the Yandex information security team. [\#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a possible server crash (`std::terminate`) when the server cannot send or write data in the JSON or XML format with values of the String data type (which require UTF-8 validation), or when compressing result data with the Brotli algorithm, or in some other rare cases. [\#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed dictionaries with a ClickHouse `VIEW` as the source; now reading such dictionaries doesn't cause the error `There is no query`. [\#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed checking whether a client host is allowed by the host\_regexp specified in users.xml. [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241), [\#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar))
- `RENAME TABLE` for a distributed table now renames the folder containing inserted data before sending it to the shards. This fixes an issue with successive renames `tableA->tableB`, `tableC->tableA`.
  [\#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix))
- `range_hashed` external dictionaries created by DDL queries now allow ranges of arbitrary numeric types. [\#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin))
- Fixed the `INSERT INTO table SELECT ... FROM mysql(...)` table function. [\#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix))
- Fixed a segfault in `INSERT INTO TABLE FUNCTION file()` while inserting into a file that doesn't exist. Now in this case the file is created and the insert is then processed. [\#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed a bitmapAnd error when intersecting an aggregated bitmap and a scalar bitmap. [\#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432))
- Fixed a segfault when an `EXISTS` query was used without a `TABLE` or `DICTIONARY` qualifier, just like `EXISTS t`. [\#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the return type for the `rand` and `randConstant` functions in the case of a `Nullable` argument. Now the functions always return `UInt32` and never `Nullable(UInt32)`. [\#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed `DROP DICTIONARY IF EXISTS db.dict`; now it doesn't throw an exception if `db` simply doesn't exist. [\#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar))
- If a table wasn't completely dropped because of a server crash, the server will try to restore and load it [\#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix))
- Fixed a trivial count query for a distributed table if there are more than two shard local tables. [\#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu))
- Fixed a bug that led to a data race in DB::BlockStreamProfileInfo::calculateRowsBeforeLimit() [\#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz))
- Fixed `ALTER table MOVE part` executed immediately after merging the specified part, which could cause moving of a part that the specified part was merged into. Now it correctly moves the specified part. [\#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon))
- Expressions for dictionaries can now be specified as strings. This is useful for calculating attributes while extracting data from non-ClickHouse sources, because it allows using non-ClickHouse syntax for those expressions. [\#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin))
- Fixed a very rare race in `clickhouse-copier` caused by an overflow in ZXid.
  [\#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
- Fixed a bug where, after a failed query (due to "Too many simultaneous queries", for example), the server would not read the external tables info, and the next request would interpret this info as the beginning of the next query, causing an error like `Unknown packet from client`. [\#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat))
- Avoid null dereference after "Unknown packet X from server" [\#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat))
- Restored support of all ICU locales, added the ability to apply collations for constant expressions, and added the language name to the system.collations table. [\#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin))
- The number of streams for reading from `StorageFile` and `StorageHDFS` is now limited, to avoid exceeding the memory limit. [\#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
- Fixed the `CHECK TABLE` query for `*MergeTree` tables without a key. [\#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin))
- Removed the mutation number from a part name in case there were no mutations. This removal improved compatibility with older versions. [\#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin))
- Fixed a bug where mutations were skipped for some attached parts due to their data\_version being larger than the table mutation version. [\#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang))
- Allow starting the server with redundant copies of parts after moving them to another device. [\#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed the error "Sizes of columns doesn't match" that might appear when using aggregate function columns. [\#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea))
- Now an exception is thrown in case of using WITH TIES alongside LIMIT BY. And now it's possible to use TOP with LIMIT BY. [\#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Fixed dictionary reload if the dictionary has an `invalidate_query`, which stopped updates after some exceptions on previous update attempts. [\#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin))

### ClickHouse release v19.17.4.11, 2019-11-22 {#clickhouse-release-v19-17-4-11-2019-11-22}

#### Backward Incompatible Change {#backward-incompatible-change}

- Using a column instead of an AST to store scalar subquery results for better performance. The `enable_scalar_subquery_optimization` setting was added in 19.17 and was enabled by default. It leads to errors like [this](https://github.com/ClickHouse/ClickHouse/issues/7851) during an upgrade to 19.17.2 or 19.17.3 from previous versions. This setting was disabled by default in 19.17.4 to make it possible to upgrade from 19.16 and older versions without errors.
  [\#7392](https://github.com/ClickHouse/ClickHouse/pull/7392) ([Amos Bird](https://github.com/amosbird))

#### New Feature {#new-feature}

- Added the ability to create dictionaries with DDL queries. [\#7360](https://github.com/ClickHouse/ClickHouse/pull/7360) ([alesapin](https://github.com/alesapin))
- Made the `bloom_filter` type of index support `LowCardinality` and `Nullable` [\#7363](https://github.com/ClickHouse/ClickHouse/issues/7363) [\#7561](https://github.com/ClickHouse/ClickHouse/pull/7561) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Added the function `isValidJSON` to check that the passed string is valid JSON. [\#5910](https://github.com/ClickHouse/ClickHouse/issues/5910) [\#7293](https://github.com/ClickHouse/ClickHouse/pull/7293) ([Vdimir](https://github.com/Vdimir))
- Implemented the `arrayCompact` function [\#7328](https://github.com/ClickHouse/ClickHouse/pull/7328) ([Memo](https://github.com/Joeywzr))
- Created the function `hex` for Decimal numbers. It works like `hex(reinterpretAsString())`, but doesn't delete the last zero bytes. [\#7355](https://github.com/ClickHouse/ClickHouse/pull/7355) ([Mikhail Korotov](https://github.com/millb))
- Added the `arrayFill` and `arrayReverseFill` functions, which replace elements by other elements in front of/behind them in the array. [\#7380](https://github.com/ClickHouse/ClickHouse/pull/7380) ([hcz](https://github.com/hczhcz))
- Added `CRC32IEEE()`/`CRC64()` support [\#7480](https://github.com/ClickHouse/ClickHouse/pull/7480) ([Azat Khuzhin](https://github.com/azat))
- Implemented the `char` function, similar to the one in [MySQL](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char) [\#7486](https://github.com/ClickHouse/ClickHouse/pull/7486) ([sundy-li](https://github.com/sundy-li))
- Added the `bitmapTransform` function. It transforms an array of values in a bitmap to another array of values, the result being a new bitmap [\#7598](https://github.com/ClickHouse/ClickHouse/pull/7598) ([Zhichang Yu](https://github.com/yuzhichang))
- Implemented the `javaHashUTF16LE()` function [\#7651](https://github.com/ClickHouse/ClickHouse/pull/7651) ([achimbab](https://github.com/achimbab))
- Added the `_shard_num` virtual column for the Distributed engine [\#7624](https://github.com/ClickHouse/ClickHouse/pull/7624) ([Azat Khuzhin](https://github.com/azat))

#### Experimental Feature {#experimental-feature}

- Support for processors (the new query execution pipeline) in `MergeTree`. [\#7181](https://github.com/ClickHouse/ClickHouse/pull/7181) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Bug Fix {#bug-fix-1}

- Fixed incorrect float parsing in `Values` [\#7817](https://github.com/ClickHouse/ClickHouse/issues/7817) [\#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix))
- Fixed a rare deadlock that can happen when trace\_log is enabled. [\#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov))
- Prevent message duplication when producing a Kafka table that has any MVs selecting from it [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
- Support for `Array(LowCardinality(Nullable(String)))` in `IN`.
  Resolves [\#7364](https://github.com/ClickHouse/ClickHouse/issues/7364) [\#7366](https://github.com/ClickHouse/ClickHouse/pull/7366) ([achimbab](https://github.com/achimbab))
- Added handling of `SQL_TINYINT` and `SQL_BIGINT`, and fixed handling of the `SQL_FLOAT` data source types in the ODBC bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
- Fixed aggregations (`avg` and quantiles) over empty decimal columns [\#7431](https://github.com/ClickHouse/ClickHouse/pull/7431) ([Andrey Konyaev](https://github.com/akonyaev90))
- Fixed `INSERT` into Distributed with `MATERIALIZED` columns [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
- Made `MOVE PARTITION` work if some parts of the partition are already on the destination disk or volume [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed a bug where hardlinks failed to be created during mutations in `ReplicatedMergeTree` in multi-disk configurations. [\#7558](https://github.com/ClickHouse/ClickHouse/pull/7558) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed a bug with a mutation on MergeTree when the whole part remains unchanged and the best space is found on another disk [\#7602](https://github.com/ClickHouse/ClickHouse/pull/7602) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed a bug with `keep_free_space_ratio` not being read from the disks configuration [\#7645](https://github.com/ClickHouse/ClickHouse/pull/7645) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed a bug with tables that contain only `Tuple` columns or columns with complex paths. Fixes [7541](https://github.com/ClickHouse/ClickHouse/issues/7541). [\#7545](https://github.com/ClickHouse/ClickHouse/pull/7545) ([alesapin](https://github.com/alesapin))
- Do not account memory for the Buffer engine in the max\_memory\_usage limit [\#7552](https://github.com/ClickHouse/ClickHouse/pull/7552) ([Azat Khuzhin](https://github.com/azat))
- Fixed final mark usage in `MergeTree` tables ordered by `tuple()`. In rare cases it could lead to the `Can't adjust last granule` error while selecting. [\#7639](https://github.com/ClickHouse/ClickHouse/pull/7639) ([Anton Popov](https://github.com/CurtizJ))
- Fixed a bug in mutations that have a predicate with actions that require context (for example, functions for JSON), which could lead to crashes or strange exceptions. [\#7664](https://github.com/ClickHouse/ClickHouse/pull/7664) ([alesapin](https://github.com/alesapin))
- Fixed a mismatch of database and table name escaping in the `data/` and `shadow/` directories [\#7575](https://github.com/ClickHouse/ClickHouse/pull/7575) ([Alexander Burmak](https://github.com/Alex-Burmak))
- Support duplicated keys in RIGHT\|FULL JOINs, e.g. `ON t.x = u.x AND t.x = u.y`. Fixed a crash in this case. [\#7586](https://github.com/ClickHouse/ClickHouse/pull/7586) ([Artem Zuykov](https://github.com/4ertus2))
- Fixed `Not found column in block` when joining on an expression with a RIGHT or FULL JOIN.
  [\#7641](https://github.com/ClickHouse/ClickHouse/pull/7641) ([Artem Zuykov](https://github.com/4ertus2))
- One more attempt to fix the infinite loop in the `PrettySpace` format [\#7591](https://github.com/ClickHouse/ClickHouse/pull/7591) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed a bug in the `concat` function when all arguments were `FixedString` of the same size. [\#7635](https://github.com/ClickHouse/ClickHouse/pull/7635) ([alesapin](https://github.com/alesapin))
- Fixed an exception in the case of using 1 argument while defining S3, URL and HDFS storages. [\#7618](https://github.com/ClickHouse/ClickHouse/pull/7618) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed the scope of InterpreterSelectQuery for views with a query [\#7601](https://github.com/ClickHouse/ClickHouse/pull/7601) ([Azat Khuzhin](https://github.com/azat))

#### Improvement {#improvement}

- `Nullable` columns are recognized and NULL values are handled correctly by the ODBC bridge [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
- Write the current batch for a distributed send atomically [\#7600](https://github.com/ClickHouse/ClickHouse/pull/7600) ([Azat Khuzhin](https://github.com/azat))
- Throw an exception if we cannot detect a table for the column name in a query. [\#7358](https://github.com/ClickHouse/ClickHouse/pull/7358) ([Artem Zuykov](https://github.com/4ertus2))
- Added the `merge_max_block_size` setting to `MergeTreeSettings` [\#7412](https://github.com/ClickHouse/ClickHouse/pull/7412) ([Artem Zuykov](https://github.com/4ertus2))
- Queries with `HAVING` and without `GROUP BY` assume grouping by constant. So, `SELECT 1 HAVING 1` now returns a result. [\#7496](https://github.com/ClickHouse/ClickHouse/pull/7496) ([Amos Bird](https://github.com/amosbird))
- Support parsing `(X,)` as a tuple, similar to Python. [\#7501](https://github.com/ClickHouse/ClickHouse/pull/7501), [\#7562](https://github.com/ClickHouse/ClickHouse/pull/7562) ([Amos Bird](https://github.com/amosbird))
- Made the `range` function behave almost like a Pythonic one. [\#7518](https://github.com/ClickHouse/ClickHouse/pull/7518) ([sundy-li](https://github.com/sundy-li))
- Added `constraints` columns to the table `system.settings` [\#7553](https://github.com/ClickHouse/ClickHouse/pull/7553) ([Vitaly Baranov](https://github.com/vitlibar))
- A better Null format for the TCP handler, so that it's possible to use `select ignore(<expression>) from table format Null` for performance measurement via clickhouse-client [\#7606](https://github.com/ClickHouse/ClickHouse/pull/7606) ([Amos Bird](https://github.com/amosbird))
- Queries like `CREATE TABLE ... AS (SELECT (1, 2))` are parsed correctly [\#7542](https://github.com/ClickHouse/ClickHouse/pull/7542) ([hcz](https://github.com/hczhcz))

#### Performance Improvement {#performance-improvement}

- Improved performance of aggregation over short string keys. [\#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird))
- Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded.
#### Performance Improvement {#performance-improvement}

- Improve the performance of aggregation over short string keys. [\#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird))
- Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded. [\#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([Amos Bird](https://github.com/amosbird))
- Use storage meta information to evaluate trivial `SELECT count() FROM table;` [\#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([Amos Bird](https://github.com/amosbird), [alexey-milovidov](https://github.com/alexey-milovidov))
- Vectorize the processing of `arrayReduce` similar to the aggregator `addBatch`. [\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird))
- Minor improvements in the performance of `Kafka` consumption [\#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([Ivan](https://github.com/abyss7))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement}

- Add support for cross-compiling to the AARCH64 CPU architecture. Refactor the packager script. [\#7370](https://github.com/ClickHouse/ClickHouse/pull/7370) [\#7539](https://github.com/ClickHouse/ClickHouse/pull/7539) ([Ivan](https://github.com/abyss7))
- Unpack the darwin-x86\_64 and linux-aarch64 toolchains into the mounted Docker volume when building packages [\#7534](https://github.com/ClickHouse/ClickHouse/pull/7534) ([Ivan](https://github.com/abyss7))
- Update the Docker image for the binary packager [\#7474](https://github.com/ClickHouse/ClickHouse/pull/7474) ([Ivan](https://github.com/abyss7))
- Fixed compile errors on macOS Catalina [\#7585](https://github.com/ClickHouse/ClickHouse/pull/7585) ([Ernest Poletaev](https://github.com/ernestp))
- Some refactoring in the query analysis logic: split a complex class into several simple ones. [\#7454](https://github.com/ClickHouse/ClickHouse/pull/7454) ([Artem Zuikov](https://github.com/4ertus2))
- Fix the build without submodules [\#7295](https://github.com/ClickHouse/ClickHouse/pull/7295) ([proller](https://github.com/proller))
- Better `add_globs` in CMake files [\#7418](https://github.com/ClickHouse/ClickHouse/pull/7418) ([Amos Bird](https://github.com/amosbird))
- Remove hardcoded paths in the `unwind` target [\#7460](https://github.com/ClickHouse/ClickHouse/pull/7460) ([Konstantin Podshumok](https://github.com/podshumok))
- Allow using the MySQL format without SSL [\#7524](https://github.com/ClickHouse/ClickHouse/pull/7524) ([proller](https://github.com/proller))

#### Other {#other}

- Added an ANTLR4 grammar for the ClickHouse SQL dialect [\#7595](https://github.com/ClickHouse/ClickHouse/issues/7595) [\#7596](https://github.com/ClickHouse/ClickHouse/pull/7596) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse Release v19.16 {#clickhouse-release-v19-16}

#### ClickHouse Release v19.16.14.65, 2020-03-25 {#clickhouse-release-v19-16-14-65-2020-03-25}

- Fixed a bug in batched calculations of ternary logical operations on multiple arguments (more than 10). [\#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([Alexander Kazakov](https://github.com/Akazz)) This bugfix was backported to version 19.16 by a special request from Altinity.

#### ClickHouse Release v19.16.14.65, 2020-03-05 {#clickhouse-release-v19-16-14-65-2020-03-05}

- Fix distributed subqueries incompatibility with older CH versions. Fixes [\#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) [(tabplubix)](https://github.com/tavplubix)
- When executing a `CREATE` query, fold constant expressions in storage engine arguments.
Replace an empty database name with the current database name. Fixes [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix the check for a local address in `ClickHouseDictionarySource`. [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tabplubix)](https://github.com/tavplubix)
- Now background merges in the `*MergeTree` table engines family preserve the storage policy volume order more accurately. [\#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon))
- Prevent losing data in `Kafka` in rare cases when an exception happens after reading the suffix but before the commit. Fixes [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [\#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov)
- Fix a bug leading to server termination when trying to use/drop a `Kafka` table created with wrong parameters. Fixes [\#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [\#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov)
- Allow using `MaterializedView` with subqueries above `Kafka` tables. [\#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))

#### New Feature {#new-feature-1}

- Add the `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views. This new feature was added to the bugfix release by a special request from Altinity. [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)

### ClickHouse Release v19.16.2.2, 2019-10-30 {#clickhouse-release-v19-16-2-2-2019-10-30}

#### Backward Incompatible Change {#backward-incompatible-change-1}

- Add missing arity validation for count/countIf. [\#7095](https://github.com/ClickHouse/ClickHouse/issues/7095) [\#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir))
- Remove the legacy `asterisk_left_columns_only` setting (it was disabled by default). [\#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([Artem Zuikov](https://github.com/4ertus2))
- Format strings for the Template data format are now specified in files. [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) ([tavplubix](https://github.com/tavplubix))

#### New Feature {#new-feature-2}

- Introduce uniqCombined64() to calculate cardinality greater than UINT\_MAX. [\#7213](https://github.com/ClickHouse/ClickHouse/pull/7213), [\#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat Khuzhin](https://github.com/azat))
- Support Bloom filter indexes on Array columns. [\#6984](https://github.com/ClickHouse/ClickHouse/pull/6984) ([achimbab](https://github.com/achimbab))
- Add the function `getMacro(name)` that returns a String with the value of the corresponding `<macros>` entry from the server configuration (see the example after this list).
[\#7240](https://github.com/ClickHouse/ClickHouse/pull/7240) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Set two configuration options for a dictionary based on an HTTP source: `credentials` and `http-headers`. [\#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add a new ProfileEvent `Merge` that counts the number of launched background merges. [\#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([Mikhail Korotov](https://github.com/millb))
- Add the fullHostName function that returns a fully qualified domain name. [\#7263](https://github.com/ClickHouse/ClickHouse/issues/7263) [\#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundy-li](https://github.com/sundy-li))
- Add the functions `arraySplit` and `arrayReverseSplit` which split an array by "cut off" conditions. They are handy in time sequence handling. [\#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz))
- Add new functions that return an array of all matched indices in the multiMatch family of functions. [\#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila Kutenin](https://github.com/danlark1))
- Add a new database engine `Lazy` that is optimized for storing a large number of small log tables. [\#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([Nikita Vasilev](https://github.com/nikvas0))
- Add the aggregate functions groupBitmapAnd, -Or, -Xor for bitmap columns. [\#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([Zhichang Yu](https://github.com/yuzhichang))
- Add the aggregate function combinators -OrNull and -OrDefault, which return null or default values when there is nothing to aggregate. [\#7331](https://github.com/ClickHouse/ClickHouse/pull/7331) ([hcz](https://github.com/hczhcz))
- Introduce the CustomSeparated data format that supports custom escaping and delimiter rules. [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) ([tavplubix](https://github.com/tavplubix))
- Support Redis as a source of external dictionaries. [\#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [\#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [Anton Popov](https://github.com/CurtizJ))
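
A hedged sketch of the `getMacro` function mentioned above; the `<macros>` fragment is an assumed example configuration, not something shipped by default:

``` sql
-- Assumed server config fragment (config.xml):
--   <macros>
--       <shard>01</shard>
--       <replica>replica-1</replica>
--   </macros>
SELECT getMacro('shard') AS shard, getMacro('replica') AS replica;
-- shard: '01', replica: 'replica-1'
```
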
#### Bug Fix {#bug-fix-2}

- Fix a wrong query result if it has a `WHERE IN (SELECT ...)` section and `optimize_read_in_order` is used. [\#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([Anton Popov](https://github.com/CurtizJ))
- Disabled the MariaDB authentication plugin, which depends on files outside of the project. [\#7140](https://github.com/ClickHouse/ClickHouse/pull/7140) ([Yuriy Baranov](https://github.com/yurriy))
- Fix the exception `Cannot convert column ... because it is constant but values of constants are different in source and result` which could rarely happen when the functions `now()`, `today()`, `yesterday()`, `randConstant()` are used. [\#7156](https://github.com/ClickHouse/ClickHouse/pull/7156) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed using an HTTP keep-alive timeout instead of the TCP keep-alive timeout. [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
- Fixed a segmentation fault in groupBitmapOr (issue [\#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)). [\#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Zhichang Yu](https://github.com/yuzhichang))
- For materialized views, the commit for Kafka is called after all data were written. [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
- Fixed a wrong `duration_ms` value in the `system.part_log` table. It was off by a factor of ten. [\#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Vladimir Chebotarev](https://github.com/excitoon))
- A quick fix to resolve a crash in the LIVE VIEW table and re-enable all LIVE VIEW tests. [\#7201](https://github.com/ClickHouse/ClickHouse/pull/7201) ([vzakaznikov](https://github.com/vzakaznikov))
- Serialize NULL values correctly in min/max indexes of MergeTree parts. [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Don't put virtual columns into the .sql metadata when a table is created as `CREATE TABLE AS`. [\#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Ivan](https://github.com/abyss7))
- Fixed a segmentation fault in the `ATTACH PART` query. [\#7185](https://github.com/ClickHouse/ClickHouse/pull/7185) ([alesapin](https://github.com/alesapin))
- Fix a wrong result for some queries given by the optimization of empty IN subqueries and empty INNER/RIGHT JOIN. [\#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed an error in the LIVE VIEW getHeader() method. [\#7271](https://github.com/ClickHouse/ClickHouse/pull/7271) ([vzakaznikov](https://github.com/vzakaznikov))

#### Improvement {#improvement-1}

- Add a message in case of waiting for queue\_wait\_max\_ms. [\#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Azat Khuzhin](https://github.com/azat))
- Made the `s3_min_upload_part_size` setting table-level. [\#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Vladimir Chebotarev](https://github.com/excitoon))
- Check TTL in StorageFactory. [\#7304](https://github.com/ClickHouse/ClickHouse/pull/7304) ([sundy-li](https://github.com/sundy-li))
- Squash left-hand blocks in partial merge join (optimization). [\#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Artem Zuikov](https://github.com/4ertus2))
- Do not allow non-deterministic functions in mutations of replicated table engines, because this can introduce inconsistencies between replicas. [\#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Alexander Kazakov](https://github.com/Akazz))
- Disable memory tracking while converting an exception stack trace to a string. It can prevent the loss of error messages of type `Memory limit exceeded` on the server, which caused the `Attempt to read after eof` exception on the client. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Miscellaneous format improvements.
Resolves [\#6033](https://github.com/ClickHouse/ClickHouse/issues/6033), [\#2633](https://github.com/ClickHouse/ClickHouse/issues/2633), [\#6611](https://github.com/ClickHouse/ClickHouse/issues/6611), [\#6742](https://github.com/ClickHouse/ClickHouse/issues/6742) [\#7215](https://github.com/ClickHouse/ClickHouse/pull/7215) ([tavplubix](https://github.com/tavplubix))
- ClickHouse ignores values on the right side of the IN operator that are not convertible to the left-side type. Make it work properly for compound types – Array and Tuple. [\#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Support missing inequalities for ASOF JOIN. It is possible to join the less-or-equal variant and the strict greater and less variants for the ASOF column in ON syntax. [\#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem Zuikov](https://github.com/4ertus2))
- Optimize partial merge join. [\#7070](https://github.com/ClickHouse/ClickHouse/pull/7070) ([Artem Zuikov](https://github.com/4ertus2))
- Do not use more than 98K of memory in the uniqCombined function. [\#7236](https://github.com/ClickHouse/ClickHouse/pull/7236), [\#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat Khuzhin](https://github.com/azat))
- Flush parts of the right-hand join table to disk in PartialMergeJoin (if there is not enough memory). Load the data back when needed. [\#7186](https://github.com/ClickHouse/ClickHouse/pull/7186) ([Artem Zuikov](https://github.com/4ertus2))

#### Performance Improvement {#performance-improvement-1}

- Speed up joinGet with const arguments by avoiding data duplication. [\#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos Bird](https://github.com/amosbird))
- Return early if the subquery is empty. [\#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu))
- Optimize parsing of SQL expressions in Values. [\#6781](https://github.com/ClickHouse/ClickHouse/pull/6781) ([tavplubix](https://github.com/tavplubix))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-1}

- Disable some contribs for cross-compilation to Mac OS. [\#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7))
- Add the missing linking with PocoXML for clickhouse\_common\_io. [\#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat Khuzhin](https://github.com/azat))
- Accept multiple test-filter arguments in clickhouse-test. [\#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Enable musl and jemalloc for ARM. [\#7300](https://github.com/ClickHouse/ClickHouse/pull/7300) ([Amos Bird](https://github.com/amosbird))
- Added a `--client-option` parameter to `clickhouse-test` to pass additional options to the client. [\#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Preserve existing configs on rpm package upgrade. [\#7103](https://github.com/ClickHouse/ClickHouse/pull/7103) ([filimonov](https://github.com/filimonov))
- Fix errors detected by PVS. [\#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem Zuikov](https://github.com/4ertus2))
- Fix the build for Darwin.
[\#7149](https://github.com/ClickHouse/ClickHouse/pull/7149) ([Ivan](https://github.com/abyss7))
- glibc 2.29 compatibility. [\#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos Bird](https://github.com/amosbird))
- Make sure dh\_clean does not touch potential source files. [\#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos Bird](https://github.com/amosbird))
- Attempt to avoid a conflict when updating from the Altinity rpm - it has its config file packaged separately in clickhouse-server-common. [\#7073](https://github.com/ClickHouse/ClickHouse/pull/7073) ([filimonov](https://github.com/filimonov))
- Optimize some header files for faster rebuilds. [\#7212](https://github.com/ClickHouse/ClickHouse/pull/7212), [\#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Add performance tests for Date and DateTime. [\#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily Nemkov](https://github.com/Enmk))
- Fix some tests that contained non-deterministic mutations. [\#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander Kazakov](https://github.com/Akazz))
- Add a build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Avoid the use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
- Fix some problems in Fields found by MemorySanitizer. [\#7135](https://github.com/ClickHouse/ClickHouse/pull/7135), [\#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander Kuzmenkov](https://github.com/akuzm)), [\#7376](https://github.com/ClickHouse/ClickHouse/pull/7376) ([Amos Bird](https://github.com/amosbird))
- Fix undefined behavior in murmurhash32. [\#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos Bird](https://github.com/amosbird))
- Fix undefined behavior in StoragesInfoStream. [\#7384](https://github.com/ClickHouse/ClickHouse/pull/7384) ([tavplubix](https://github.com/tavplubix))
- Fixed constant expression folding for external database engines (MySQL, ODBC, JDBC). In previous versions it did not work for multiple constant expressions and did not work at all for Date, DateTime and UUID. This fixes [\#7245](https://github.com/ClickHouse/ClickHouse/issues/7245) [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a ThreadSanitizer data race error in LIVE VIEW when accessing the no\_users\_thread variable. [\#7353](https://github.com/ClickHouse/ClickHouse/pull/7353) ([vzakaznikov](https://github.com/vzakaznikov))
- Get rid of malloc symbols in libcommon [\#7134](https://github.com/ClickHouse/ClickHouse/pull/7134), [\#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos Bird](https://github.com/amosbird))
- Add a global flag ENABLE\_LIBRARIES for disabling all libraries. [\#7063](https://github.com/ClickHouse/ClickHouse/pull/7063) ([proller](https://github.com/proller))

#### Code Cleanup {#code-cleanup}

- Generalize the configuration repository to prepare for DDL for dictionaries.
[\#7155](https://github.com/ClickHouse/ClickHouse/pull/7155) ([alesapin](https://github.com/alesapin))
- A parser for dictionary DDL without any semantics. [\#7209](https://github.com/ClickHouse/ClickHouse/pull/7209) ([alesapin](https://github.com/alesapin))
- Split ParserCreateQuery into different smaller parsers. [\#7253](https://github.com/ClickHouse/ClickHouse/pull/7253) ([alesapin](https://github.com/alesapin))
- A small refactoring and renaming near external dictionaries. [\#7111](https://github.com/ClickHouse/ClickHouse/pull/7111) ([alesapin](https://github.com/alesapin))
- Refactor some code to prepare for role-based access control. [\#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly Baranov](https://github.com/vitlibar))
- Some improvements in DatabaseOrdinary code. [\#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita Vasilev](https://github.com/nikvas0))
- Do not use iterators in the find() and emplace() methods of hash tables. [\#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Fix getMultipleValuesFromConfig in case the parameter root is not empty. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
- Remove some copy-paste (TemporaryFile and TemporaryFileStream) [\#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem Zuikov](https://github.com/4ertus2))
- Slightly improved code readability (`MergeTreeData::getActiveContainingPart`). [\#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir Chebotarev](https://github.com/excitoon))
- Wait for all scheduled jobs that use local objects if `ThreadPool::schedule(...)` throws an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and fix the comments to make it obvious that it may throw. [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))

## ClickHouse Release 19.15 {#clickhouse-release-19-15}

### ClickHouse Release 19.15.4.10, 2019-10-31 {#clickhouse-release-19-15-4-10-2019-10-31}

#### Bug Fix {#bug-fix-3}

- Added handling of SQL\_TINYINT and SQL\_BIGINT, and fixed handling of SQL\_FLOAT data source types in the ODBC bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
- Allowed having some parts on the destination disk or volume in MOVE PARTITION. [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed NULL values in nullable columns through the ODBC bridge. [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
- Fixed INSERT into a Distributed non-local node with MATERIALIZED columns. [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
- Fixed the getMultipleValuesFromConfig function. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
- Fixed using an HTTP keep-alive timeout instead of the TCP keep-alive timeout.
[\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
- Wait for all jobs to finish on exception (fixes rare segfaults). [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
- Don't push to MVs when inserting into a Kafka table. [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
- Disable memory tracking for exception stacks. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed bad code in transforming a query for an external database. [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid the use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
- Added an example config with macros for tests ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse Release 19.15.3.6, 2019-10-09 {#clickhouse-release-19-15-3-6-2019-10-09}

#### Bug Fix {#bug-fix-4}

- Fixed bad\_variant in a hashed dictionary. ([alesapin](https://github.com/alesapin))
- Fixed a bug with a segmentation fault in the ATTACH PART query. ([alesapin](https://github.com/alesapin))
- Fixed time calculation in `MergeTreeData`. ([Vladimir Chebotarev](https://github.com/excitoon))
- Commit to Kafka explicitly after writing is finalized. [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
- Serialize NULL values correctly in min/max indexes of MergeTree parts. [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))

### ClickHouse Release 19.15.2.2, 2019-10-01 {#clickhouse-release-v19-15-2-2-2019-10-01}

#### New Feature {#new-feature-3}

- Tiered storage: support using multiple storage volumes for tables with the MergeTree engine. It's possible to store fresh data on SSD and automatically move old data to HDD. ([example](https://clickhouse.github.io/clickhouse-presentations/meetup30/new_features/#12)). [\#4918](https://github.com/ClickHouse/ClickHouse/pull/4918) ([Igr](https://github.com/ObjatieGroba)) [\#6489](https://github.com/ClickHouse/ClickHouse/pull/6489) ([alesapin](https://github.com/alesapin))
- Add the table function `input` for reading incoming data in an `INSERT SELECT` query. [\#5450](https://github.com/ClickHouse/ClickHouse/pull/5450) ([palasonic1](https://github.com/palasonic1)) [\#6832](https://github.com/ClickHouse/ClickHouse/pull/6832) ([Anton Popov](https://github.com/CurtizJ))
- Add a `sparse_hashed` dictionary layout that is functionally equivalent to the `hashed` layout but is more memory efficient. It uses about half as much memory at the cost of slower value retrieval. [\#6894](https://github.com/ClickHouse/ClickHouse/pull/6894) ([Azat Khuzhin](https://github.com/azat))
- Implement the ability to define a list of users for access to dictionaries. Only the currently connected database is used. [\#6907](https://github.com/ClickHouse/ClickHouse/pull/6907) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add a `LIMIT` option to the `SHOW` query.
[\#6944](https://github.com/ClickHouse/ClickHouse/pull/6944) ([Philipp Malkovsky](https://github.com/malkfilipp))
- Add the `bitmapSubsetLimit(bitmap, range_start, limit)` function that returns a subset of at most `limit` values from the set, each no smaller than `range_start`. [\#6957](https://github.com/ClickHouse/ClickHouse/pull/6957) ([Zhichang Yu](https://github.com/yuzhichang))
- Add the `bitmapMin` and `bitmapMax` functions. [\#6970](https://github.com/ClickHouse/ClickHouse/pull/6970) ([Zhichang Yu](https://github.com/yuzhichang))
- Add the function `repeat`, related to [issue-6648](https://github.com/ClickHouse/ClickHouse/issues/6648) [\#6999](https://github.com/ClickHouse/ClickHouse/pull/6999) ([flynn](https://github.com/ucasFL))

#### Experimental Feature {#experimental-feature-1}

- Implement an (in-memory) merge join variant that does not change the current pipeline. The result is partially sorted by the merge key. Set `partial_merge_join = 1` to use this feature. Merge join is still in development. [\#6940](https://github.com/ClickHouse/ClickHouse/pull/6940) ([Artem Zuikov](https://github.com/4ertus2))
- Add the `S3` engine and table function. It is still in development (no authentication support yet). [\#5596](https://github.com/ClickHouse/ClickHouse/pull/5596) ([Vladimir Chebotarev](https://github.com/excitoon))

#### Improvement {#improvement-2}

- Every message read from Kafka is inserted atomically. This resolves almost all known issues with the Kafka engine. [\#6950](https://github.com/ClickHouse/ClickHouse/pull/6950) ([Ivan](https://github.com/abyss7))
- Improvements for failover of distributed queries. Shorten the recovery time; it is also now configurable and can be seen in `system.clusters`. [\#6399](https://github.com/ClickHouse/ClickHouse/pull/6399) ([Vasily Nemkov](https://github.com/Enmk))
- Support numeric values for enums directly in the `IN` section. \#6766 [\#6941](https://github.com/ClickHouse/ClickHouse/pull/6941) ([dimarub2000](https://github.com/dimarub2000))
- Support (optional, disabled by default) redirects on URL storage. [\#6914](https://github.com/ClickHouse/ClickHouse/pull/6914) ([maqroll](https://github.com/maqroll))
- Add an information message when a client with an older version connects to the server. [\#6893](https://github.com/ClickHouse/ClickHouse/pull/6893) ([Philipp Malkovsky](https://github.com/malkfilipp))
- Remove the maximum backoff sleep time limit for sending data in Distributed tables [\#6895](https://github.com/ClickHouse/ClickHouse/pull/6895) ([Azat Khuzhin](https://github.com/azat))
- Add the ability to send profile events (counters) with cumulative values to Graphite. It can be enabled in the corresponding section of the server `config.xml`. [\#6969](https://github.com/ClickHouse/ClickHouse/pull/6969) ([Azat Khuzhin](https://github.com/azat))
- Add an automatic cast of type `T` to `LowCardinality(T)` while inserting data into a column of type `LowCardinality(T)` in Native format over HTTP. [\#6891](https://github.com/ClickHouse/ClickHouse/pull/6891) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Add the ability to use the `hex` function without `reinterpretAsString` for `Float32`, `Float64` (see the example below). [\#7024](https://github.com/ClickHouse/ClickHouse/pull/7024) ([Mikhail Korotov](https://github.com/millb))
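
A small illustration of `hex` applied directly to floating-point values; the results shown are a sketch based on the bytes of the IEEE 754 representation:

``` sql
SELECT hex(toFloat32(1)) AS f32, hex(toFloat64(1)) AS f64;
-- f32: '0000803F', f64: '000000000000F03F'  (raw IEEE 754 bytes)
```
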
#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-2}

- Add a gdb-index to the clickhouse binary with debug info. It will speed up the startup time of `gdb`. [\#6947](https://github.com/ClickHouse/ClickHouse/pull/6947) ([alesapin](https://github.com/alesapin))
- Speed up deb packaging with a patched dpkg-deb which uses `pigz`. [\#6960](https://github.com/ClickHouse/ClickHouse/pull/6960) ([alesapin](https://github.com/alesapin))
- Set `enable_fuzzing = 1` to enable libfuzzer instrumentation of all the project code. [\#7042](https://github.com/ClickHouse/ClickHouse/pull/7042) ([kyprizel](https://github.com/kyprizel))
- Add a split build test in CI. [\#7061](https://github.com/ClickHouse/ClickHouse/pull/7061) ([alesapin](https://github.com/alesapin))
- Add a build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Replace `libsparsehash` with `sparsehash-c11` [\#6965](https://github.com/ClickHouse/ClickHouse/pull/6965) ([Azat Khuzhin](https://github.com/azat))

#### Bug Fix {#bug-fix-5}

- Fixed a performance degradation of index analysis on complex keys on large tables. This fixes \#6924. [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a logical error causing segfaults when selecting from a Kafka empty topic. [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
- Fix closing the MySQL connection too early in `MySQLBlockInputStream.cpp`. [\#6882](https://github.com/ClickHouse/ClickHouse/pull/6882) ([Clément Rodriguez](https://github.com/clemrodriguez))
- Returned support for very old Linux kernels (fixes [\#6841](https://github.com/ClickHouse/ClickHouse/issues/6841)) [\#6853](https://github.com/ClickHouse/ClickHouse/pull/6853) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix possible data loss in an `insert select` query in case of an empty block in the input stream. \#6834 \#6862 [\#6911](https://github.com/ClickHouse/ClickHouse/pull/6911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix for the function `arrayEnumerateUniqRanked` with empty arrays in params [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
- Fix complex queries with array joins and global subqueries. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
- Fix the `Unknown identifier` error in ORDER BY and GROUP BY with multiple JOINs [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed an `MSan` warning while executing a function with a `LowCardinality` argument. [\#7062](https://github.com/ClickHouse/ClickHouse/pull/7062) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Backward Incompatible Change {#backward-incompatible-change-2}

- Changed the serialization format of bitmap\* aggregate function states to improve performance. Serialized states of bitmap\* from previous versions cannot be read (see the sketch below). [\#6908](https://github.com/ClickHouse/ClickHouse/pull/6908) ([Zhichang Yu](https://github.com/yuzhichang))
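
A minimal sketch of where such serialized bitmap states typically live; the table and column names here are assumptions for illustration only:

``` sql
-- States of groupBitmap are stored on disk by AggregatingMergeTree tables like
-- this one; states written by servers before this change cannot be read back.
CREATE TABLE bitmap_states
(
    k UInt32,
    ids AggregateFunction(groupBitmap, UInt32)
)
ENGINE = AggregatingMergeTree() ORDER BY k;

INSERT INTO bitmap_states
SELECT 1, groupBitmapState(toUInt32(number)) FROM numbers(10);

SELECT groupBitmapMerge(ids) FROM bitmap_states;  -- cardinality: 10
```
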
## ClickHouse Release 19.14 {#clickhouse-release-19-14}

### ClickHouse Release 19.14.7.15, 2019-10-02 {#clickhouse-release-19-14-7-15-2019-10-02}

#### Bug Fix {#bug-fix-6}

- This release also contains all bug fixes from 19.11.12.69.
- Fixed compatibility for distributed queries between 19.14 and earlier versions. This fixes [\#7068](https://github.com/ClickHouse/ClickHouse/issues/7068). [\#7069](https://github.com/ClickHouse/ClickHouse/pull/7069) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse Release 19.14.6.12, 2019-09-19 {#clickhouse-release-19-14-6-12-2019-09-19}

#### Bug Fix {#bug-fix-7}

- Fix for the function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
- Fixed the subquery name in queries with `ARRAY JOIN` and `GLOBAL IN subquery` with an alias. Use the subquery alias for the external table name if it is specified. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-3}

- Fix the [flapping](https://clickhouse-test-reports.s3.yandex.net/6944/aab95fd5175a513413c7395a73a82044bdafb906/functional_stateless_tests_(debug).html) test `00715_fetch_merged_or_mutated_part_zookeeper` by rewriting it as a shell script, because it needs to wait for mutations to apply. [\#6977](https://github.com/ClickHouse/ClickHouse/pull/6977) ([Alexander Kazakov](https://github.com/Akazz))
- Fixed UBSan and MemSan failures in the function `groupUniqArray` with an empty array argument. It was caused by placing an empty `PaddedPODArray` into the hash table zero cell, because the constructor for the zero cell value was not called. [\#6937](https://github.com/ClickHouse/ClickHouse/pull/6937) ([Amos Bird](https://github.com/amosbird))

### ClickHouse Release 19.14.3.3, 2019-09-10 {#clickhouse-release-19-14-3-3-2019-09-10}

#### New Feature {#new-feature-4}

- `WITH FILL` modifier for `ORDER BY`. (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
- `WITH TIES` modifier for `LIMIT`. (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
- Parse an unquoted `NULL` literal as NULL (if the setting `format_csv_unquoted_null_literal_as_null=1`). Initialize null fields with default values if the data type of this field is not nullable (if the setting `input_format_null_as_default=1`). [\#5990](https://github.com/ClickHouse/ClickHouse/issues/5990) [\#6055](https://github.com/ClickHouse/ClickHouse/pull/6055) ([tavplubix](https://github.com/tavplubix))
- Support wildcards in the paths of the table functions `file` and `hdfs`. If the path contains wildcards, the table will be read-only. Usage example: `select * from hdfs('hdfs://hdfs1:9000/some_dir/another_dir/*/file{0..9}{0..9}')` and `select * from file('some_dir/{some_file,another_file,yet_another}.tsv', 'TSV', 'value UInt32')`.
[\#6092](https://github.com/ClickHouse/ClickHouse/pull/6092) ([Olga Khvostikova](https://github.com/stavrolia))
- New `system.metric_log` table which stores values of `system.events` and `system.metrics` with a specified time interval. [\#6363](https://github.com/ClickHouse/ClickHouse/issues/6363) [\#6467](https://github.com/ClickHouse/ClickHouse/pull/6467) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6530](https://github.com/ClickHouse/ClickHouse/pull/6530) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow writing ClickHouse text logs to the `system.text_log` table. [\#6037](https://github.com/ClickHouse/ClickHouse/issues/6037) [\#6103](https://github.com/ClickHouse/ClickHouse/pull/6103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6164](https://github.com/ClickHouse/ClickHouse/pull/6164) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Show private symbols in stack traces (this is done via parsing the symbol tables of ELF files). Added information about the file and line number in stack traces if debug info is present. Speed up symbol name lookup with indexing symbols present in the program. Added new SQL functions for introspection: `demangle` and `addressToLine`. Renamed the function `symbolizeAddress` to `addressToSymbol` for consistency. The function `addressToSymbol` returns the mangled name for performance reasons and you have to apply `demangle`. Added the setting `allow_introspection_functions`, which is turned off by default. [\#6201](https://github.com/ClickHouse/ClickHouse/pull/6201) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Table function `values` (the name is case-insensitive). It allows reading from a `VALUES` list proposed in [\#5984](https://github.com/ClickHouse/ClickHouse/issues/5984). Example: `SELECT * FROM VALUES('a UInt64, s String', (1, 'one'), (2, 'two'), (3, 'three'))`. [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))
- Added the ability to alter storage settings. Syntax: `ALTER TABLE <table> MODIFY SETTING <setting> = <value>`. [\#6366](https://github.com/ClickHouse/ClickHouse/pull/6366) [\#6669](https://github.com/ClickHouse/ClickHouse/pull/6669) [\#6685](https://github.com/ClickHouse/ClickHouse/pull/6685) ([alesapin](https://github.com/alesapin))
- Support for removing detached parts. Syntax: `ALTER TABLE <table_name> DROP DETACHED PART '<part_id>'`. [\#6158](https://github.com/ClickHouse/ClickHouse/pull/6158) ([tavplubix](https://github.com/tavplubix))
- Table constraints. Allow adding a constraint to the table definition which will be checked at insert. [\#5273](https://github.com/ClickHouse/ClickHouse/pull/5273) ([Gleb Novikov](https://github.com/NanoBjorn)) [\#6652](https://github.com/ClickHouse/ClickHouse/pull/6652) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support for cascaded materialized views. [\#6324](https://github.com/ClickHouse/ClickHouse/pull/6324) ([Amos Bird](https://github.com/amosbird))
- Turn on the query profiler by default to sample every query execution thread once a second. [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Input format `ORC`.
[\#6454](https://github.com/ClickHouse/ClickHouse/pull/6454) [\#6703](https://github.com/ClickHouse/ClickHouse/pull/6703) ([akonyaev90](https://github.com/akonyaev90))
- Added two new functions: `sigmoid` and `tanh` (which are useful for machine learning applications). [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Functions `hasToken(haystack, token)`, `hasTokenCaseInsensitive(haystack, token)` to check if a given token is in the haystack. A token is a maximal-length substring between two non-alphanumeric ASCII characters (or the boundaries of the haystack). The token must be a constant string. Supported by the tokenbf\_v1 index specialization. [\#6596](https://github.com/ClickHouse/ClickHouse/pull/6596), [\#6662](https://github.com/ClickHouse/ClickHouse/pull/6662) ([Vasily Nemkov](https://github.com/Enmk))
- New function `neighbor(value, offset[, default_value])`. Allows reaching the prev/next value within a column in a block of data (see the example after this list). [\#5925](https://github.com/ClickHouse/ClickHouse/pull/5925) ([Alex Krash](https://github.com/alex-krash)) [6685365ab8c5b74f9650492c88a012596eb1b0c6](https://github.com/ClickHouse/ClickHouse/commit/6685365ab8c5b74f9650492c88a012596eb1b0c6) [341e2e4587a18065c2da1ca888c73389f48ce36c](https://github.com/ClickHouse/ClickHouse/commit/341e2e4587a18065c2da1ca888c73389f48ce36c) [Alexey Milovidov](https://github.com/alexey-milovidov)
- Created the function `currentUser()`, returning the login of the authorized user. Added the alias `user()` for compatibility with MySQL. [\#6470](https://github.com/ClickHouse/ClickHouse/pull/6470) ([Alex Krash](https://github.com/alex-krash))
- New aggregate functions `quantilesExactInclusive` and `quantilesExactExclusive` which were proposed in [\#5885](https://github.com/ClickHouse/ClickHouse/issues/5885). [\#6477](https://github.com/ClickHouse/ClickHouse/pull/6477) ([dimarub2000](https://github.com/dimarub2000))
- Function `bitmapRange(bitmap, range_begin, range_end)` which returns a new set with the specified range (not including `range_end`). [\#6314](https://github.com/ClickHouse/ClickHouse/pull/6314) ([Zhichang Yu](https://github.com/yuzhichang))
- Function `geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)` which creates an array of precision-long strings of geohash boxes covering the provided area. [\#6127](https://github.com/ClickHouse/ClickHouse/pull/6127) ([Vasily Nemkov](https://github.com/Enmk))
- Implement support for the INSERT query with `Kafka` tables. [\#6012](https://github.com/ClickHouse/ClickHouse/pull/6012) ([Ivan](https://github.com/abyss7))
- Added support for the `_partition` and `_timestamp` virtual columns for the Kafka engine. [\#6400](https://github.com/ClickHouse/ClickHouse/pull/6400) ([Ivan](https://github.com/abyss7))
- Possibility to remove sensitive data from `query_log`, server logs, and the process list with regexp-based rules. [\#5710](https://github.com/ClickHouse/ClickHouse/pull/5710) ([filimonov](https://github.com/filimonov))
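
A minimal sketch of the `neighbor` function mentioned above; note that, as a block-local function, its result can depend on data block boundaries:

``` sql
SELECT number, neighbor(number, 1, 0) AS next
FROM numbers(5);
-- next: 1, 2, 3, 4, 0  (the default 0 is used past the end of the block)
```
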
#### Experimental Feature {#experimental-feature-2}

- Input and output data format `Template`. It allows specifying a custom format string for input and output. [\#4354](https://github.com/ClickHouse/ClickHouse/issues/4354) [\#6727](https://github.com/ClickHouse/ClickHouse/pull/6727) ([tavplubix](https://github.com/tavplubix))
- Implementation of `LIVE VIEW` tables that were originally proposed in [\#2898](https://github.com/ClickHouse/ClickHouse/pull/2898), prepared in [\#3925](https://github.com/ClickHouse/ClickHouse/issues/3925), and then updated in [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541). See [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) for a detailed description. [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) ([vzakaznikov](https://github.com/vzakaznikov)) [\#6425](https://github.com/ClickHouse/ClickHouse/pull/6425) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#6656](https://github.com/ClickHouse/ClickHouse/pull/6656) ([vzakaznikov](https://github.com/vzakaznikov)) Note that the `LIVE VIEW` feature may be removed in the next versions.

#### Bug Fix {#bug-fix-8}

- This release also contains all bug fixes from 19.13 and 19.11.
- Fix a segmentation fault when the table has skip indices and a vertical merge happens. [\#6723](https://github.com/ClickHouse/ClickHouse/pull/6723) ([alesapin](https://github.com/alesapin))
- Fix per-column TTL with non-trivial column defaults. Previously, in the case of a forced TTL merge with an `OPTIMIZE ... FINAL` query, expired values were replaced by type defaults instead of the user-specified column defaults. [\#6796](https://github.com/ClickHouse/ClickHouse/pull/6796) ([Anton Popov](https://github.com/CurtizJ))
- Fixed Kafka message duplication on a normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
- Fixed an infinite loop when reading Kafka messages. Do not pause/resume the consumer on subscription at all - otherwise it may get paused indefinitely in some scenarios. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([Ivan](https://github.com/abyss7))
- Fix the `Key expression contains comparison between inconvertible types` exception in the `bitmapContains` function. [\#6136](https://github.com/ClickHouse/ClickHouse/issues/6136) [\#6146](https://github.com/ClickHouse/ClickHouse/issues/6146) [\#6156](https://github.com/ClickHouse/ClickHouse/pull/6156) ([dimarub2000](https://github.com/dimarub2000))
- Fixed a segfault with `optimize_skip_unused_shards` enabled and a missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([Anton Popov](https://github.com/CurtizJ))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault with a read of address `0x14c0` that may have happened due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of replicated tables and concurrent modification operations like ALTERs.
[\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed extra verbose logging in the MySQL interface [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Return the ability to parse boolean settings from 'true' and 'false' in the configuration file. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
- Fix a crash in the `quantile` and `median` functions over `Nullable(Decimal128)`. [\#6378](https://github.com/ClickHouse/ClickHouse/pull/6378) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a possible incomplete result returned by a `SELECT` query with a `WHERE` condition on the primary key that contained a conversion to a Float type. It was caused by an incorrect monotonicity check in the `toFloat` function. [\#6248](https://github.com/ClickHouse/ClickHouse/issues/6248) [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
- Check the `max_expanded_ast_elements` setting for mutations. Clear mutations after `TRUNCATE TABLE`. [\#6205](https://github.com/ClickHouse/ClickHouse/pull/6205) ([Winter Zhang](https://github.com/zhang2014))
- Fix JOIN results for key columns when used with `join_use_nulls`. Attach Nulls instead of column defaults. [\#6249](https://github.com/ClickHouse/ClickHouse/pull/6249) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed skip indices with a vertical merge and alter. Fix for the `Bad size of marks file` exception. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594) [\#6713](https://github.com/ClickHouse/ClickHouse/pull/6713) ([alesapin](https://github.com/alesapin))
- Fixed a rare bug in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows) [\#6746](https://github.com/ClickHouse/ClickHouse/issues/6746) [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
- Fixed a bug in the conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix wrong behavior and possible segfaults in the `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([Anton Popov](https://github.com/CurtizJ))
- Fixed unsafe code around the `getIdentifier` function. [\#6401](https://github.com/ClickHouse/ClickHouse/issues/6401) [\#6409](https://github.com/ClickHouse/ClickHouse/pull/6409) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug in the MySQL wire protocol (used while connecting to ClickHouse from a MySQL client). Caused by a heap buffer overflow in `PacketPayloadWriteBuffer`. [\#6212](https://github.com/ClickHouse/ClickHouse/pull/6212) ([Yuriy Baranov](https://github.com/yurriy))
- Fixed a memory leak in the `bitmapSubsetInRange` function. [\#6819](https://github.com/ClickHouse/ClickHouse/pull/6819) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed a rare bug when a mutation was executed after a granularity change.
[\#6816](https://github.com/ClickHouse/ClickHouse/pull/6816) ([alesapin](https://github.com/alesapin))
- Allow a protobuf message with all fields set to default values. [\#6132](https://github.com/ClickHouse/ClickHouse/pull/6132) ([Vitaly Baranov](https://github.com/vitlibar))
- Resolve a bug with the `nullIf` function when we send a `NULL` argument as the second argument. [\#6446](https://github.com/ClickHouse/ClickHouse/pull/6446) ([Guillaume Tassery](https://github.com/YiuRULE))
- Fixed a rare bug with wrong memory allocation/deallocation in complex-key cache dictionaries with string fields, which leads to infinite memory consumption (looks like a memory leak). The bug reproduces when the string size was a power of two starting from eight (8, 16, 32, etc.). [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
- Fixed Gorilla encoding on small sequences which caused the exception `Cannot write after end of buffer`. [\#6398](https://github.com/ClickHouse/ClickHouse/issues/6398) [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Vasily Nemkov](https://github.com/Enmk))
- Allow using non-nullable types in JOINs with `join_use_nulls` enabled. [\#6705](https://github.com/ClickHouse/ClickHouse/pull/6705) ([Artem Zuikov](https://github.com/4ertus2))
- Disable `Poco::AbstractConfiguration` substitutions in queries in `clickhouse-client`. [\#6706](https://github.com/ClickHouse/ClickHouse/pull/6706) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid a deadlock in `REPLACE PARTITION`. [\#6677](https://github.com/ClickHouse/ClickHouse/pull/6677) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Using `arrayReduce` for constant arguments may lead to a segfault. [\#6242](https://github.com/ClickHouse/ClickHouse/issues/6242) [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix inconsistent parts which can appear if a replica was restored after `DROP PARTITION`. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed a hang in the `JSONExtractRaw` function. [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a bug with incorrect skip index serialization and aggregation with adaptive granularity. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594). [\#6748](https://github.com/ClickHouse/ClickHouse/pull/6748) ([alesapin](https://github.com/alesapin))
- Fix the `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
- Fix a bug with writing secondary index marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
- Fix the initialization order during server startup. Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized.
[\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))
- Clear the data buffer from the previous read operation that completed with an error. [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa))
- Fix a bug with enabling adaptive granularity when creating a new replica for a Replicated\*MergeTree table. [\#6394](https://github.com/ClickHouse/ClickHouse/issues/6394) [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
- Fixed a possible crash during server startup in case an exception happened in `libunwind` during an exception on access to an uninitialized `ThreadStatus` structure. [\#6456](https://github.com/ClickHouse/ClickHouse/pull/6456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Fix a crash in the `yandexConsistentHash` function. Found by fuzz test. [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304) [\#6305](https://github.com/ClickHouse/ClickHouse/pull/6305) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the possibility of hanging queries when the server is overloaded and the global thread pool becomes nearly full. This has a higher chance of happening on clusters with a large number of shards (hundreds), because distributed queries allocate a thread per connection to each shard. For example, this issue may happen if a cluster of 330 shards is processing 30 concurrent distributed queries. This issue affects all versions starting from 19.2. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the logic of the `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a segfault when decoding the symbol table. [\#6603](https://github.com/ClickHouse/ClickHouse/pull/6603) ([Amos Bird](https://github.com/amosbird))
- Fixed an irrelevant exception in the cast of `LowCardinality(Nullable)` to a not-Nullable column in case it doesn't contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`. [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Removed extra quoting of descriptions in the `system.settings` table. [\#6696](https://github.com/ClickHouse/ClickHouse/issues/6696) [\#6699](https://github.com/ClickHouse/ClickHouse/pull/6699) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid a possible deadlock in `TRUNCATE` of a replicated table. [\#6695](https://github.com/ClickHouse/ClickHouse/pull/6695) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix reading in order of the sorting key. [\#6189](https://github.com/ClickHouse/ClickHouse/pull/6189) ([Anton Popov](https://github.com/CurtizJ))
- Fix the `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
- Fix a bug introduced by [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) (since 19.4.0).
Reproduces in queries to Distributed tables over MergeTree tables when we don't query any columns (`SELECT 1`). [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
- Fixed overflow in integer division of signed type to unsigned. The behaviour was exactly as in the C or C++ language (integer promotion rules), which may be surprising. Please note that the overflow is still possible when dividing a big signed number by a big unsigned number or vice-versa (but that case is less usual). The issue existed in all server versions. [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214) [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Limit maximum sleep time for throttling when `max_execution_speed` or `max_execution_speed_bytes` is set. Fixed false errors like `Estimated query execution time (inf seconds) is too long`. [\#5547](https://github.com/ClickHouse/ClickHouse/issues/5547) [\#6232](https://github.com/ClickHouse/ClickHouse/pull/6232) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed issues about using `MATERIALIZED` columns and aliases in `MaterializedView`. [\#448](https://github.com/ClickHouse/ClickHouse/issues/448) [\#3484](https://github.com/ClickHouse/ClickHouse/issues/3484) [\#3450](https://github.com/ClickHouse/ClickHouse/issues/3450) [\#2878](https://github.com/ClickHouse/ClickHouse/issues/2878) [\#2285](https://github.com/ClickHouse/ClickHouse/issues/2285) [\#3796](https://github.com/ClickHouse/ClickHouse/pull/3796) ([Amos Bird](https://github.com/amosbird)) [\#6316](https://github.com/ClickHouse/ClickHouse/pull/6316) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix `FormatFactory` behaviour for input streams which are not implemented as processors. [\#6495](https://github.com/ClickHouse/ClickHouse/pull/6495) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed typo. [\#6631](https://github.com/ClickHouse/ClickHouse/pull/6631) ([Alex Ryndin](https://github.com/alexryndin))
- Typo in the error message (is -\> are). [\#6839](https://github.com/ClickHouse/ClickHouse/pull/6839) ([Denis Zhuravlev](https://github.com/den-crane))
- Fixed error while parsing of columns list from string if the type contained a comma (this issue was relevant for `File`, `URL`, `HDFS` storages) [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))

#### Security Fix {#security-fix}

- This release also contains all bug security fixes from 19.13 and 19.11.
- Fixed the possibility of a fabricated query to cause server crash due to stack overflow in the SQL parser. Fixed the possibility of stack overflow in Merge and Distributed tables, materialized views and conditions for row-level security that involve subqueries. [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Improvement {#improvement-3}

- Correct implementation of ternary logic for `AND/OR` (a quick check follows below). [\#6048](https://github.com/ClickHouse/ClickHouse/pull/6048) ([Alexander Kazakov](https://github.com/Akazz))
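
A quick way to verify the three-valued (Kleene) semantics described in the entry above; this is an illustration, not a query from the PR itself:

```sql
-- Under ternary logic, a definite 0 decides AND and a definite 1 decides OR;
-- otherwise NULL ("unknown") propagates.
SELECT NULL AND 0,  -- 0: false regardless of the unknown operand
       NULL AND 1,  -- NULL: outcome depends on the unknown operand
       NULL OR  1,  -- 1: true regardless of the unknown operand
       NULL OR  0;  -- NULL
```
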
- Now values and rows with expired TTL will be removed after an `OPTIMIZE ... FINAL` query from old parts without TTL infos or with outdated TTL infos, e.g. after an `ALTER ... MODIFY TTL` query. Added queries `SYSTEM STOP/START TTL MERGES` to disallow/allow assigning merges with TTL and filter expired values in all merges (see the sketch a few entries below). [\#6274](https://github.com/ClickHouse/ClickHouse/pull/6274) ([Anton Popov](https://github.com/CurtizJ))
- Possibility to change the location of the ClickHouse history file for the client using the `CLICKHOUSE_HISTORY_FILE` env. [\#6840](https://github.com/ClickHouse/ClickHouse/pull/6840) ([filimonov](https://github.com/filimonov))
- Remove `dry_run` flag from `InterpreterSelectQuery`. … [\#6375](https://github.com/ClickHouse/ClickHouse/pull/6375) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Support `ASOF JOIN` with `ON` section. [\#6211](https://github.com/ClickHouse/ClickHouse/pull/6211) ([Artem Zuikov](https://github.com/4ertus2))
- Better support of skip indexes for mutations and replication. Support for the `MATERIALIZE/CLEAR INDEX ... IN PARTITION` query. `UPDATE x = x` recalculates all indices that use column `x`. [\#5053](https://github.com/ClickHouse/ClickHouse/pull/5053) ([Nikita Vasilev](https://github.com/nikvas0))
- Allow to `ATTACH` live views (for example, at server startup) regardless of the `allow_experimental_live_view` setting. [\#6754](https://github.com/ClickHouse/ClickHouse/pull/6754) ([alexey-milovidov](https://github.com/alexey-milovidov))
- For stack traces gathered by the query profiler, do not include stack frames generated by the query profiler itself. [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now table functions `values`, `file`, `url`, `hdfs` have support for ALIAS columns. [\#6255](https://github.com/ClickHouse/ClickHouse/pull/6255) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Throw an exception if a `config.d` file doesn't have the corresponding root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))
- Print extra info in the exception message for `no space left on device`. [\#6182](https://github.com/ClickHouse/ClickHouse/issues/6182), [\#6252](https://github.com/ClickHouse/ClickHouse/issues/6252) [\#6352](https://github.com/ClickHouse/ClickHouse/pull/6352) ([tavplubix](https://github.com/tavplubix))
- When determining shards of a `Distributed` table to be covered by a read query (for `optimize_skip_unused_shards` = 1), ClickHouse now checks conditions from both the `prewhere` and `where` clauses of the select statement. [\#6521](https://github.com/ClickHouse/ClickHouse/pull/6521) ([Alexander Kazakov](https://github.com/Akazz))
- Enabled `SIMDJSON` for machines without AVX2 but with the SSE 4.2 and PCLMUL instruction set. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285) [\#6320](https://github.com/ClickHouse/ClickHouse/pull/6320) ([alexey-milovidov](https://github.com/alexey-milovidov))
- ClickHouse can work on filesystems without `O_DIRECT` support (such as ZFS and BtrFS) without additional tuning. [\#4449](https://github.com/ClickHouse/ClickHouse/issues/4449) [\#6730](https://github.com/ClickHouse/ClickHouse/pull/6730) ([alexey-milovidov](https://github.com/alexey-milovidov))
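
A minimal sketch of the TTL workflow from the `OPTIMIZE ... FINAL` entry earlier in this list; the table and column names (`events`, `event_date`) are assumptions:

```sql
-- Pause TTL merges, tighten the TTL, then force expired values out of old
-- parts that carry no or outdated TTL infos.
SYSTEM STOP TTL MERGES;
ALTER TABLE events MODIFY TTL event_date + INTERVAL 30 DAY;
SYSTEM START TTL MERGES;
OPTIMIZE TABLE events FINAL;  -- rewrites old parts, filtering out expired rows
```
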
- Support push down predicate for the final subquery. [\#6120](https://github.com/ClickHouse/ClickHouse/pull/6120) ([TCeason](https://github.com/TCeason)) [\#6162](https://github.com/ClickHouse/ClickHouse/pull/6162) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Better `JOIN ON` keys extraction [\#6131](https://github.com/ClickHouse/ClickHouse/pull/6131) ([Artem Zuikov](https://github.com/4ertus2))
- Updated `SIMDJSON`. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285). [\#6306](https://github.com/ClickHouse/ClickHouse/pull/6306) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Optimize selecting of the smallest column for a `SELECT count()` query. [\#6344](https://github.com/ClickHouse/ClickHouse/pull/6344) ([Amos Bird](https://github.com/amosbird))
- Added `strict` parameter in `windowFunnel()`. When `strict` is set, the `windowFunnel()` applies conditions only for the unique values (see the sketch a few entries below). [\#6548](https://github.com/ClickHouse/ClickHouse/pull/6548) ([achimbab](https://github.com/achimbab))
- Safer interface of `mysqlxx::Pool`. [\#6150](https://github.com/ClickHouse/ClickHouse/pull/6150) ([avasiliev](https://github.com/avasiliev))
- Options line size when executing with the `--help` option now corresponds with terminal size. [\#6590](https://github.com/ClickHouse/ClickHouse/pull/6590) ([dimarub2000](https://github.com/dimarub2000))
- Disable "read in order" optimization for aggregation without keys. [\#6599](https://github.com/ClickHouse/ClickHouse/pull/6599) ([Anton Popov](https://github.com/CurtizJ))
- HTTP status code for `INCORRECT_DATA` and `TYPE_MISMATCH` error codes was changed from the default `500 Internal Server Error` to `400 Bad Request`. [\#6271](https://github.com/ClickHouse/ClickHouse/pull/6271) ([Alexander Rodin](https://github.com/a-rodin))
- Move the Join object from `ExpressionAction` into `AnalyzedJoin`. `ExpressionAnalyzer` and `ExpressionAction` do not know about the `Join` class anymore. Its logic is hidden by the `AnalyzedJoin` interface. [\#6801](https://github.com/ClickHouse/ClickHouse/pull/6801) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed possible deadlock of distributed queries when one of the shards is localhost but the query is sent via a network connection. [\#6759](https://github.com/ClickHouse/ClickHouse/pull/6759) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Changed semantics of multiple tables `RENAME` to avoid possible deadlocks. [\#6757](https://github.com/ClickHouse/ClickHouse/issues/6757). [\#6756](https://github.com/ClickHouse/ClickHouse/pull/6756) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Rewritten MySQL compatibility server to prevent loading the full packet payload in memory. Decreased memory consumption for each connection to approximately `2 * DBMS_DEFAULT_BUFFER_SIZE` (read/write buffers). [\#5811](https://github.com/ClickHouse/ClickHouse/pull/5811) ([Yuriy Baranov](https://github.com/yurriy))
- Move the AST alias interpreting logic out of the parser, which doesn't have to know anything about query semantics. [\#6108](https://github.com/ClickHouse/ClickHouse/pull/6108) ([Artem Zuikov](https://github.com/4ertus2))
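
An illustrative use of the new `strict` parameter of `windowFunnel()` from the entry above; the schema (`page_visits`, `user_id`, `event_time`, `page`) is assumed:

```sql
-- Without 'strict', repeated identical values can satisfy consecutive
-- conditions; with 'strict' only unique values advance the funnel.
SELECT user_id,
       windowFunnel(3600, 'strict')(
           event_time,
           page = 'home', page = 'cart', page = 'checkout') AS reached_step
FROM page_visits
GROUP BY user_id;
```
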
- Slightly safer parsing of `NamesAndTypesList`. [\#6408](https://github.com/ClickHouse/ClickHouse/issues/6408). [\#6410](https://github.com/ClickHouse/ClickHouse/pull/6410) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `clickhouse-copier`: Allow use of `where_condition` from config with a `partition_key` alias in the query for checking partition existence (earlier it was used only in reading data queries). [\#6577](https://github.com/ClickHouse/ClickHouse/pull/6577) ([proller](https://github.com/proller))
- Added optional message argument in `throwIf` (see the sketch a few entries below). ([\#5772](https://github.com/ClickHouse/ClickHouse/issues/5772)) [\#6329](https://github.com/ClickHouse/ClickHouse/pull/6329) ([Vdimir](https://github.com/Vdimir))
- Server exception got while sending insertion data is now being processed in the client as well. [\#5891](https://github.com/ClickHouse/ClickHouse/issues/5891) [\#6711](https://github.com/ClickHouse/ClickHouse/pull/6711) ([dimarub2000](https://github.com/dimarub2000))
- Added metric `DistributedFilesToInsert` that shows the total number of files in the filesystem that are selected to send to remote servers by Distributed tables. The number is summed across all shards. [\#6600](https://github.com/ClickHouse/ClickHouse/pull/6600) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Move most of the JOINs prepare logic from `ExpressionAction/ExpressionAnalyzer` to `AnalyzedJoin`. [\#6785](https://github.com/ClickHouse/ClickHouse/pull/6785) ([Artem Zuikov](https://github.com/4ertus2))
- Fix TSan [warning](https://clickhouse-test-reports.s3.yandex.net/6399/c1c1d1daa98e199e620766f1bd06a5921050a00d/functional_stateful_tests_(thread).html) 'lock-order-inversion'. [\#6740](https://github.com/ClickHouse/ClickHouse/pull/6740) ([Vasily Nemkov](https://github.com/Enmk))
- Better information messages about lack of Linux capabilities. Logging fatal errors with "fatal" level, which will make them easier to find in `system.text_log`. [\#6441](https://github.com/ClickHouse/ClickHouse/pull/6441) ([alexey-milovidov](https://github.com/alexey-milovidov))
- When dumping temporary data to disk to restrict memory usage during `GROUP BY`, `ORDER BY` is enabled, it didn't check the free disk space. The fix adds a new setting `min_free_disk_space`: when the free disk space is smaller than the threshold, the query will stop and throw `ErrorCodes::NOT_ENOUGH_SPACE`. [\#6678](https://github.com/ClickHouse/ClickHouse/pull/6678) ([Weiqing Xu](https://github.com/weiqxu)) [\#6691](https://github.com/ClickHouse/ClickHouse/pull/6691) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed recursive rwlock by thread. It makes no sense, because threads are reused between queries. A `SELECT` query may acquire a lock in one thread, hold a lock from another thread and exit from the first thread. At the same time, the first thread can be reused by a `DROP` query. This will lead to false "Attempt to acquire exclusive lock recursively" messages. [\#6771](https://github.com/ClickHouse/ClickHouse/pull/6771) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Split `ExpressionAnalyzer.appendJoin()`. Prepare a place in `ExpressionAnalyzer` for `MergeJoin`. [\#6524](https://github.com/ClickHouse/ClickHouse/pull/6524) ([Artem Zuikov](https://github.com/4ertus2))
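
The optional `throwIf` message from the entry above, in a self-contained query:

```sql
-- Returns 0 while the condition is false; raises an exception that carries
-- the custom text as soon as it becomes true.
SELECT throwIf(number = 7, 'sanity check failed: got the forbidden value')
FROM numbers(10);
```
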
- Added `mysql_native_password` authentication plugin to the MySQL compatibility server. [\#6194](https://github.com/ClickHouse/ClickHouse/pull/6194) ([Yuriy Baranov](https://github.com/yurriy))
- Fewer `clock_gettime` calls; fixed ABI compatibility between debug/release in `Allocator` (insignificant issue). [\#6197](https://github.com/ClickHouse/ClickHouse/pull/6197) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Move `collectUsedColumns` from `ExpressionAnalyzer` to `SyntaxAnalyzer`. `SyntaxAnalyzer` makes `required_source_columns` itself now. [\#6416](https://github.com/ClickHouse/ClickHouse/pull/6416) ([Artem Zuikov](https://github.com/4ertus2))
- Add setting `joined_subquery_requires_alias` to require aliases for subselects and table functions in `FROM` when more than one table is present (i.e. queries with JOINs). [\#6733](https://github.com/ClickHouse/ClickHouse/pull/6733) ([Artem Zuikov](https://github.com/4ertus2))
- Extract the `GetAggregatesVisitor` class from `ExpressionAnalyzer`. [\#6458](https://github.com/ClickHouse/ClickHouse/pull/6458) ([Artem Zuikov](https://github.com/4ertus2))
- `system.query_log`: change the data type of the `type` column to `Enum`. [\#6265](https://github.com/ClickHouse/ClickHouse/pull/6265) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Static linking of the `sha256_password` authentication plugin. [\#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy))
- Avoid extra dependency for the setting `compile` to work. In previous versions, the user may get errors like `cannot open crti.o`, `unable to find library -lc` etc. [\#6309](https://github.com/ClickHouse/ClickHouse/pull/6309) ([alexey-milovidov](https://github.com/alexey-milovidov))
- More validation of the input that may come from a malicious replica. [\#6303](https://github.com/ClickHouse/ClickHouse/pull/6303) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now the `clickhouse-obfuscator` file is available in the `clickhouse-client` package. In previous versions it was available as `clickhouse obfuscator` (with whitespace). [\#5816](https://github.com/ClickHouse/ClickHouse/issues/5816) [\#6609](https://github.com/ClickHouse/ClickHouse/pull/6609) ([dimarub2000](https://github.com/dimarub2000))
- Fixed deadlock when we have at least two queries that read at least two tables in different order and another query that performs a DDL operation on one of the tables. Fixed another very rare deadlock. [\#6764](https://github.com/ClickHouse/ClickHouse/pull/6764) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added `os_thread_ids` column to `system.processes` and `system.query_log` for better debugging possibilities. [\#6763](https://github.com/ClickHouse/ClickHouse/pull/6763) ([alexey-milovidov](https://github.com/alexey-milovidov))
- A workaround for PHP mysqlnd extension bugs which occur when `sha256_password` is used as the default authentication plugin (described in [\#6031](https://github.com/ClickHouse/ClickHouse/issues/6031)). [\#6113](https://github.com/ClickHouse/ClickHouse/pull/6113) ([Yuriy Baranov](https://github.com/yurriy))
- Remove unneeded space with changed nullability columns.
[\#6693](https://github.com/ClickHouse/ClickHouse/pull/6693) ([Artem Zuikov](https://github.com/4ertus2))
- Set default value of `queue_max_wait_ms` to zero, because the current value (five seconds) makes no sense. There are rare circumstances when this setting has any use. Added settings `replace_running_query_max_wait_ms`, `kafka_max_wait_ms` and `connection_pool_max_wait_ms` for disambiguation. [\#6692](https://github.com/ClickHouse/ClickHouse/pull/6692) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Extract `SelectQueryExpressionAnalyzer` from `ExpressionAnalyzer`. Keep the last one for non-select queries. [\#6499](https://github.com/ClickHouse/ClickHouse/pull/6499) ([Artem Zuikov](https://github.com/4ertus2))
- Removed duplicating input and output formats. [\#6239](https://github.com/ClickHouse/ClickHouse/pull/6239) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Allow the user to override `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `MergeTree` now has an additional option `ttl_only_drop_parts` (disabled by default) to avoid partial pruning of parts, so that they are dropped completely when all the rows in a part are expired. [\#6191](https://github.com/ClickHouse/ClickHouse/pull/6191) ([Sergi Vladykin](https://github.com/svladykin))
- Type checks for set index functions. Throw an exception if the function got a wrong type. This fixes the fuzz test with UBSan. [\#6511](https://github.com/ClickHouse/ClickHouse/pull/6511) ([Nikita Vasilev](https://github.com/nikvas0))

#### Performance Improvement {#performance-improvement-2}

- Optimize queries with an `ORDER BY expressions` clause, where `expressions` have a coinciding prefix with the sorting key in `MergeTree` tables. This optimization is controlled by the `optimize_read_in_order` setting (see the sketch below). [\#6054](https://github.com/ClickHouse/ClickHouse/pull/6054) [\#6629](https://github.com/ClickHouse/ClickHouse/pull/6629) ([Anton Popov](https://github.com/CurtizJ))
- Allow to use multiple threads during parts loading and removal. [\#6372](https://github.com/ClickHouse/ClickHouse/issues/6372) [\#6074](https://github.com/ClickHouse/ClickHouse/issues/6074) [\#6438](https://github.com/ClickHouse/ClickHouse/pull/6438) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Implemented batch variant of updating aggregate function states. It may lead to performance benefits. [\#6435](https://github.com/ClickHouse/ClickHouse/pull/6435) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Using the `FastOps` library for functions `exp`, `log`, `sigmoid`, `tanh`. FastOps is a fast vector math library from Michael Parakhin (Yandex CTO). Improved performance of `exp` and `log` functions more than 6 times. The functions `exp` and `log` from a `Float32` argument will return `Float32` (in previous versions they always returned `Float64`). Now `exp(nan)` may return `inf`. The result of the `exp` and `log` functions may be not the nearest machine representable number to the true answer. [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) Using Danila Kutenin's variant to make fastops working [\#6317](https://github.com/ClickHouse/ClickHouse/pull/6317) ([alexey-milovidov](https://github.com/alexey-milovidov))
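
A sketch of the `optimize_read_in_order` behaviour from the first entry of this section; the table `hits` and its sorting key `(CounterID, EventDate)` are assumptions:

```sql
SET optimize_read_in_order = 1;
-- ORDER BY shares a prefix with the sorting key, so parts can be read in
-- key order and merged instead of being fully re-sorted.
SELECT CounterID, EventDate
FROM hits
ORDER BY CounterID, EventDate
LIMIT 10;
```
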
- Disable consecutive key optimization for `UInt8/16`. [\#6298](https://github.com/ClickHouse/ClickHouse/pull/6298) [\#6701](https://github.com/ClickHouse/ClickHouse/pull/6701) ([akuzm](https://github.com/akuzm))
- Improved performance of the `simdjson` library by getting rid of dynamic allocation in `ParsedJson::Iterator`. [\#6479](https://github.com/ClickHouse/ClickHouse/pull/6479) ([Vitaly Baranov](https://github.com/vitlibar))
- Pre-fault pages when allocating memory with `mmap()`. [\#6667](https://github.com/ClickHouse/ClickHouse/pull/6667) ([akuzm](https://github.com/akuzm))
- Fix performance bug in `Decimal` comparison. [\#6380](https://github.com/ClickHouse/ClickHouse/pull/6380) ([Artem Zuikov](https://github.com/4ertus2))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-4}

- Remove Compiler (runtime template instantiation) because we've won over its performance. [\#6646](https://github.com/ClickHouse/ClickHouse/pull/6646) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added performance test to show degradation of performance in gcc-9 in a more isolated way. [\#6302](https://github.com/ClickHouse/ClickHouse/pull/6302) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added table function `numbers_mt`, which is a multithreaded version of `numbers` (see the sketch a few entries below). Updated performance tests with hash functions. [\#6554](https://github.com/ClickHouse/ClickHouse/pull/6554) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Comparison mode in `clickhouse-benchmark` [\#6220](https://github.com/ClickHouse/ClickHouse/issues/6220) [\#6343](https://github.com/ClickHouse/ClickHouse/pull/6343) ([dimarub2000](https://github.com/dimarub2000))
- Best effort for printing stack traces. Also added `SIGPROF` as a debugging signal to print the stack trace of a running thread. [\#6529](https://github.com/ClickHouse/ClickHouse/pull/6529) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Every function in its own file, part 10. [\#6321](https://github.com/ClickHouse/ClickHouse/pull/6321) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Remove doubled const `TABLE_IS_READ_ONLY`. [\#6566](https://github.com/ClickHouse/ClickHouse/pull/6566) ([filimonov](https://github.com/filimonov))
- Formatting changes for `StringHashMap` PR [\#5417](https://github.com/ClickHouse/ClickHouse/issues/5417). [\#6700](https://github.com/ClickHouse/ClickHouse/pull/6700) ([akuzm](https://github.com/akuzm))
- Better subquery for join creation in `ExpressionAnalyzer`. [\#6824](https://github.com/ClickHouse/ClickHouse/pull/6824) ([Artem Zuikov](https://github.com/4ertus2))
- Remove a redundant condition (found by PVS Studio). [\#6775](https://github.com/ClickHouse/ClickHouse/pull/6775) ([akuzm](https://github.com/akuzm))
- Separate the hash table interface for `ReverseIndex`. [\#6672](https://github.com/ClickHouse/ClickHouse/pull/6672) ([akuzm](https://github.com/akuzm))
- Refactoring of settings. [\#6689](https://github.com/ClickHouse/ClickHouse/pull/6689) ([alesapin](https://github.com/alesapin))
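
The `numbers_mt` table function added above can be tried like this (row counts are arbitrary):

```sql
-- Same rows as numbers(), but produced by several threads, which is what
-- the updated hash-function performance tests rely on.
SELECT count() FROM numbers_mt(100000000);
SELECT sum(cityHash64(number)) FROM numbers_mt(100000000);
```
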
- Add comments for `set` index functions. [\#6319](https://github.com/ClickHouse/ClickHouse/pull/6319) ([Nikita Vasilev](https://github.com/nikvas0))
- Increase the OOM score in the debug version on Linux. [\#6152](https://github.com/ClickHouse/ClickHouse/pull/6152) ([akuzm](https://github.com/akuzm))
- HDFS HA now works in debug build. [\#6650](https://github.com/ClickHouse/ClickHouse/pull/6650) ([Weiqing Xu](https://github.com/weiqxu))
- Added a test to `transform_query_for_external_database`. [\#6388](https://github.com/ClickHouse/ClickHouse/pull/6388) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add test for multiple materialized views for a Kafka table. [\#6509](https://github.com/ClickHouse/ClickHouse/pull/6509) ([Ivan](https://github.com/abyss7))
- Make a better build scheme. [\#6500](https://github.com/ClickHouse/ClickHouse/pull/6500) ([Ivan](https://github.com/abyss7))
- Fixed the `test_external_dictionaries` integration test in case it was executed under a non-root user. [\#6507](https://github.com/ClickHouse/ClickHouse/pull/6507) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- The bug reproduces when the total size of written packets exceeds `DBMS_DEFAULT_BUFFER_SIZE`. [\#6204](https://github.com/ClickHouse/ClickHouse/pull/6204) ([Yuriy Baranov](https://github.com/yurriy))
- Added a test for `RENAME` table race condition [\#6752](https://github.com/ClickHouse/ClickHouse/pull/6752) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid data race on Settings in `KILL QUERY`. [\#6753](https://github.com/ClickHouse/ClickHouse/pull/6753) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add integration test for handling errors by a cache dictionary. [\#6755](https://github.com/ClickHouse/ClickHouse/pull/6755) ([Vitaly Baranov](https://github.com/vitlibar))
- Disable parsing of ELF object files on Mac OS, because it makes no sense. [\#6578](https://github.com/ClickHouse/ClickHouse/pull/6578) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Attempt to make the changelog generator better. [\#6327](https://github.com/ClickHouse/ClickHouse/pull/6327) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Adding the `-Wshadow` switch to GCC. [\#6325](https://github.com/ClickHouse/ClickHouse/pull/6325) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
- Removed obsolete code for `mimalloc` support. [\#6715](https://github.com/ClickHouse/ClickHouse/pull/6715) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `zlib-ng` determines x86 capabilities and saves this info to global variables. This is done in the `deflateInit` call, which may be made by different threads simultaneously. To avoid multithreaded writes, do it on library startup. [\#6141](https://github.com/ClickHouse/ClickHouse/pull/6141) ([akuzm](https://github.com/akuzm))
- Regression test for a bug in join which was fixed in [\#5192](https://github.com/ClickHouse/ClickHouse/issues/5192). [\#6147](https://github.com/ClickHouse/ClickHouse/pull/6147) ([Bakhtiyor Ruziev](https://github.com/theruziev))
- Fixed MSan report. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix the flapping TTL test.
[\#6782](https://github.com/ClickHouse/ClickHouse/pull/6782) ([Anton Popov](https://github.com/CurtizJ))
- Fixed false data race in the `MergeTreeDataPart::is_frozen` field. [\#6583](https://github.com/ClickHouse/ClickHouse/pull/6583) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed timeouts in fuzz test. In the previous version, it managed to find a false hangup in the query `SELECT * FROM numbers_mt(gccMurmurHash(''))`. [\#6582](https://github.com/ClickHouse/ClickHouse/pull/6582) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added debug checks to `static_cast` of columns. [\#6581](https://github.com/ClickHouse/ClickHouse/pull/6581) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support for Oracle Linux in official RPM packages. [\#6356](https://github.com/ClickHouse/ClickHouse/issues/6356) [\#6585](https://github.com/ClickHouse/ClickHouse/pull/6585) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Changed json perftests from `once` to `loop` type. [\#6536](https://github.com/ClickHouse/ClickHouse/pull/6536) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- `odbc-bridge.cpp` defines `main()`, so it should not be included in `clickhouse-lib`. [\#6538](https://github.com/ClickHouse/ClickHouse/pull/6538) ([Orivej Desh](https://github.com/orivej))
- Test for crash in `FULL|RIGHT JOIN` with nulls in the right table's keys. [\#6362](https://github.com/ClickHouse/ClickHouse/pull/6362) ([Artem Zuikov](https://github.com/4ertus2))
- Added a test for the limit on expansion of aliases, just in case. [\#6442](https://github.com/ClickHouse/ClickHouse/pull/6442) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Switched from `boost::filesystem` to `std::filesystem` where appropriate. [\#6253](https://github.com/ClickHouse/ClickHouse/pull/6253) [\#6385](https://github.com/ClickHouse/ClickHouse/pull/6385) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added RPM packages to the website. [\#6251](https://github.com/ClickHouse/ClickHouse/pull/6251) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a test for the fixed `Unknown identifier` exception in `IN` section. [\#6708](https://github.com/ClickHouse/ClickHouse/pull/6708) ([Artem Zuikov](https://github.com/4ertus2))
- Simplify `shared_ptr_helper` because people are facing difficulties understanding it. [\#6675](https://github.com/ClickHouse/ClickHouse/pull/6675) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added performance tests for the fixed Gorilla and DoubleDelta codecs. [\#6179](https://github.com/ClickHouse/ClickHouse/pull/6179) ([Vasily Nemkov](https://github.com/Enmk))
- Split the integration test `test_dictionaries` into 4 separate tests. [\#6776](https://github.com/ClickHouse/ClickHouse/pull/6776) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix PVS-Studio warning in `PipelineExecutor`. [\#6777](https://github.com/ClickHouse/ClickHouse/pull/6777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Allow to use the `library` dictionary source with ASan. [\#6482](https://github.com/ClickHouse/ClickHouse/pull/6482) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added option to generate a changelog from a list of PRs. [\#6350](https://github.com/ClickHouse/ClickHouse/pull/6350) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Lock the `TinyLog` storage when reading.
[\#6226](https://github.com/ClickHouse/ClickHouse/pull/6226) ([akuzm](https://github.com/akuzm))
- Check for broken symlinks in CI. [\#6634](https://github.com/ClickHouse/ClickHouse/pull/6634) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Increase the timeout for the "stack overflow" test because it may take a long time in debug build. [\#6637](https://github.com/ClickHouse/ClickHouse/pull/6637) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added a check for double whitespaces. [\#6643](https://github.com/ClickHouse/ClickHouse/pull/6643) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix `new/delete` memory tracking when built with sanitizers. The tracking is not clear; it only prevents memory limit exceptions in tests. [\#6450](https://github.com/ClickHouse/ClickHouse/pull/6450) ([Artem Zuikov](https://github.com/4ertus2))
- Enable back the check of undefined symbols while linking. [\#6453](https://github.com/ClickHouse/ClickHouse/pull/6453) ([Ivan](https://github.com/abyss7))
- Avoid rebuilding `hyperscan` every day. [\#6307](https://github.com/ClickHouse/ClickHouse/pull/6307) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed UBSan report in `ProtobufWriter`. [\#6163](https://github.com/ClickHouse/ClickHouse/pull/6163) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Don't allow to use the query profiler with sanitizers because it is not compatible. [\#6769](https://github.com/ClickHouse/ClickHouse/pull/6769) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add test for reloading a dictionary after failure by timer. [\#6114](https://github.com/ClickHouse/ClickHouse/pull/6114) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix inconsistency in the `PipelineExecutor::prepareProcessor` argument type. [\#6494](https://github.com/ClickHouse/ClickHouse/pull/6494) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Added a test for bad URIs. [\#6493](https://github.com/ClickHouse/ClickHouse/pull/6493) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added more checks to the `CAST` function. This should get more information about the segmentation fault in the fuzzy test. [\#6346](https://github.com/ClickHouse/ClickHouse/pull/6346) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Added `gcc-9` support to the `docker/builder` container that builds the image locally. [\#6333](https://github.com/ClickHouse/ClickHouse/pull/6333) ([Gleb Novikov](https://github.com/NanoBjorn))
- Test for primary key with `LowCardinality(String)`. [\#5044](https://github.com/ClickHouse/ClickHouse/issues/5044) [\#6219](https://github.com/ClickHouse/ClickHouse/pull/6219) ([dimarub2000](https://github.com/dimarub2000))
- Fixed tests affected by slow stack traces printing. [\#6315](https://github.com/ClickHouse/ClickHouse/pull/6315) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a test case for the crash in `groupUniqArray` fixed in [\#6029](https://github.com/ClickHouse/ClickHouse/pull/6029). [\#4402](https://github.com/ClickHouse/ClickHouse/issues/4402) [\#6129](https://github.com/ClickHouse/ClickHouse/pull/6129) ([akuzm](https://github.com/akuzm))
- Fixed indices mutations tests.
[\#6645](https://github.com/ClickHouse/ClickHouse/pull/6645) ([Nikita Vasilev](https://github.com/nikvas0))
- In performance test, do not read the query log for queries we didn't run. [\#6427](https://github.com/ClickHouse/ClickHouse/pull/6427) ([akuzm](https://github.com/akuzm))
- A materialized view now can be created with any low cardinality types regardless of the setting about suspicious low cardinality types. [\#6428](https://github.com/ClickHouse/ClickHouse/pull/6428) ([Olga Khvostikova](https://github.com/stavrolia))
- Updated tests for the `send_logs_level` setting. [\#6207](https://github.com/ClickHouse/ClickHouse/pull/6207) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed build under gcc-8.2. [\#6196](https://github.com/ClickHouse/ClickHouse/pull/6196) ([Max Akhmedov](https://github.com/zlobober))
- Fixed build with internal libc++. [\#6724](https://github.com/ClickHouse/ClickHouse/pull/6724) ([Ivan](https://github.com/abyss7))
- Fixed shared build with the `rdkafka` library [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
- Fixes for Mac OS build (incomplete). [\#6390](https://github.com/ClickHouse/ClickHouse/pull/6390) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6429](https://github.com/ClickHouse/ClickHouse/pull/6429) ([Alex Zaitsev](https://github.com/alex-zaitsev))
- Fix "splitted" build. [\#6618](https://github.com/ClickHouse/ClickHouse/pull/6618) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Other build fixes: [\#6186](https://github.com/ClickHouse/ClickHouse/pull/6186) ([Amos Bird](https://github.com/amosbird)) [\#6486](https://github.com/ClickHouse/ClickHouse/pull/6486) [\#6348](https://github.com/ClickHouse/ClickHouse/pull/6348) ([vxider](https://github.com/Vxider)) [\#6744](https://github.com/ClickHouse/ClickHouse/pull/6744) ([Ivan](https://github.com/abyss7)) [\#6016](https://github.com/ClickHouse/ClickHouse/pull/6016) [\#6421](https://github.com/ClickHouse/ClickHouse/pull/6421) [\#6491](https://github.com/ClickHouse/ClickHouse/pull/6491) ([proller](https://github.com/proller))

#### Backward Incompatible Change {#backward-incompatible-change-3}

- Removed the rarely used table function `catBoostPool` and storage `CatBoostPool`. If you have used this table function, please write an email to `clickhouse-feedback@yandex-team.com`. Note that CatBoost integration remains and will be supported. [\#6279](https://github.com/ClickHouse/ClickHouse/pull/6279) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Disable `ANY RIGHT JOIN` and `ANY FULL JOIN` by default. Set the `any_join_distinct_right_table_keys` setting to enable them. [\#5126](https://github.com/ClickHouse/ClickHouse/issues/5126) [\#6351](https://github.com/ClickHouse/ClickHouse/pull/6351) ([Artem Zuikov](https://github.com/4ertus2))

## ClickHouse release 19.13 {#clickhouse-release-19-13}

### ClickHouse release 19.13.6.51, 2019-10-02 {#clickhouse-release-19-13-6-51-2019-10-02}

#### Bug Fix {#bug-fix-9}

- This release also contains all bug fixes from 19.11.12.69.

### ClickHouse release 19.13.5.44, 2019-09-20 {#clickhouse-release-19-13-5-44-2019-09-20}

#### Bug Fix {#bug-fix-10}

- This release also contains all bug fixes from 19.14.6.12.
- Fixed possible inconsistent state of the table while executing a `DROP` query for a replicated table while ZooKeeper is not accessible. [\#6045](https://github.com/ClickHouse/ClickHouse/issues/6045) [\#6413](https://github.com/ClickHouse/ClickHouse/pull/6413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Fix for data race in StorageMerge [\#6717](https://github.com/ClickHouse/ClickHouse/pull/6717) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix bug introduced in the query profiler which leads to endless recv from socket. [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) ([alesapin](https://github.com/alesapin))
- Fix excessive CPU usage while executing the `JSONExtractRaw` function over a boolean value. [\#6208](https://github.com/ClickHouse/ClickHouse/pull/6208) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixes the regression while pushing to materialized view. [\#6415](https://github.com/ClickHouse/ClickHouse/pull/6415) ([Ivan](https://github.com/abyss7))
- Table function `url` had a vulnerability that allowed the attacker to inject arbitrary HTTP headers in the request. This issue was found by [Nikita Tikhomirov](https://github.com/NSTikhomirov). [\#6466](https://github.com/ClickHouse/ClickHouse/pull/6466) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix useless `AST` check in Set index. [\#6510](https://github.com/ClickHouse/ClickHouse/issues/6510) [\#6651](https://github.com/ClickHouse/ClickHouse/pull/6651) ([Nikita Vasilev](https://github.com/nikvas0))
- Fixed parsing of `AggregateFunction` values embedded in a query. [\#6575](https://github.com/ClickHouse/ClickHouse/issues/6575) [\#6773](https://github.com/ClickHouse/ClickHouse/pull/6773) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed wrong behaviour of the `trim` functions family. [\#6647](https://github.com/ClickHouse/ClickHouse/pull/6647) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.13.4.32, 2019-09-10 {#clickhouse-release-19-13-4-32-2019-09-10}

#### Bug Fix {#bug-fix-11}

- This release also contains all bug security fixes from 19.11.9.52 and 19.11.10.54.
- Fixed data race in the `system.parts` table and `ALTER` query. [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245) [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed mismatched header in streams that happened in case of reading from an empty distributed table with sample and prewhere. [\#6167](https://github.com/ClickHouse/ClickHouse/issues/6167) ([Lixiang Qian](https://github.com/fancyqlx)) [\#6823](https://github.com/ClickHouse/ClickHouse/pull/6823) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed crash when using the `IN` clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
- Fix case with same column names in `GLOBAL JOIN ON` section. [\#6181](https://github.com/ClickHouse/ClickHouse/pull/6181) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed error with casting types to `Decimal` that do not support it; an exception is thrown instead (an illustration follows below). [\#6297](https://github.com/ClickHouse/ClickHouse/pull/6297) ([Artem Zuikov](https://github.com/4ertus2))
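
A hedged illustration of the `Decimal` casting fix just above; the entry does not name the offending type, so an `Array` argument, which has no `Decimal` conversion, stands in:

```sql
-- Expected to fail with a clean "conversion not supported" style exception
-- rather than misbehaving.
SELECT CAST([1, 2] AS Decimal(9, 2));
```
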
- Fixed crash in the `extractAll()` function. [\#6644](https://github.com/ClickHouse/ClickHouse/pull/6644) ([Artem Zuikov](https://github.com/4ertus2))
- Query transformation for `MySQL`, `ODBC`, `JDBC` table functions now works properly for `SELECT WHERE` queries with multiple `AND` expressions. [\#6381](https://github.com/ClickHouse/ClickHouse/issues/6381) [\#6676](https://github.com/ClickHouse/ClickHouse/pull/6676) ([dimarub2000](https://github.com/dimarub2000))
- Added previous declaration checks for MySQL 8 integration. [\#6569](https://github.com/ClickHouse/ClickHouse/pull/6569) ([Rafael David Tinoco](https://github.com/rafaeldtinoco))

#### Security Fix {#security-fix-1}

- Fix two vulnerabilities in codecs in the decompression phase (a malicious user can fabricate compressed data that will lead to buffer overflow in decompression). [\#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2))

### ClickHouse release 19.13.3.26, 2019-08-22 {#clickhouse-release-19-13-3-26-2019-08-22}

#### Bug Fix {#bug-fix-12}

- Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
- Fix NPE when using the IN clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
- Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed issue with parsing CSV [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
- Fixed data race in the `system.parts` table and the ALTER query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed possible data loss after an `ALTER DELETE` query on a table with skipping index.
[\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))

#### Security Fix {#security-fix-2}

- If the attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, it can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.13.2.19, 2019-08-14 {#clickhouse-release-19-13-2-19-2019-08-14}

#### New Feature {#new-feature-5}

- Sampling profiler on query level. [Example](https://gist.github.com/alexey-milovidov/92758583dd41c24c360fdb8d6a4da194). [\#4247](https://github.com/ClickHouse/ClickHouse/issues/4247) ([laplab](https://github.com/laplab)) [\#6124](https://github.com/ClickHouse/ClickHouse/pull/6124) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386)
- Allow to specify a list of columns with the `COLUMNS('regexp')` expression that works like a more sophisticated variant of the `*` asterisk (a usage sketch follows below). [\#5951](https://github.com/ClickHouse/ClickHouse/pull/5951) ([mfridental](https://github.com/mfridental)), ([alexey-milovidov](https://github.com/alexey-milovidov))
- `CREATE TABLE AS table_function()` is now possible [\#6057](https://github.com/ClickHouse/ClickHouse/pull/6057) ([dimarub2000](https://github.com/dimarub2000))
- The Adam optimizer for stochastic gradient descent is used by default in the `stochasticLinearRegression()` and `stochasticLogisticRegression()` aggregate functions, because it shows good quality almost without any tuning. [\#6000](https://github.com/ClickHouse/ClickHouse/pull/6000) ([Quid37](https://github.com/Quid37))
- Added functions for working with the custom week number [\#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) ([Andy Yang](https://github.com/andyyzh))
- `RENAME` queries now work with all storages. [\#5953](https://github.com/ClickHouse/ClickHouse/pull/5953) ([Ivan](https://github.com/abyss7))
- Now the client receives logs from the server with any desired level by setting `send_logs_level`, regardless of the log level specified in server settings. [\#5964](https://github.com/ClickHouse/ClickHouse/pull/5964) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))

#### Backward Incompatible Change {#backward-incompatible-change-4}

- The setting `input_format_defaults_for_omitted_fields` is enabled by default. Inserts into Distributed tables need this setting to be the same across the cluster (you need to set it before rolling an update). It enables calculation of complex default expressions for omitted fields in `JSONEachRow` and `CSV*` formats. It should be the expected behaviour but may lead to a negligible performance difference. [\#6043](https://github.com/ClickHouse/ClickHouse/pull/6043) ([Artem Zuikov](https://github.com/4ertus2)), [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
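
A hypothetical session showing two of the new features above, `COLUMNS('regexp')` and `CREATE TABLE AS table_function()`; all table and column names are made up for illustration:

```sql
CREATE TABLE metrics (metric_cpu Float64, metric_mem Float64, host String)
ENGINE = MergeTree ORDER BY host;

-- COLUMNS('regexp') expands to every column whose name matches the regexp.
SELECT COLUMNS('^metric_') FROM metrics;

-- Create a table directly from a table function.
CREATE TABLE first_ten AS numbers(10);
```
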
#### Experimental Feature {#experimental-features}

- New query processing pipeline. Use the `experimental_use_processors=1` option to enable it. Use for your own trouble. [\#4914](https://github.com/ClickHouse/ClickHouse/pull/4914) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Bug Fix {#bug-fix-13}

- Kafka integration has been fixed in this version.
- Fixed `DoubleDelta` encoding of `Int64` for large `DoubleDelta` values, improved `DoubleDelta` encoding of random data for `Int32`. [\#5998](https://github.com/ClickHouse/ClickHouse/pull/5998) ([Vasily Nemkov](https://github.com/Enmk))
- Fixed overestimation of `max_rows_to_read` if the setting `merge_tree_uniform_read_distribution` is set to 0. [\#6019](https://github.com/ClickHouse/ClickHouse/pull/6019) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Improvement {#improvement-4}

- Throws an exception if a `config.d` file doesn't have the corresponding root element as the config file [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))

#### Performance Improvement {#performance-improvement-3}

- Optimize `count()`. Now it uses the smallest column (if possible). [\#6028](https://github.com/ClickHouse/ClickHouse/pull/6028) ([Amos Bird](https://github.com/amosbird))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-5}

- Report memory usage in performance tests. [\#5899](https://github.com/ClickHouse/ClickHouse/pull/5899) ([akuzm](https://github.com/akuzm))
- Fix build with external `libcxx` [\#6010](https://github.com/ClickHouse/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7))
- Fix shared build with the `rdkafka` library [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))

## ClickHouse release 19.11 {#clickhouse-release-19-11}

### ClickHouse release 19.11.13.74, 2019-11-01 {#clickhouse-release-19-11-13-74-2019-11-01}

#### Bug Fix {#bug-fix-14}

- Fixed rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows). [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
- Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus json diagnostic messages. [\#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz))
- Fixed bug with the `mrk` file extension for mutations ([alesapin](https://github.com/alesapin))

### ClickHouse release 19.11.12.69, 2019-10-02 {#clickhouse-release-19-11-12-69-2019-10-02}

#### Bug Fix {#bug-fix-15}

- Fixed performance degradation of index analysis on complex keys on big tables. This fixes [\#6924](https://github.com/ClickHouse/ClickHouse/issues/6924).
[\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid rare SIGSEGV while sending data in tables with Distributed engine (`Failed to send batch: file with index XXXXX is absent`). [\#7032](https://github.com/ClickHouse/ClickHouse/pull/7032) ([Azat Khuzhin](https://github.com/azat))
- Fix `Unknown identifier` with multiple joins. This fixes [\#5254](https://github.com/ClickHouse/ClickHouse/issues/5254). [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))

### ClickHouse release 19.11.11.57, 2019-09-13 {#clickhouse-release-19-11-11-57-2019-09-13}

- Fixed logical error causing segfaults when selecting from an empty Kafka topic. [\#6902](https://github.com/ClickHouse/ClickHouse/issues/6902) [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
- Fix for the function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))

### ClickHouse release 19.11.10.54, 2019-09-10 {#clickhouse-release-19-11-10-54-2019-09-10}

#### Bug Fix {#bug-fix-16}

- Store offsets for Kafka messages manually to be able to commit them all at once for all partitions. Fixes potential duplication in the "one consumer - many partitions" scenario. [\#6872](https://github.com/ClickHouse/ClickHouse/pull/6872) ([Ivan](https://github.com/abyss7))

### ClickHouse release 19.11.9.52, 2019-09-6 {#clickhouse-release-19-11-9-52-2019-09-6}

- Improve error handling in cache dictionaries. [\#6737](https://github.com/ClickHouse/ClickHouse/pull/6737) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed bug in the function `arrayEnumerateUniqRanked`. [\#6779](https://github.com/ClickHouse/ClickHouse/pull/6779) ([proller](https://github.com/proller))
- Fix the `JSONExtract` function while extracting a `Tuple` from JSON. [\#6718](https://github.com/ClickHouse/ClickHouse/pull/6718) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed possible data loss after an `ALTER DELETE` query on a table with skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
- Fixed performance test. [\#6392](https://github.com/ClickHouse/ClickHouse/pull/6392) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Parquet: Fix reading boolean columns. [\#6579](https://github.com/ClickHouse/ClickHouse/pull/6579) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong behaviour of the `nullIf` function for constant arguments. [\#6518](https://github.com/ClickHouse/ClickHouse/pull/6518) ([Guillaume Tassery](https://github.com/YiuRULE)) [\#6580](https://github.com/ClickHouse/ClickHouse/pull/6580) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix Kafka messages duplication problem on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
- Fixed an issue when a long `ALTER UPDATE` or `ALTER DELETE` may prevent regular merges from running. Prevent mutations from executing if there are not enough free threads available.
[\#6502](https://github.com/ClickHouse/ClickHouse/issues/6502) [\#6617](https://github.com/ClickHouse/ClickHouse/pull/6617) ([tavplubix](https://github.com/tavplubix))
- Fixed error with processing "timezone" in the server configuration file. [\#6709](https://github.com/ClickHouse/ClickHouse/pull/6709) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix Kafka tests. [\#6805](https://github.com/ClickHouse/ClickHouse/pull/6805) ([Ivan](https://github.com/abyss7))

#### Security Fix {#security-fix-3}

- If the attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, it can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.11.8.46, 2019-08-22 {#clickhouse-release-19-11-8-46-2019-08-22}

#### Bug Fix {#bug-fix-17}

- Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
- Fix NPE when using the IN clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
- Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed issue with parsing CSV [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
- Fixed data race in the `system.parts` table and the ALTER query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.11.7.40, 2019-08-14 {#clickhouse-release-19-11-7-40-2019-08-14}

#### Bug Fix {#bug-fix-18}

- Kafka integration has been fixed in this version.
- Fixed segfault when using `arrayReduce` for constant arguments.
[\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправлено `toFloat()` монотонность. [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000)) -- Исправлена обработка выхода онлайн / оффлайн с поддержкой `optimize_skip_unused_shards` и пропал ключ от осколков. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([CurtizJ](https://github.com/CurtizJ)) -- Фиксированная логика работы `arrayEnumerateUniqRanked` функция. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Удалено дополнительное подробное ведение журнала из обработчика MySQL. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправить неправильное поведение и возможные вылеты в `topK` и `topKWeighted` агрегированные функции. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([CurtizJ](https://github.com/CurtizJ)) -- Не выставляйте виртуальные столбцы в `system.columns` стол. Это необходимо для обратной совместимости. [\#6406](https://github.com/ClickHouse/ClickHouse/pull/6406) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправлена ошибка с выделением памяти для строковых полей в сложном словаре кэша ключей. [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([алесапин](https://github.com/alesapin)) -- Исправлена ошибка с включением адаптивной детализации при создании новой реплики для `Replicated*MergeTree` стол. [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([алесапин](https://github.com/alesapin)) -- Исправьте бесконечный цикл при чтении сообщений Кафки. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([abyss7](https://github.com/abyss7)) -- Исправлена возможность готовых запрос, чтобы вызвать падение сервера из-за переполнения стека в парсер SQL и возможность переполнения стека в `Merge` и `Distributed` таблицы [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправлена ошибка кодирования горилл на небольших последовательностях. [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Enmk](https://github.com/Enmk)) - -#### Улучшение {#improvement-5} - -- Позволяет пользователю переопределить `poll_interval` и `idle_connection_timeout` настройки при подключении. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) - -### ClickHouse релиз 19.11.5.28, 2019-08-05 {#clickhouse-release-19-11-5-28-2019-08-05} - -#### Исправление ошибок {#bug-fix-19} - -- Исправлена возможность зависания запросов при перегрузке сервера. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправьте FPE в функции yandexConsistentHash. Это исправление [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304). [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправлена ошибка в преобразовании `LowCardinality` напечатать `AggregateFunctionFactory`. Это исправление [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). 
[\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Николай Кочетов](https://github.com/KochetovNicolai)) -- Исправьте синтаксический анализ `bool` настройки от `true` и `false` строки в файлах конфигурации. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([алесапин](https://github.com/alesapin)) -- Исправлена редкая ошибка с несовместимыми заголовками потока в запросах к `Distributed` стол `MergeTree` таблица, когда часть `WHERE` движется к `PREWHERE`. [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([алесапин](https://github.com/alesapin)) -- Исправлено переполнение при целочисленном делении знакового типа на беззнаковый. Это исправление [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214). [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) - -#### Назад Несовместимые Изменения {#backward-incompatible-change-5} - -- `Kafka` все еще сломанный. - -### ClickHouse релиз 19.11.4.24, 2019-08-01 {#clickhouse-release-19-11-4-24-2019-08-01} - -#### Исправление ошибок {#bug-fix-20} - -- Исправлена ошибка с написанием вторичных индексных меток с адаптивной детализацией. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([алесапин](https://github.com/alesapin)) -- Чинить `WITH ROLLUP` и `WITH CUBE` модификаторы `GROUP BY` с двухуровневой агрегацией. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Антон Попов](https://github.com/CurtizJ)) -- Исправлено зависание `JSONExtractRaw` функция. Исправлено [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправлена ошибка segfault в ExternalLoader:: reloadOutdated (). [\#6082](https://github.com/ClickHouse/ClickHouse/pull/6082) ([Виталий Баранов](https://github.com/vitlibar)) -- Исправлен случай, когда сервер может закрыть прослушивающие сокеты, но не выключаться и продолжать обслуживать оставшиеся запросы. В конечном итоге вы можете получить два запущенных процесса clickhouse-server. Иногда сервер может выдать ошибку `bad_function_call` для остальных запросов. [\#6231](https://github.com/ClickHouse/ClickHouse/pull/6231) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправлено бесполезное и некорректное условие на поле update для начальной загрузки внешних словарей через ODBC, MySQL, ClickHouse и HTTP. Это исправление [\#6069](https://github.com/ClickHouse/ClickHouse/issues/6069) [\#6083](https://github.com/ClickHouse/ClickHouse/pull/6083) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправлено неуместное исключение в приведении `LowCardinality(Nullable)` to not-Nullable column in case if it doesn't contain Nulls (e.g. in query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`. [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Николай Кочетов](https://github.com/KochetovNicolai)) -- Зафиксировать недетерминированный результат «uniq» агрегатная функция в крайне редких случаях. Ошибка присутствовала во всех версиях ClickHouse. [\#6058](https://github.com/ClickHouse/ClickHouse/pull/6058) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Обработка выхода онлайн / оффлайн, когда мы немного завышены запись на функцию `IPv6CIDRToRange`. 
[\#6068](https://github.com/ClickHouse/ClickHouse/pull/6068) ([Гийом Тассери](https://github.com/YiuRULE)) -- Исправлена небольшая утечка памяти, когда сервер выбрасывает много исключений из многих различных контекстов. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Исправьте ситуацию, когда потребитель сделал паузу перед подпиской и не возобновил ее после этого. [\#6075](https://github.com/ClickHouse/ClickHouse/pull/6075) ([Иван](https://github.com/abyss7)) Обратите внимание, что Кафка разбит в этой версии. -- Очистка буфера данных Кафки от предыдущей операции чтения, которая была завершена с ошибкой [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Николай](https://github.com/bopohaa)) Обратите внимание, что Кафка разбит в этой версии. -- С `StorageMergeTree::background_task_handle` инициализируется в `startup()` то `MergeTreeBlockOutputStream::write()` возможно, вы попытаетесь использовать его перед инициализацией. Просто проверьте, инициализирован ли он. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Иван](https://github.com/abyss7)) - -#### Сборка/Тестирование / Улучшение Упаковки {#buildtestingpackaging-improvement-6} - -- Добавлено официальное лицо `rpm` пакеты. [\#5740](https://github.com/ClickHouse/ClickHouse/pull/5740) ([proller](https://github.com/proller)) ([алесапин](https://github.com/alesapin)) -- Добавьте возможность строить `.rpm` и `.tgz` пакеты с `packager` скрипт. [\#5769](https://github.com/ClickHouse/ClickHouse/pull/5769) ([алесапин](https://github.com/alesapin)) -- Исправления для «Arcadia» система сборки. [\#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller)) - -#### Назад Несовместимые Изменения {#backward-incompatible-change-6} - -- `Kafka` сломан в этой версии. - -### ClickHouse релиз 19.11.3.11, 2019-07-18 {#clickhouse-release-19-11-3-11-2019-07-18} - -#### Новая функция {#new-feature-6} - -- Добавлена поддержка подготовленных заявлений. [\#5331](https://github.com/ClickHouse/ClickHouse/pull/5331/) ([Александр](https://github.com/sanych73)) [\#5630](https://github.com/ClickHouse/ClickHouse/pull/5630) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- `DoubleDelta` и `Gorilla` кодеки столбцов [\#5600](https://github.com/ClickHouse/ClickHouse/pull/5600) ([Василий Немков](https://github.com/Enmk)) -- Добавлен `os_thread_priority` настройка, позволяющая контролировать «nice» значение потоков обработки запросов, используемых ОС для настройки приоритета динамического планирования. Для этого требуется `CAP_SYS_NICE` возможности для работы. Это орудия труда [\#5858](https://github.com/ClickHouse/ClickHouse/issues/5858) [\#5909](https://github.com/ClickHouse/ClickHouse/pull/5909) ([Алексей-Миловидов](https://github.com/alexey-milovidov)) -- Осуществлять `_topic`, `_offset`, `_key` колонны для двигателя Кафки [\#5382](https://github.com/ClickHouse/ClickHouse/pull/5382) ([Иван](https://github.com/abyss7)) Обратите внимание, что Кафка разбит в этой версии. -- Добавить комбинатор агрегатных функций `-Resample` [\#5590](https://github.com/ClickHouse/ClickHouse/pull/5590) ([hcz](https://github.com/hczhcz)) -- Статистическая функция `groupArrayMovingSum(win_size)(x)` и `groupArrayMovingAvg(win_size)(x)`, которые вычисляют движущуюся сумму / среднее значение с ограничением размера окна или без него. 
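A minimal sketch of the new moving-window aggregates mentioned above (the `sales` table and its columns are invented for illustration):

```sql
-- Hypothetical table with one value per row.
CREATE TABLE sales (day Date, amount UInt32) ENGINE = MergeTree ORDER BY day;

SELECT
    groupArrayMovingSum(3)(amount) AS moving_sum_window_3,  -- window of 3 values
    groupArrayMovingAvg(amount)    AS moving_avg_unbounded  -- no window limit
FROM sales;
```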
#### Bug Fix {#bug-fix-21}

- Implement a DNS cache with asynchronous update. A separate thread resolves all hosts and updates the DNS cache with a period (setting `dns_cache_update_period`). It should help when the IPs of hosts change frequently. [\#5857](https://github.com/ClickHouse/ClickHouse/pull/5857) ([Anton Popov](https://github.com/CurtizJ))
- Fixed a segfault in the `Delta` codec which affects columns with values less than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
- Fixed a segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
- Fixed a rare bug in checking a part with a `LowCardinality` column. Previously `checkDataPart` always failed for a part with a `LowCardinality` column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
- Avoid hanging connections when the server thread pool is full. This is important for connections from the `remote` table function, or connections to a shard without replicas when there is a long connection timeout. This fixes [\#5878](https://github.com/ClickHouse/ClickHouse/issues/5878). [\#5881](https://github.com/ClickHouse/ClickHouse/pull/5881) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support constant arguments for the `evalMLModel` function. This fixes [\#5817](https://github.com/ClickHouse/ClickHouse/issues/5817). [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the issue when ClickHouse determines the default time zone as `UCT` instead of `UTC`. This fixes [\#5804](https://github.com/ClickHouse/ClickHouse/issues/5804). [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed buffer underflow in `visitParamExtractRaw`. This fixes [\#5901](https://github.com/ClickHouse/ClickHouse/issues/5901). [\#5902](https://github.com/ClickHouse/ClickHouse/pull/5902) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now distributed `DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER` queries will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
- Fix `coalesce` for `ColumnConst` with `ColumnNullable` + related changes. [\#5755](https://github.com/ClickHouse/ClickHouse/pull/5755) ([Artem Zuykov](https://github.com/4ertus2))
- Fix `ReadBufferFromKafkaConsumer` so that it keeps reading new messages after `commit()` even if it was stalled before. [\#5852](https://github.com/ClickHouse/ClickHouse/pull/5852) ([Ivan](https://github.com/abyss7))
- Fix `FULL` and `RIGHT` JOIN results when joining on `Nullable` keys in the right table. [\#5859](https://github.com/ClickHouse/ClickHouse/pull/5859) ([Artem Zuykov](https://github.com/4ertus2))
- Possible fix for infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a race condition which caused some queries to possibly not appear in query\_log after a `SYSTEM FLUSH LOGS` query. [\#5456](https://github.com/ClickHouse/ClickHouse/issues/5456) [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
- Fixed a `heap-use-after-free` ASan warning in ClusterCopier caused by a watch trying to use an already removed copier object. [\#5871](https://github.com/ClickHouse/ClickHouse/pull/5871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed a wrong `StringRef` pointer returned by some implementations of `IColumn::deserializeAndInsertFromArena`. This bug affected only unit tests. [\#5973](https://github.com/ClickHouse/ClickHouse/pull/5973) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Prevent source and intermediate ARRAY JOIN columns from masking columns with the same name. [\#5941](https://github.com/ClickHouse/ClickHouse/pull/5941) ([Artem Zuykov](https://github.com/4ertus2))
- Fix INSERT and SELECT queries to the MySQL engine with MySQL-style identifier quoting. [\#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014))
- Now the `CHECK TABLE` query can work with the MergeTree engine family. It returns a check status and message, if any, for each part (or file, in the case of simpler engines); see the sketch after this list. Also, fixed a bug in fetching a broken part. [\#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin))
- Fixed the SPLIT\_SHARED\_LIBRARIES runtime. [\#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1))
- Fixed time zone initialization when `/etc/localtime` is a relative symlink like `../usr/share/zoneinfo/Europe/Moscow`. [\#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov))
- clickhouse-copier: fixed use-after-free on shutdown. [\#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller))
- Updated `simdjson`. Fixed the issue where some invalid JSONs with zero bytes were parsed successfully. [\#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed shutdown of SystemLogs. [\#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ))
- Fix hanging when a condition in invalidate\_query depends on a dictionary. [\#6011](https://github.com/ClickHouse/ClickHouse/pull/6011) ([Vitaly Baranov](https://github.com/vitlibar))
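To illustrate the `CHECK TABLE` behaviour described above, a minimal usage sketch (the table name is a placeholder):

```sql
-- For MergeTree-family tables this now returns a check status
-- (and a message, if any) per part instead of throwing an exception.
CHECK TABLE my_merge_tree_table;
```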
#### Improvement {#improvement-6}

- Allow unresolvable addresses in cluster configuration. They will be considered unavailable, and resolution will be attempted at every connection attempt. This is especially useful for Kubernetes. This fixes [\#5714](https://github.com/ClickHouse/ClickHouse/issues/5714). [\#5924](https://github.com/ClickHouse/ClickHouse/pull/5924) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Close idle TCP connections (with a one-hour timeout by default). This is especially important for large clusters with multiple distributed tables on every server, because every server can possibly keep a connection pool to every other server, and after peak query concurrency connections would stall. This fixes [\#5879](https://github.com/ClickHouse/ClickHouse/issues/5879). [\#5880](https://github.com/ClickHouse/ClickHouse/pull/5880) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Better quality of the `topK` function. Changed the SpaceSaving set behaviour to remove the last element if the new element has a bigger weight. [\#5833](https://github.com/ClickHouse/ClickHouse/issues/5833) [\#5850](https://github.com/ClickHouse/ClickHouse/pull/5850) ([Guillaume Tassery](https://github.com/YiuRULE))
- URL functions for working with domains now work for incomplete URLs without a scheme. [\#5725](https://github.com/ClickHouse/ClickHouse/pull/5725) ([alesapin](https://github.com/alesapin))
- Checksums added to the `system.parts_columns` table. [\#5874](https://github.com/ClickHouse/ClickHouse/pull/5874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Added the `Enum` data type as a synonym for `Enum8` or `Enum16`; see the example after this list. [\#5886](https://github.com/ClickHouse/ClickHouse/pull/5886) ([dimarub2000](https://github.com/dimarub2000))
- Full bit transpose variant for the `T64` codec. Could lead to better compression with `zstd`. [\#5742](https://github.com/ClickHouse/ClickHouse/pull/5742) ([Artem Zuykov](https://github.com/4ertus2))
- A condition on the `startsWith` function can now use the primary key. This fixes [\#5310](https://github.com/ClickHouse/ClickHouse/issues/5310) and [\#5882](https://github.com/ClickHouse/ClickHouse/issues/5882). [\#5919](https://github.com/ClickHouse/ClickHouse/pull/5919) ([dimarub2000](https://github.com/dimarub2000))
- Allow using `clickhouse-copier` with a cross-replication cluster topology by permitting an empty database name. [\#5745](https://github.com/ClickHouse/ClickHouse/pull/5745) ([nvartolomei](https://github.com/nvartolomei))
- Use `UTC` as the default time zone on a system without `tzdata` (e.g. a bare Docker container). Before this patch, the error message `Could not determine local time zone` was printed, and the server or client refused to start. [\#5827](https://github.com/ClickHouse/ClickHouse/pull/5827) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Returned back support for a floating-point argument in the `quantileTiming` function for backward compatibility. [\#5911](https://github.com/ClickHouse/ClickHouse/pull/5911) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Show which table is missing a column in error messages. [\#5768](https://github.com/ClickHouse/ClickHouse/pull/5768) ([Ivan](https://github.com/abyss7))
- Disallow running a query with the same query\_id by different users. [\#5430](https://github.com/ClickHouse/ClickHouse/pull/5430) ([proller](https://github.com/proller))
- More robust code for sending metrics to Graphite. It will work even during long multiple `RENAME TABLE` operations. [\#5875](https://github.com/ClickHouse/ClickHouse/pull/5875) ([alexey-milovidov](https://github.com/alexey-milovidov))
- More informative error messages will be displayed when ThreadPool cannot schedule a task for execution. This fixes [\#5305](https://github.com/ClickHouse/ClickHouse/issues/5305). [\#5801](https://github.com/ClickHouse/ClickHouse/pull/5801) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Invert ngramSearch to be more intuitive. [\#5807](https://github.com/ClickHouse/ClickHouse/pull/5807) ([Danila Kutenin](https://github.com/danlark1))
- Add user parsing in the HDFS engine builder. [\#5946](https://github.com/ClickHouse/ClickHouse/pull/5946) ([akonyaev90](https://github.com/akonyaev90))
- Updated the default value of the `max_ast_elements` parameter. [\#5933](https://github.com/ClickHouse/ClickHouse/pull/5933) ([Artem Konovalov](https://github.com/izebit))
- Added the notion of obsolete settings. The obsolete setting `allow_experimental_low_cardinality_type` can be used with no effect. [0f15c01c6802f7ce1a1494c12c846be8c98944cd](https://github.com/ClickHouse/ClickHouse/commit/0f15c01c6802f7ce1a1494c12c846be8c98944cd) [Alexey Milovidov](https://github.com/alexey-milovidov)
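A quick illustration of the `Enum` synonym (the table and value names are invented):

```sql
-- `Enum` without an explicit width is accepted; the narrowest
-- of Enum8/Enum16 that fits the values is chosen.
CREATE TABLE events
(
    level Enum('info' = 1, 'warning' = 2, 'error' = 3)
)
ENGINE = MergeTree ORDER BY tuple();
```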
#### Performance Improvement {#performance-improvement-4}

- Increase the number of streams to SELECT from a Merge table for a more uniform distribution of threads. Added the setting `max_streams_multiplier_for_merge_tables`. This fixes [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797). [\#5915](https://github.com/ClickHouse/ClickHouse/pull/5915) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-7}

- Add a backward compatibility test for client-server interaction with different versions of ClickHouse. [\#5868](https://github.com/ClickHouse/ClickHouse/pull/5868) ([alesapin](https://github.com/alesapin))
- Test coverage information in every commit and pull request. [\#5896](https://github.com/ClickHouse/ClickHouse/pull/5896) ([alesapin](https://github.com/alesapin))
- Cooperate with Address Sanitizer to support our custom allocators (`Arena` and `ArenaWithFreeLists`) for better debugging of "use-after-free" errors. [\#5728](https://github.com/ClickHouse/ClickHouse/pull/5728) ([akuzm](https://github.com/akuzm))
- Switch to the [LLVM libunwind implementation](https://github.com/llvm-mirror/libunwind) for C++ exception handling and for printing stack traces. [\#4828](https://github.com/ClickHouse/ClickHouse/pull/4828) ([Nikita Lapkov](https://github.com/laplab))
- Add two more warnings from -Weverything. [\#5923](https://github.com/ClickHouse/ClickHouse/pull/5923) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow building ClickHouse with Memory Sanitizer. [\#3949](https://github.com/ClickHouse/ClickHouse/pull/3949) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a ubsan report about the `bitTest` function in a fuzz test. [\#5943](https://github.com/ClickHouse/ClickHouse/pull/5943) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Docker: added the possibility to initialize a ClickHouse instance that requires authentication. [\#5727](https://github.com/ClickHouse/ClickHouse/pull/5727) ([Korviakov Andrey](https://github.com/shurshun))
- Update librdkafka to version 1.1.0. [\#5872](https://github.com/ClickHouse/ClickHouse/pull/5872) ([Ivan](https://github.com/abyss7))
- Add a global timeout for integration tests and disable some of them in the test code. [\#5741](https://github.com/ClickHouse/ClickHouse/pull/5741) ([alesapin](https://github.com/alesapin))
- Fix some ThreadSanitizer failures. [\#5854](https://github.com/ClickHouse/ClickHouse/pull/5854) ([akuzm](https://github.com/akuzm))
- The `--no-undefined` option forces the linker to check all external names for existence while linking. It's very useful to track real dependencies between libraries in the split build mode. [\#5855](https://github.com/ClickHouse/ClickHouse/pull/5855) ([Ivan](https://github.com/abyss7))
- Added a performance test for [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797). [\#5914](https://github.com/ClickHouse/ClickHouse/pull/5914) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed compatibility with gcc-7. [\#5840](https://github.com/ClickHouse/ClickHouse/pull/5840) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added support for gcc-9. This fixes [\#5717](https://github.com/ClickHouse/ClickHouse/issues/5717). [\#5774](https://github.com/ClickHouse/ClickHouse/pull/5774) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an error where libunwind could be linked incorrectly. [\#5948](https://github.com/ClickHouse/ClickHouse/pull/5948) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a few warnings found by PVS-Studio. [\#5921](https://github.com/ClickHouse/ClickHouse/pull/5921) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added initial support for the `clang-tidy` static analyzer. [\#5806](https://github.com/ClickHouse/ClickHouse/pull/5806) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Convert BSD/Linux endian macros (`be64toh` and `htobe64`) to their Mac OS X equivalents. [\#5785](https://github.com/ClickHouse/ClickHouse/pull/5785) ([Fu Chen](https://github.com/fredchenbj))
- Improved the integration tests guide. [\#5796](https://github.com/ClickHouse/ClickHouse/pull/5796) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fix build on macOS + gcc9. [\#5822](https://github.com/ClickHouse/ClickHouse/pull/5822) ([filimonov](https://github.com/filimonov))
- Fix a hard-to-spot typo: aggreAGte -\> aggregate. [\#5753](https://github.com/ClickHouse/ClickHouse/pull/5753) ([akuzm](https://github.com/akuzm))
- Fixed the FreeBSD build. [\#5760](https://github.com/ClickHouse/ClickHouse/pull/5760) ([proller](https://github.com/proller))
- Add a link to the experimental YouTube channel to the website. [\#5845](https://github.com/ClickHouse/ClickHouse/pull/5845) ([Ivan Blinkov](https://github.com/blinkov))
- CMake: add an option for coverage flags: WITH\_COVERAGE. [\#5776](https://github.com/ClickHouse/ClickHouse/pull/5776) ([proller](https://github.com/proller))
- Fix the initial size of some inline PODArrays. [\#5787](https://github.com/ClickHouse/ClickHouse/pull/5787) ([akuzm](https://github.com/akuzm))
- clickhouse-server.postinst: fix OS detection for CentOS 6. [\#5788](https://github.com/ClickHouse/ClickHouse/pull/5788) ([proller](https://github.com/proller))
- Added Arch Linux package generation. [\#5719](https://github.com/ClickHouse/ClickHouse/pull/5719) ([Vladimir Chebotarev](https://github.com/excitoon))
- Split Common/config.h by libs (dbms). [\#5715](https://github.com/ClickHouse/ClickHouse/pull/5715) ([proller](https://github.com/proller))
- Fixes for the "Arcadia" build platform. [\#5795](https://github.com/ClickHouse/ClickHouse/pull/5795) ([proller](https://github.com/proller))
- Fixes for unconventional builds (gcc9, no submodules). [\#5792](https://github.com/ClickHouse/ClickHouse/pull/5792) ([proller](https://github.com/proller))
- Require an explicit type in unalignedStore because it was proven to be bug-prone. [\#5791](https://github.com/ClickHouse/ClickHouse/pull/5791) ([akuzm](https://github.com/akuzm))
- Fixed the macOS build. [\#5830](https://github.com/ClickHouse/ClickHouse/pull/5830) ([filimonov](https://github.com/filimonov))
- Performance test concerning the new JIT feature with a bigger dataset, as requested in [\#5263](https://github.com/ClickHouse/ClickHouse/issues/5263). [\#5887](https://github.com/ClickHouse/ClickHouse/pull/5887) ([Guillaume Tassery](https://github.com/YiuRULE))
- Run stateful tests in the stress test. [12693e568722f11e19859742f56428455501fd2a](https://github.com/ClickHouse/ClickHouse/commit/12693e568722f11e19859742f56428455501fd2a) ([alesapin](https://github.com/alesapin))

#### Backward Incompatible Change {#backward-incompatible-change-7}

- `Kafka` is broken in this version.
- Enable `adaptive_index_granularity` = 10MB by default for new `MergeTree` tables. If you created new MergeTree tables on version 19.11+, downgrading to versions prior to 19.6 will be impossible. [\#5628](https://github.com/ClickHouse/ClickHouse/pull/5628) ([alesapin](https://github.com/alesapin))
- Removed obsolete undocumented embedded dictionaries that were used by Yandex.Metrica. The functions `OSIn`, `SEIn`, `OSToRoot`, `SEToRoot`, `OSHierarchy`, `SEHierarchy` are no longer available. If you are using these functions, write an email to clickhouse-feedback@yandex-team.com. Note: at the last moment we decided to keep these functions for a while. [\#5780](https://github.com/ClickHouse/ClickHouse/pull/5780) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse release 19.10 {#clickhouse-release-19-10}

### ClickHouse release 19.10.1.5, 2019-07-12 {#clickhouse-release-19-10-1-5-2019-07-12}

#### New Feature {#new-feature-7}

- Add a new column codec: `T64`. Made for (U)IntX/EnumX/Date(Time)/DecimalX columns. It should be good for columns with constant or small-range values. The codec itself allows enlarging or shrinking the data type without re-compression; see the sketch after this list. [\#5557](https://github.com/ClickHouse/ClickHouse/pull/5557) ([Artem Zuykov](https://github.com/4ertus2))
- Add the database engine `MySQL` that allows viewing all the tables on a remote MySQL server. [\#5599](https://github.com/ClickHouse/ClickHouse/pull/5599) ([Winter Zhang](https://github.com/zhang2014))
- `bitmapContains` implementation. It's 2x faster than `bitmapHasAny` if the second bitmap contains one element. [\#5535](https://github.com/ClickHouse/ClickHouse/pull/5535) ([Zhichang Yu](https://github.com/yuzhichang))
- Support for the `crc32` function (with behaviour exactly the same as in MySQL or PHP). Do not use it if you need a hash function. [\#5661](https://github.com/ClickHouse/ClickHouse/pull/5661) ([Ivan Remen](https://github.com/BHYCHIK))
- Implemented the `SYSTEM START/STOP DISTRIBUTED SENDS` queries to control asynchronous inserts into `Distributed` tables. [\#4935](https://github.com/ClickHouse/ClickHouse/pull/4935) ([Winter Zhang](https://github.com/zhang2014))
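A sketch of declaring the `T64` codec described above (the table is hypothetical; the codec chain is one plausible choice, not a recommendation):

```sql
CREATE TABLE metrics
(
    ts    DateTime,
    -- T64 transposes bits within blocks of values, which helps
    -- general-purpose compression for small-range integers.
    value UInt64 CODEC(T64, LZ4)
)
ENGINE = MergeTree ORDER BY ts;
```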
#### Bug Fix {#bug-fix-22}

- Ignore query execution limits and the max parts size for merge limits while executing mutations. [\#5659](https://github.com/ClickHouse/ClickHouse/pull/5659) ([Anton Popov](https://github.com/CurtizJ))
- Fixed a bug which may lead to deduplication of normal blocks (extremely rare) and insertion of duplicate blocks (more often). [\#5549](https://github.com/ClickHouse/ClickHouse/pull/5549) ([alesapin](https://github.com/alesapin))
- Fix the `arrayEnumerateUniqRanked` function for arguments with empty arrays. [\#5559](https://github.com/ClickHouse/ClickHouse/pull/5559) ([proller](https://github.com/proller))
- Do not subscribe to Kafka topics without the intent to poll any messages. [\#5698](https://github.com/ClickHouse/ClickHouse/pull/5698) ([Ivan](https://github.com/abyss7))
- Make the `join_use_nulls` setting have no effect for types that cannot be inside Nullable. [\#5700](https://github.com/ClickHouse/ClickHouse/pull/5700) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed `Incorrect size of index granularity` errors. [\#5720](https://github.com/ClickHouse/ClickHouse/pull/5720) ([coraxster](https://github.com/coraxster))
- Fix Float-to-Decimal conversion overflow. [\#5607](https://github.com/ClickHouse/ClickHouse/pull/5607) ([coraxster](https://github.com/coraxster))
- Flush the buffer when the `WriteBufferFromHDFS` destructor is called. This fixes writing into `HDFS`. [\#5684](https://github.com/ClickHouse/ClickHouse/pull/5684) ([Xindong Peng](https://github.com/eejoin))

#### Improvement {#improvement-7}

- Treat empty cells in `CSV` as default values when the setting `input_format_defaults_for_omitted_fields` is enabled. [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
- Non-blocking loading of external dictionaries. [\#5567](https://github.com/ClickHouse/ClickHouse/pull/5567) ([Vitaly Baranov](https://github.com/vitlibar))
- Network timeouts can be dynamically changed for already established connections according to the settings. [\#4558](https://github.com/ClickHouse/ClickHouse/pull/4558) ([Konstantin Podshumok](https://github.com/podshumok))
- Use the "public\_suffix\_list" for the functions `firstSignificantSubdomain`, `cutToFirstSignificantSubdomain`; see the example after this list. It uses a perfect hash table generated by `gperf` from the list generated from the file https://publicsuffix.org/list/public\_suffix\_list.dat (for example, now we recognize the domain `ac.uk` as non-significant). [\#5030](https://github.com/ClickHouse/ClickHouse/pull/5030) ([Guillaume Tassery](https://github.com/YiuRULE))
- Adopted the `IPv6` data type in system tables; unified client info columns in `system.processes` and `system.query_log`. [\#5640](https://github.com/ClickHouse/ClickHouse/pull/5640) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Use sessions for connections with the MySQL compatibility protocol. \#5476 [\#5646](https://github.com/ClickHouse/ClickHouse/pull/5646) ([Yuri Baranov](https://github.com/yurriy))
- Support more `ALTER` queries `ON CLUSTER`. [\#5593](https://github.com/ClickHouse/ClickHouse/pull/5593) [\#5613](https://github.com/ClickHouse/ClickHouse/pull/5613) ([sundy-li](https://github.com/sundy-li))
- Support the `` section in the `clickhouse-local` config file. [\#5540](https://github.com/ClickHouse/ClickHouse/pull/5540) ([proller](https://github.com/proller))
- Allow query execution with the `remote` table function in `clickhouse-local`. [\#5627](https://github.com/ClickHouse/ClickHouse/pull/5627) ([proller](https://github.com/proller))
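For the public suffix list change above, a small illustration (the URL is made up; the results assume `ac.uk` is on the suffix list):

```sql
SELECT
    firstSignificantSubdomain('https://news.example.ac.uk/') AS sub,       -- 'example'
    cutToFirstSignificantSubdomain('https://news.example.ac.uk/') AS cut;  -- 'example.ac.uk'
```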
#### Performance Improvement {#performance-improvement-5}

- Add the possibility to write the final mark at the end of MergeTree columns. It allows avoiding useless reads for keys that are out of the table data range. It is enabled only if adaptive index granularity is in use. [\#5624](https://github.com/ClickHouse/ClickHouse/pull/5624) ([alesapin](https://github.com/alesapin))
- Improved performance of MergeTree tables on very slow filesystems by reducing the number of `stat` syscalls. [\#5648](https://github.com/ClickHouse/ClickHouse/pull/5648) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a performance degradation in reading from MergeTree tables that was introduced in version 19.6. Fixes \#5631. [\#5633](https://github.com/ClickHouse/ClickHouse/pull/5633) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-8}

- Implemented `TestKeeper` as an implementation of the ZooKeeper interface used for testing. [\#5643](https://github.com/ClickHouse/ClickHouse/pull/5643) ([alexey-milovidov](https://github.com/alexey-milovidov)) ([Levushkin Alexey](https://github.com/alexey-milovidov))
- From now on, `.sql` tests can be run isolated by the server, in parallel, with a random database. This allows running them faster, adding new tests with custom server configurations, and being sure that different tests do not affect each other. [\#5554](https://github.com/ClickHouse/ClickHouse/pull/5554) ([Ivan](https://github.com/abyss7))
- Remove `` and `` from performance tests. [\#5672](https://github.com/ClickHouse/ClickHouse/pull/5672) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed the "select\_format" performance test for `Pretty` formats. [\#5642](https://github.com/ClickHouse/ClickHouse/pull/5642) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse release 19.9 {#clickhouse-release-19-9}

### ClickHouse release 19.9.3.31, 2019-07-05 {#clickhouse-release-19-9-3-31-2019-07-05}

#### Bug Fix {#bug-fix-23}

- Fixed a segfault in the Delta codec which affects columns with values less than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
- Fixed a rare bug in checking a part with a LowCardinality column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
- Fixed a segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
- Fix potential infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix how ClickHouse determines the default time zone (as `UCT` instead of `UTC`). [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug with executing distributed DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER queries on a follower replica before the leader replica. Now they will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
- Fixed a race condition which caused some queries to possibly not appear in query\_log immediately after a SYSTEM FLUSH LOGS query. [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
- Added missing support for constant arguments to the `evalMLModel` function. [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.9.2.4, 2019-06-24 {#clickhouse-release-19-9-2-4-2019-06-24}

#### New Feature {#new-feature-8}

- Print information about frozen parts in the `system.parts` table. [\#5471](https://github.com/ClickHouse/ClickHouse/pull/5471) ([proller](https://github.com/proller))
- Ask for the client password on clickhouse-client start on a tty if it is not set in the arguments. [\#5092](https://github.com/ClickHouse/ClickHouse/pull/5092) ([proller](https://github.com/proller))
- Implement the `dictGet` and `dictGetOrDefault` functions for Decimal types; see the sketch after this list. [\#5394](https://github.com/ClickHouse/ClickHouse/pull/5394) ([Artem Zuykov](https://github.com/4ertus2))
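A minimal sketch of the Decimal support in dictionary functions (the dictionary `prices` and its attribute are hypothetical):

```sql
SELECT
    dictGet('prices', 'price', toUInt64(42)) AS price,
    -- The default value must match the attribute's Decimal type.
    dictGetOrDefault('prices', 'price', toUInt64(42), toDecimal64(0, 2)) AS price_or_default;
```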
#### Improvement {#improvement-8}

- Debian init: add service stop timeout. [\#5522](https://github.com/ClickHouse/ClickHouse/pull/5522) ([proller](https://github.com/proller))
- Added a setting, forbidden by default, for creating a table with suspicious types for LowCardinality; see the sketch after this list. [\#5448](https://github.com/ClickHouse/ClickHouse/pull/5448) ([Olga Khvostikova](https://github.com/stavrolia))
- Regression functions return model weights when not used as State in the `evalMLMethod` function. [\#5411](https://github.com/ClickHouse/ClickHouse/pull/5411) ([Quid37](https://github.com/Quid37))
- Rename and improve regression methods. [\#5492](https://github.com/ClickHouse/ClickHouse/pull/5492) ([Quid37](https://github.com/Quid37))
- Clearer interfaces for string searchers. [\#5586](https://github.com/ClickHouse/ClickHouse/pull/5586) ([Danila Kutenin](https://github.com/danlark1))
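A sketch of the LowCardinality restriction mentioned above; the setting name `allow_suspicious_low_cardinality_types` is an assumption based on this entry:

```sql
-- LowCardinality over small fixed-size types is rejected by default,
-- because it usually hurts rather than helps.
SET allow_suspicious_low_cardinality_types = 1;  -- assumed setting name
CREATE TABLE t (x LowCardinality(UInt8)) ENGINE = MergeTree ORDER BY x;
```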
#### Bug Fix {#bug-fix-24}

- Fix potential data loss in Kafka. [\#5445](https://github.com/ClickHouse/ClickHouse/pull/5445) ([Ivan](https://github.com/abyss7))
- Fix a potential infinite loop in the `PrettySpace` format when called with zero columns. [\#5560](https://github.com/ClickHouse/ClickHouse/pull/5560) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed a UInt32 overflow bug in linear models. Allow evaluating an ML model for a non-constant model argument. [\#5516](https://github.com/ClickHouse/ClickHouse/pull/5516) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- `ALTER TABLE ... DROP INDEX IF EXISTS ...` should not raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
- Fix a segfault with `bitmapHasAny` in a scalar subquery. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed an error where the replication connection pool doesn't retry resolving the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
- Fixed `ALTER ... MODIFY TTL` on ReplicatedMergeTree. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539) ([Anton Popov](https://github.com/CurtizJ))
- Fix INSERT into a Distributed table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))
- Fix bad alloc when truncating Join storage. [\#5437](https://github.com/ClickHouse/ClickHouse/pull/5437) ([TCeason](https://github.com/TCeason))
- In recent versions of the tzdata package some files are symlinks now. The current mechanism for detecting the default time zone gets broken and gives wrong names for some time zones. Now at least we force the time zone name to the contents of TZ if provided. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
- Fix some extremely rare cases with the MultiVolnitsky searcher when the constant needles in sum are at least 16KB long. The algorithm missed or overwrote previous results, which can lead to an incorrect result of `multiSearchAny`. [\#5588](https://github.com/ClickHouse/ClickHouse/pull/5588) ([Danila Kutenin](https://github.com/danlark1))
- Fixed an issue where settings for ExternalData requests couldn't use ClickHouse settings. Also, for now, the settings `date_time_input_format` and `low_cardinality_allow_in_native_format` cannot be used because of the ambiguity of names (in external data it can be interpreted as a table format, and in the query it can be a setting). [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
- Fixed a bug where parts were removed only from FS without dropping them from Zookeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
- Remove debug logging from the MySQL protocol. [\#5478](https://github.com/ClickHouse/ClickHouse/pull/5478) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Skip ZNONODE during DDL query processing. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
- Fix mixing `UNION ALL` result column types. There were cases with inconsistent data and column types of resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuykov](https://github.com/4ertus2))
- Throw an exception on wrong integers in `dictGetT` functions instead of a crash. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuykov](https://github.com/4ertus2))
- Fixed wrong element\_count and load\_factor for a hashed dictionary in the `system.dictionaries` table. [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-9}

- Fixed build without `Brotli` HTTP compression support (`ENABLE_BROTLI=OFF` cmake variable). [\#5521](https://github.com/ClickHouse/ClickHouse/pull/5521) ([Anton Yuzhaninov](https://github.com/citrin))
- Include roaring.h as roaring/roaring.h. [\#5523](https://github.com/ClickHouse/ClickHouse/pull/5523) ([Orivej Desh](https://github.com/orivej))
- Fix gcc9 warnings in hyperscan (the \#line directive is evil!). [\#5546](https://github.com/ClickHouse/ClickHouse/pull/5546) ([Danila Kutenin](https://github.com/danlark1))
- Fix all warnings when compiling with gcc-9. Fix some contrib issues. Fix a gcc9 ICE and submit it to bugzilla. [\#5498](https://github.com/ClickHouse/ClickHouse/pull/5498) ([Danila Kutenin](https://github.com/danlark1))
- Fixed linking with lld. [\#5477](https://github.com/ClickHouse/ClickHouse/pull/5477) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Remove unused specializations in dictionaries. [\#5452](https://github.com/ClickHouse/ClickHouse/pull/5452) ([Artem Zuykov](https://github.com/4ertus2))
- Improved performance tests for formatting and parsing tables for different types of files. [\#5497](https://github.com/ClickHouse/ClickHouse/pull/5497) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixes for parallel test runs. [\#5506](https://github.com/ClickHouse/ClickHouse/pull/5506) ([proller](https://github.com/proller))
- Docker: use configs from clickhouse-test. [\#5531](https://github.com/ClickHouse/ClickHouse/pull/5531) ([proller](https://github.com/proller))
- Fix compilation for FreeBSD. [\#5447](https://github.com/ClickHouse/ClickHouse/pull/5447) ([proller](https://github.com/proller))
- Upgrade boost to 1.70. [\#5570](https://github.com/ClickHouse/ClickHouse/pull/5570) ([proller](https://github.com/proller))
- Fix building clickhouse as a submodule. [\#5574](https://github.com/ClickHouse/ClickHouse/pull/5574) ([proller](https://github.com/proller))
- Improve JSONExtract performance tests. [\#5444](https://github.com/ClickHouse/ClickHouse/pull/5444) ([Vitaly Baranov](https://github.com/vitlibar))

## ClickHouse release 19.8 {#clickhouse-release-19-8}

### ClickHouse release 19.8.3.8, 2019-06-11 {#clickhouse-release-19-8-3-8-2019-06-11}

#### New Features {#new-features}

- Added functions for working with JSON. [\#4686](https://github.com/ClickHouse/ClickHouse/pull/4686) ([hcz](https://github.com/hczhcz)) [\#5124](https://github.com/ClickHouse/ClickHouse/pull/5124). ([Vitaly Baranov](https://github.com/vitlibar))
- Added the `basename` function, with behaviour similar to the basename function that exists in a lot of languages (`os.path.basename` in Python, `basename` in PHP, etc…). Works with both a UNIX-like path and a Windows path. [\#5136](https://github.com/ClickHouse/ClickHouse/pull/5136) ([Guillaume Tassery](https://github.com/YiuRULE))
- Added `LIMIT n, m BY` or `LIMIT m OFFSET n BY` syntax to set an offset of n for the LIMIT BY clause; see the sketch after this list. [\#5138](https://github.com/ClickHouse/ClickHouse/pull/5138) ([Anton Popov](https://github.com/CurtizJ))
- Added a new data type `SimpleAggregateFunction`, which allows columns with light aggregation in an `AggregatingMergeTree`. It can only be used with simple functions like `any`, `anyLast`, `sum`, `min`, `max`. [\#4629](https://github.com/ClickHouse/ClickHouse/pull/4629) ([Boris Granveaud](https://github.com/bgranvea))
- Added support for non-constant arguments in the `ngramDistance` function. [\#5198](https://github.com/ClickHouse/ClickHouse/pull/5198) ([Danila Kutenin](https://github.com/danlark1))
- Added the functions `skewPop`, `skewSamp`, `kurtPop` and `kurtSamp` to compute sequence skewness, sample skewness, kurtosis and sample kurtosis, respectively. [\#5200](https://github.com/ClickHouse/ClickHouse/pull/5200) ([hcz](https://github.com/hczhcz))
- Support the rename operation for `MaterializeView` storage. [\#5209](https://github.com/ClickHouse/ClickHouse/pull/5209) ([Guillaume Tassery](https://github.com/YiuRULE))
- Added a server which allows connecting to ClickHouse using a MySQL client. [\#4715](https://github.com/ClickHouse/ClickHouse/pull/4715) ([Yuri Baranov](https://github.com/yurriy))
- Add `toDecimal*OrZero` and `toDecimal*OrNull` functions. [\#5291](https://github.com/ClickHouse/ClickHouse/pull/5291) ([Artem Zuykov](https://github.com/4ertus2))
- Support Decimal types in the functions `quantile`, `quantiles`, `median`, `quantileExactWeighted`, `quantilesExactWeighted`, medianExactWeighted. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuykov](https://github.com/4ertus2))
- Added the `toValidUTF8` function, which replaces all invalid UTF-8 characters by the replacement character � (U+FFFD). [\#5322](https://github.com/ClickHouse/ClickHouse/pull/5322) ([Danila Kutenin](https://github.com/danlark1))
- Added the `format` function. Formats a constant pattern (a simplified Python format pattern) with the strings listed in the arguments. [\#5330](https://github.com/ClickHouse/ClickHouse/pull/5330) ([Danila Kutenin](https://github.com/danlark1))
- Added the `system.detached_parts` table containing information about detached parts of `MergeTree` tables. [\#5353](https://github.com/ClickHouse/ClickHouse/pull/5353) ([akuzm](https://github.com/akuzm))
- Added the `ngramSearch` function to calculate the non-symmetric difference between needle and haystack. [\#5418](https://github.com/ClickHouse/ClickHouse/pull/5418) [\#5422](https://github.com/ClickHouse/ClickHouse/pull/5422) ([Danila Kutenin](https://github.com/danlark1))
- Implementation of basic machine learning methods (stochastic linear regression and logistic regression) using the aggregate functions interface. Has different strategies for updating model weights (simple gradient descent, momentum method, Nesterov method). Also supports mini-batches of custom size. [\#4943](https://github.com/ClickHouse/ClickHouse/pull/4943) ([Quid37](https://github.com/Quid37))
- Implementation of the `geohashEncode` and `geohashDecode` functions. [\#5003](https://github.com/ClickHouse/ClickHouse/pull/5003) ([Vasily Nemkov](https://github.com/Enmk))
- Added the aggregate function `timeSeriesGroupSum`, which can aggregate different time series whose sample timestamps are not aligned. It uses linear interpolation between two sample timestamps and then sums the time series together. Added the aggregate function `timeSeriesGroupRateSum`, which calculates the rate of time series and then sums the rates together. [\#4542](https://github.com/ClickHouse/ClickHouse/pull/4542) ([Yangkuan Liu](https://github.com/LiuYangkuan))
- Added the functions `IPv4CIDRtoIPv4Range` and `IPv6CIDRtoIPv6Range` to calculate the lower and upper bounds for an IP in a subnet using CIDR. [\#5095](https://github.com/ClickHouse/ClickHouse/pull/5095) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add an X-ClickHouse-Summary header when we send a query using HTTP with the setting `send_progress_in_http_headers` enabled. Returns the usual information of X-ClickHouse-Progress, with additional information like how many rows and bytes were inserted in the query. [\#5116](https://github.com/ClickHouse/ClickHouse/pull/5116) ([Guillaume Tassery](https://github.com/YiuRULE))
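A short sketch of the new `LIMIT ... BY` offset syntax (the `visits` table is invented):

```sql
-- For each domain: skip the first row, then return up to two rows.
SELECT domain, hits
FROM visits
ORDER BY domain ASC, hits DESC
LIMIT 2 OFFSET 1 BY domain;

-- Equivalent older-style spelling: LIMIT 1, 2 BY domain
```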
#### Improvements {#improvements}

- Added the `max_parts_in_total` setting for the MergeTree family of tables (default: 100 000) that prevents unsafe specification of the partition key \#5166. [\#5171](https://github.com/ClickHouse/ClickHouse/pull/5171) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `clickhouse-obfuscator`: derive the seed for individual columns by combining the initial seed with the column name, not the column position. This is intended to transform datasets with multiple related tables, so that tables remain JOINable after the transformation. [\#5178](https://github.com/ClickHouse/ClickHouse/pull/5178) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added the functions `JSONExtractRaw`, `JSONExtractKeyAndValues`. Renamed the function `jsonExtract` to `JSONExtract`. When something goes wrong, these functions return the corresponding values, not `NULL`. Modified the function `JSONExtract`: now it gets the return type from its last parameter and doesn't inject nullables; see the sketch after this list. Implemented a fallback to RapidJSON in case AVX2 instructions are not available. The simdjson library was updated to a new version. [\#5235](https://github.com/ClickHouse/ClickHouse/pull/5235) ([Vitaly Baranov](https://github.com/vitlibar))
- Now the `if` and `multiIf` functions don't rely on the condition's `Nullable`, but rely on the branches, for SQL compatibility. [\#5238](https://github.com/ClickHouse/ClickHouse/pull/5238) ([Jian Wu](https://github.com/janplus))
- The `In` predicate now generates a `Null` result from `Null` input, like the `Equal` function. [\#5152](https://github.com/ClickHouse/ClickHouse/pull/5152) ([Jian Wu](https://github.com/janplus))
- Check the time limit every (flush\_interval / poll\_timeout) number of rows from Kafka. This allows breaking the reading from the Kafka consumer more frequently and checking the time limits for the top-level streams. [\#5249](https://github.com/ClickHouse/ClickHouse/pull/5249) ([Ivan](https://github.com/abyss7))
- Link rdkafka with bundled SASL. This should allow using SASL SCRAM authentication. [\#5253](https://github.com/ClickHouse/ClickHouse/pull/5253) ([Ivan](https://github.com/abyss7))
- Batched version of RowRefList for ALL JOINs. [\#5267](https://github.com/ClickHouse/ClickHouse/pull/5267) ([Artem Zuykov](https://github.com/4ertus2))
- clickhouse-server: more informative listen error messages. [\#5268](https://github.com/ClickHouse/ClickHouse/pull/5268) ([proller](https://github.com/proller))
- Support dictionaries in clickhouse-copier for functions in ``. [\#5270](https://github.com/ClickHouse/ClickHouse/pull/5270) ([proller](https://github.com/proller))
- Add a new setting `kafka_commit_every_batch` to regulate the Kafka committing policy. It allows setting the commit mode: after every batch of messages is handled, or after the whole block is written to the storage. It's a trade-off between losing some messages or reading them twice in some extreme situations. [\#5308](https://github.com/ClickHouse/ClickHouse/pull/5308) ([Ivan](https://github.com/abyss7))
- Make `windowFunnel` support other unsigned integer types. [\#5320](https://github.com/ClickHouse/ClickHouse/pull/5320) ([sundy-li](https://github.com/sundy-li))
- Allow shadowing the virtual column `_table` in the Merge engine. [\#5325](https://github.com/ClickHouse/ClickHouse/pull/5325) ([Ivan](https://github.com/abyss7))
- Make `sequenceMatch` aggregate functions support other unsigned integer types. [\#5339](https://github.com/ClickHouse/ClickHouse/pull/5339) ([sundy-li](https://github.com/sundy-li))
- Better error messages if a checksum mismatch is most likely caused by hardware failures. [\#5355](https://github.com/ClickHouse/ClickHouse/pull/5355) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Check that underlying tables support sampling for `StorageMerge`. [\#5366](https://github.com/ClickHouse/ClickHouse/pull/5366) ([Ivan](https://github.com/abyss7))
- Close MySQL connections after their usage in external dictionaries. It is related to issue \#893. [\#5395](https://github.com/ClickHouse/ClickHouse/pull/5395) ([Clément Rodriguez](https://github.com/clemrodriguez))
- Improvements of the MySQL Wire Protocol. Changed the name of the format to MySQLWire. Using RAII for calling RSA\_free. Disabling SSL if the context cannot be created. [\#5419](https://github.com/ClickHouse/ClickHouse/pull/5419) ([Yuri Baranov](https://github.com/yurriy))
- clickhouse-client: allow running with an inaccessible history file (read-only, no disk space, file is a directory, …). [\#5431](https://github.com/ClickHouse/ClickHouse/pull/5431) ([proller](https://github.com/proller))
- Respect query settings in asynchronous INSERTs into Distributed tables. [\#4936](https://github.com/ClickHouse/ClickHouse/pull/4936) ([TCeason](https://github.com/TCeason))
- Renamed the functions `leastSqr` to `simpleLinearRegression`, `LinearRegression` to `linearRegression`, `LogisticRegression` to `logisticRegression`. [\#5391](https://github.com/ClickHouse/ClickHouse/pull/5391) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
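To make the `JSONExtract` changes concrete, a small sketch (the JSON literal is arbitrary):

```sql
SELECT
    -- The return type is taken from the last argument.
    JSONExtract('{"a": {"b": 1}}', 'a', 'b', 'Int64') AS b,      -- 1
    JSONExtractRaw('{"a": {"b": 1}}', 'a')            AS raw_a;  -- '{"b":1}'
```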
#### Performance Improvements {#performance-improvements}

- Parallelize the processing of parts of non-replicated MergeTree tables in the `ALTER MODIFY` query. [\#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush))
- Optimizations in regular expression extraction. [\#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [\#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1))
- Do not add the right join key column to the join result if it is used only in the JOIN ON section. [\#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuykov](https://github.com/4ertus2))
- Freeze the Kafka buffer after the first empty response. This avoids multiple invocations of `ReadBuffer::next()` for an empty result in some row-parsing streams. [\#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7))
- `concat` function optimization for multiple arguments. [\#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1))
- Query optimisation: allow pushing down an IN statement while rewriting a comma/cross join into an inner one; see the sketch after this list. [\#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuykov](https://github.com/4ertus2))
- Upgrade our LZ4 implementation with the reference one to have faster decompression. [\#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1))
- Implemented MSD radix sort (based on kxsort), and partial sorting. [\#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty))
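A sketch of the kind of query the comma/cross join rewrite targets (`t1` and `t2` are placeholders):

```sql
-- A comma join with an equality condition in WHERE ...
SELECT * FROM t1, t2 WHERE t1.id = t2.id AND t2.id IN (1, 2, 3);
-- ... can be rewritten internally to an INNER JOIN on t1.id = t2.id,
-- which lets the IN condition be pushed down to t2.
```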
#### Performance Improvements {#performance-improvements}

- Parallelize the processing of parts of non-replicated MergeTree tables in the ALTER MODIFY query. [\#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush))
- Optimizations in regular expression extraction. [\#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [\#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1))
- Do not add the right join key column to the join result if it's used only in the JOIN ON section. [\#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuikov](https://github.com/4ertus2))
- Freeze the Kafka buffer after the first empty response. This avoids multiple invocations of `ReadBuffer::next()` for an empty result in some row-parsing streams. [\#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7))
- `concat` function optimization for multiple arguments. [\#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1))
- Query optimization. Allow pushing down IN statements while rewriting comma/cross join into an inner one. [\#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuikov](https://github.com/4ertus2))
- Upgrade our LZ4 implementation with the reference one to get faster decompression. [\#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1))
- Implemented MSD radix sort (based on kxsort), and partial sorting. [\#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty))

#### Bug Fixes {#bug-fixes}

- Fix push require columns with join. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))
- Fixed a bug where, when ClickHouse is run by systemd, the command `sudo service clickhouse-server forcerestart` did not work as expected. [\#5204](https://github.com/ClickHouse/ClickHouse/pull/5204) ([proller](https://github.com/proller))
- Fix HTTP error codes in DataPartsExchange (the interserver HTTP server on port 9009 always returned code 200, even on errors). [\#5216](https://github.com/ClickHouse/ClickHouse/pull/5216) ([proller](https://github.com/proller))
- Fix SimpleAggregateFunction for Strings longer than MAX\_SMALL\_STRING\_SIZE. [\#5311](https://github.com/ClickHouse/ClickHouse/pull/5311) ([Azat Khuzhin](https://github.com/azat))
- Fix error for `Decimal` to `Nullable(Decimal)` conversion in IN. Support other Decimal-to-Decimal conversions (including different scales). [\#5350](https://github.com/ClickHouse/ClickHouse/pull/5350) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed FPU clobbering in the simdjson library that led to wrong calculation of the `uniqHLL` and `uniqCombined` aggregate functions and math functions such as `log`. [\#5354](https://github.com/ClickHouse/ClickHouse/pull/5354) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed handling of mixed const/nonconst cases in JSON functions. [\#5435](https://github.com/ClickHouse/ClickHouse/pull/5435) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix the `retention` function. Now all conditions that are satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))
- Fix the result type for `quantileExact` with Decimals. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2))
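A small sketch of `quantileExact` over a `Decimal` argument, whose result type the fix above corrects; the data is synthetic:

```sql
-- Exact median of the Decimal64(2) values 0.00 .. 9.00; the result is a Decimal, too.
SELECT quantileExact(0.5)(toDecimal64(number, 2)) FROM numbers(10);
```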
#### Documentation {#documentation}

- Translated documentation for `CollapsingMergeTree` to Chinese. [\#5168](https://github.com/ClickHouse/ClickHouse/pull/5168) ([张风啸](https://github.com/AlexZFX))
- Translated some documentation about table engines to Chinese. [\#5134](https://github.com/ClickHouse/ClickHouse/pull/5134) [\#5328](https://github.com/ClickHouse/ClickHouse/pull/5328) ([never lee](https://github.com/neverlee))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements}

- Fix some sanitizer reports that show probable use-after-free. [\#5139](https://github.com/ClickHouse/ClickHouse/pull/5139) [\#5143](https://github.com/ClickHouse/ClickHouse/pull/5143) [\#5393](https://github.com/ClickHouse/ClickHouse/pull/5393) ([Ivan](https://github.com/abyss7))
- Move performance tests out of separate directories for convenience. [\#5158](https://github.com/ClickHouse/ClickHouse/pull/5158) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix incorrect performance tests. [\#5255](https://github.com/ClickHouse/ClickHouse/pull/5255) ([alesapin](https://github.com/alesapin))
- Added a tool to calculate checksums caused by bit flips, to debug hardware issues. [\#5334](https://github.com/ClickHouse/ClickHouse/pull/5334) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Make the runner script more usable. [\#5340](https://github.com/ClickHouse/ClickHouse/pull/5340) [\#5360](https://github.com/ClickHouse/ClickHouse/pull/5360) ([filimonov](https://github.com/filimonov))
- Add a small instruction on how to write performance tests. [\#5408](https://github.com/ClickHouse/ClickHouse/pull/5408) ([alesapin](https://github.com/alesapin))
- Add the ability to make substitutions in create, fill and drop queries in performance tests. [\#5367](https://github.com/ClickHouse/ClickHouse/pull/5367) ([Olga Khvostikova](https://github.com/stavrolia))

## ClickHouse Release 19.7 {#clickhouse-release-19-7}

### ClickHouse Release 19.7.5.29, 2019-07-05 {#clickhouse-release-19-7-5-29-2019-07-05}

#### Bug Fix {#bug-fix-25}

- Fix performance regression in some queries with JOIN. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))

### ClickHouse Release 19.7.5.27, 2019-06-09 {#clickhouse-release-19-7-5-27-2019-06-09}

#### New Features {#new-features-1}

- Added bitmap-related functions `bitmapHasAny` and `bitmapHasAll`, analogous to the `hasAny` and `hasAll` functions for arrays. [\#5279](https://github.com/ClickHouse/ClickHouse/pull/5279) ([Sergi Vladykin](https://github.com/svladykin))

#### Bug Fixes {#bug-fixes-1}

- Fix segfault on a `minmax` INDEX with a Null value. [\#5246](https://github.com/ClickHouse/ClickHouse/pull/5246) ([Nikita Vasilev](https://github.com/nikvas0))
- Mark all input columns in LIMIT BY as required output. This fixes the ‘Not found column’ error in some distributed queries. [\#5407](https://github.com/ClickHouse/ClickHouse/pull/5407) ([Constantin S. Pan](https://github.com/kvap))
- Fix the "Column ‘0’ already exists" error in `SELECT .. PREWHERE` on a column with DEFAULT. [\#5397](https://github.com/ClickHouse/ClickHouse/pull/5397) ([proller](https://github.com/proller))
- Fix the `ALTER MODIFY TTL` query on `ReplicatedMergeTree`. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539/commits) ([Anton Popov](https://github.com/CurtizJ))
- Don't crash the server when Kafka consumers have failed to start. [\#5285](https://github.com/ClickHouse/ClickHouse/pull/5285) ([Ivan](https://github.com/abyss7))
- Fixed bitmap functions producing a wrong result. [\#5359](https://github.com/ClickHouse/ClickHouse/pull/5359) ([Andy Yang](https://github.com/andyyzh))
- Fix element\_count for hashed dictionaries (do not include duplicates). [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))
- Use the contents of the TZ environment variable as the name of the timezone. In some cases it helps to correctly detect the default timezone. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
- Do not try to convert integers in `dictGetT` functions, because it doesn't work correctly. Throw an exception instead. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2))
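Since `dictGetT` functions no longer convert integer keys implicitly, the key should be cast to the dictionary's key type explicitly. A hedged sketch; the dictionary `my_dict`, its attribute `attr` and the `src` table are hypothetical:

```sql
-- The key must be converted to UInt64 by hand; passing an Int32 as-is now throws.
SELECT dictGetString('my_dict', 'attr', toUInt64(key_int32)) FROM src;
```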
- Fix settings in the ExternalData HTTP request. [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
- Fixed a bug where parts were removed only from the filesystem without dropping them from ZooKeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
- Fixed a segmentation fault in the `bitmapHasAny` function. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed an error where the replication connection pool doesn't retry resolving the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
- Fixed the `DROP INDEX IF EXISTS` query. Now an `ALTER TABLE ... DROP INDEX IF EXISTS ...` query doesn't raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
- Fix the UNION ALL supertype column. There were cases with inconsistent data and column types of the resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2))
- Skip ZNONODE during DDL query processing. Previously, if another node removed the znode in the task queue, the one that did not process it but already got the list of children would terminate the DDLWorker thread. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
- Fix INSERT into a Distributed() table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))

### ClickHouse Release 19.7.3.9, 2019-05-30 {#clickhouse-release-19-7-3-9-2019-05-30}

#### New Features {#new-features-2}

- Allow limiting the range of a setting that can be specified by the user. These constraints can be set up in the user settings profile. [\#4931](https://github.com/ClickHouse/ClickHouse/pull/4931) ([Vitaly Baranov](https://github.com/vitlibar))
- Added a second version of the `groupUniqArray` function with an optional `max_size` parameter that limits the size of the resulting array. This behavior is similar to the `groupArray(max_size)(x)` function. [\#5026](https://github.com/ClickHouse/ClickHouse/pull/5026) ([Guillaume Tassery](https://github.com/YiuRULE))
- For the TSVWithNames/CSVWithNames input file formats, the column order can now be determined from the file header. This is controlled by the `input_format_with_names_use_header` parameter. [\#5081](https://github.com/ClickHouse/ClickHouse/pull/5081) ([Alexander](https://github.com/Akazz))
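A minimal sketch of the `input_format_with_names_use_header` setting, as it could be typed in clickhouse-client; the `users(id UInt64, name String)` table is hypothetical:

```sql
SET input_format_with_names_use_header = 1;
-- The header row, not the table definition, now determines the column order:
INSERT INTO users FORMAT CSVWithNames
"name","id"
"alice",1
```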
#### Bug Fixes {#bug-fixes-2}

- Crash with uncompressed\_cache + JOIN during merge (\#5197). [\#5133](https://github.com/ClickHouse/ClickHouse/pull/5133) ([Danila Kutenin](https://github.com/danlark1))
- Segmentation fault on a clickhouse-client query to system tables. \#5066 [\#5127](https://github.com/ClickHouse/ClickHouse/pull/5127) ([Ivan](https://github.com/abyss7))
- Data loss on heavy load via KafkaEngine (\#4736). [\#5080](https://github.com/ClickHouse/ClickHouse/pull/5080) ([Ivan](https://github.com/abyss7))
- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Performance Improvements {#performance-improvements-1}

- Use radix sort for sorting by a single numeric column in `ORDER BY` without `LIMIT`. [\#5106](https://github.com/ClickHouse/ClickHouse/pull/5106), [\#4439](https://github.com/ClickHouse/ClickHouse/pull/4439) ([Evgenii Pravda](https://github.com/kvinty), [alexey-milovidov](https://github.com/alexey-milovidov))

#### Documentation {#documentation-1}

- Translated documentation for some table engines to Chinese. [\#5107](https://github.com/ClickHouse/ClickHouse/pull/5107), [\#5094](https://github.com/ClickHouse/ClickHouse/pull/5094), [\#5087](https://github.com/ClickHouse/ClickHouse/pull/5087) ([张风啸](https://github.com/AlexZFX)), [\#5068](https://github.com/ClickHouse/ClickHouse/pull/5068) ([never lee](https://github.com/neverlee))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-1}

- Print UTF-8 characters properly in `clickhouse-test`. [\#5084](https://github.com/ClickHouse/ClickHouse/pull/5084) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a command line parameter for clickhouse-client to always load suggestion data. [\#5102](https://github.com/ClickHouse/ClickHouse/pull/5102) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Resolve some of the PVS-Studio warnings. [\#5082](https://github.com/ClickHouse/ClickHouse/pull/5082) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Update LZ4. [\#5040](https://github.com/ClickHouse/ClickHouse/pull/5040) ([Danila Kutenin](https://github.com/danlark1))
- Add gperf to the build requirements for upcoming pull request \#5030. [\#5110](https://github.com/ClickHouse/ClickHouse/pull/5110) ([proller](https://github.com/proller))

## ClickHouse Release 19.6 {#clickhouse-release-19-6}

### ClickHouse Release 19.6.3.18, 2019-06-13 {#clickhouse-release-19-6-3-18-2019-06-13}

#### Bug Fixes {#bug-fixes-3}

- Fixed IN condition pushdown for queries from table functions `mysql` and `odbc` and the corresponding table engines. This fixes \#3540 and \#2384. [\#5313](https://github.com/ClickHouse/ClickHouse/pull/5313) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix deadlock in ZooKeeper. [\#5297](https://github.com/ClickHouse/ClickHouse/pull/5297) ([github1youlc](https://github.com/github1youlc))
- Allow quoted decimals in CSV. [\#5284](https://github.com/ClickHouse/ClickHouse/pull/5284) ([Artem Zuikov](https://github.com/4ertus2))
- Disallow conversion from float Inf/NaN into Decimals (throw an exception). [\#5282](https://github.com/ClickHouse/ClickHouse/pull/5282) ([Artem Zuikov](https://github.com/4ertus2))
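After this change, converting a float `Inf` or `NaN` to `Decimal` raises an exception instead of producing a meaningless value; a one-line illustration:

```sql
-- 1 / 0 evaluates to Float64 inf; this cast now throws instead of succeeding.
SELECT toDecimal32(1 / 0, 4);
```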
- Fixed a data race in the RENAME query. [\#5247](https://github.com/ClickHouse/ClickHouse/pull/5247) ([Winter Zhang](https://github.com/zhang2014))
- Temporarily disable LFAlloc. Usage of LFAlloc might lead to a lot of MAP\_FAILED errors when allocating the UncompressedCache and, as a result, to query failures on highly loaded servers. [cfdba93](https://github.com/ClickHouse/ClickHouse/commit/cfdba938ce22f16efeec504f7f90206a515b1280) ([Danila Kutenin](https://github.com/danlark1))

### ClickHouse Release 19.6.2.11, 2019-05-13 {#clickhouse-release-19-6-2-11-2019-05-13}

#### New Features {#new-features-3}

- TTL expressions for columns and tables. [\#4212](https://github.com/ClickHouse/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ))
- Added support for `brotli` compression of HTTP responses (Accept-Encoding: br). [\#4388](https://github.com/ClickHouse/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin))
- Added the new function `isValidUTF8` for checking whether a set of bytes is correctly utf-8 encoded. [\#4934](https://github.com/ClickHouse/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1))
- Added the new load balancing policy `first_or_random`, which sends queries to the first specified host and, if it's unavailable, sends queries to random hosts of the shard. Useful for cross-replication topology setups. [\#5012](https://github.com/ClickHouse/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei))
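A short sketch of the new `first_or_random` load balancing policy; `distributed_hits` is a hypothetical `Distributed` table:

```sql
-- Prefer the first replica from the config order; fall back to a random replica
-- of the shard only if the first one is unavailable.
SET load_balancing = 'first_or_random';
SELECT count() FROM distributed_hits;
```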
#### Experimental Features {#experimental-features-1}

- Add the setting `index_granularity_bytes` (adaptive index granularity) for the MergeTree\* family of tables. [\#4826](https://github.com/ClickHouse/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin))

#### Improvements {#improvements-1}

- Added support for non-constant and negative size and length arguments for the `substringUTF8` function. [\#4989](https://github.com/ClickHouse/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Disable push-down to the right table in LEFT JOIN, to the left table in RIGHT JOIN, and to both tables in FULL JOIN. This fixes wrong JOIN results in some cases. [\#4846](https://github.com/ClickHouse/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7))
- `clickhouse-copier`: automatically upload the task configuration from the `--task-file` option. [\#4876](https://github.com/ClickHouse/ClickHouse/pull/4876) ([proller](https://github.com/proller))
- Added a typo handler for the storage factory and the table functions factory. [\#4891](https://github.com/ClickHouse/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1))
- Support asterisks and qualified asterisks for multiple joins without subqueries. [\#4898](https://github.com/ClickHouse/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2))
- Make the missing-column error message more user friendly. [\#4915](https://github.com/ClickHouse/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2))

#### Performance Improvements {#performance-improvements-2}

- Significant speedup of ASOF JOIN. [\#4924](https://github.com/ClickHouse/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy))

#### Backward Incompatible Changes {#backward-incompatible-changes}

- The HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [\#4972](https://github.com/ClickHouse/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin))

#### Bug Fixes {#bug-fixes-4}

- Fixed a potential null pointer dereference in `clickhouse-copier`. [\#4900](https://github.com/ClickHouse/ClickHouse/pull/4900) ([proller](https://github.com/proller))
- Fixed an error in queries with JOIN + ARRAY JOIN. [\#4938](https://github.com/ClickHouse/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed hanging on server start when a dictionary depends on another dictionary via a database with engine=Dictionary. [\#4962](https://github.com/ClickHouse/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar))
- Partially fix distributed\_product\_mode = local. It's possible to allow columns of local tables in where/having/order by/… via table aliases. Throw an exception if the table does not have an alias. It's not possible to access the columns without table aliases yet. [\#4986](https://github.com/ClickHouse/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2))
- Fix a potentially wrong result for `SELECT DISTINCT` with `JOIN`. [\#5001](https://github.com/ClickHouse/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-2}

- Fixed test failures when running clickhouse-server on a different host. [\#4713](https://github.com/ClickHouse/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk))
- clickhouse-test: disable color control sequences in non-tty environments. [\#4937](https://github.com/ClickHouse/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin))
- clickhouse-test: allow using any test database (remove the `test.` qualification where possible). [\#5008](https://github.com/ClickHouse/ClickHouse/pull/5008) ([proller](https://github.com/proller))
- Fix ubsan errors. [\#5037](https://github.com/ClickHouse/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar))
- Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliably. [\#4995](https://github.com/ClickHouse/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1))
- Python util to help with backports and changelogs. [\#4949](https://github.com/ClickHouse/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7))
## ClickHouse Release 19.5 {#clickhouse-release-19-5}

### ClickHouse Release 19.5.4.22, 2019-05-13 {#clickhouse-release-19-5-4-22-2019-05-13}

#### Bug Fixes {#bug-fixes-5}

- Fixed a possible crash in bitmap\* functions. [\#5220](https://github.com/ClickHouse/ClickHouse/pull/5220) [\#5228](https://github.com/ClickHouse/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh))
- Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if a LowCardinality column was part of the primary key. \#5031 [\#5154](https://github.com/ClickHouse/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Modification of the retention function: if a row satisfied both the first and the Nth condition, only the first satisfied condition used to be added to the data state. Now all conditions that are satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))

### ClickHouse Release 19.5.3.8, 2019-04-18 {#clickhouse-release-19-5-3-8-2019-04-18}

#### Bug Fixes {#bug-fixes-6}

- Fixed the type of the `max_partitions_per_insert_block` setting from boolean to UInt64. [\#5028](https://github.com/ClickHouse/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat))

### ClickHouse Release 19.5.2.6, 2019-04-15 {#clickhouse-release-19-5-2-6-2019-04-15}

#### New Features {#new-features-4}

- [Hyperscan](https://github.com/intel/hyperscan) multiple regular expression matching was added (functions `multiMatchAny`, `multiMatchAnyIndex`, `multiFuzzyMatchAny`, `multiFuzzyMatchAnyIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780), [\#4841](https://github.com/ClickHouse/ClickHouse/pull/4841) ([Danila Kutenin](https://github.com/danlark1))
- The `multiSearchFirstPosition` function was added. [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
- Implement a predefined per-row expression filter for tables. [\#4792](https://github.com/ClickHouse/ClickHouse/pull/4792) ([Ivan](https://github.com/abyss7))
- A new type of data skipping indices based on Bloom filters (can be used for the `equal`, `in` and `like` functions). [\#4499](https://github.com/ClickHouse/ClickHouse/pull/4499) ([Nikita Vasilev](https://github.com/nikvas0))
- Added `ASOF JOIN`, which allows running queries that join to the most recent value known. [\#4774](https://github.com/ClickHouse/ClickHouse/pull/4774) [\#4867](https://github.com/ClickHouse/ClickHouse/pull/4867) [\#4863](https://github.com/ClickHouse/ClickHouse/pull/4863) [\#4875](https://github.com/ClickHouse/ClickHouse/pull/4875) ([Martijn Bakker](https://github.com/Gladdy), [Artem Zuikov](https://github.com/4ertus2))
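A minimal sketch of `ASOF JOIN`, assuming hypothetical `trades` and `quotes` tables that share a `symbol` column and an ordered `ts` column; in the `USING` form, the last listed column is the inequality one:

```sql
-- For each trade, pick the most recent quote with quotes.ts <= trades.ts.
SELECT symbol, ts, price, bid
FROM trades
ASOF JOIN quotes USING (symbol, ts);
```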
- Rewrite multiple `COMMA JOIN` to `CROSS JOIN`, then rewrite them to `INNER JOIN` if possible. [\#4661](https://github.com/ClickHouse/ClickHouse/pull/4661) ([Artem Zuikov](https://github.com/4ertus2))

#### Improvement {#improvement-9}

- `topK` and `topKWeighted` now support a custom `loadFactor` (fixes issue [\#4252](https://github.com/ClickHouse/ClickHouse/issues/4252)). [\#4634](https://github.com/ClickHouse/ClickHouse/pull/4634) ([Kirill Danshin](https://github.com/kirillDanshin))
- Allow using `parallel_replicas_count > 1` even for tables without sampling (the setting is simply ignored for them). In previous versions it led to an exception. [\#4637](https://github.com/ClickHouse/ClickHouse/pull/4637) ([Alexey Elymanov](https://github.com/digitalist))
- Support `CREATE OR REPLACE VIEW`. Allows creating a view or setting a new definition in a single statement. [\#4654](https://github.com/ClickHouse/ClickHouse/pull/4654) ([Boris Granveaud](https://github.com/bgranvea))
- The `Buffer` table engine now supports `PREWHERE`. [\#4671](https://github.com/ClickHouse/ClickHouse/pull/4671) ([Yangkuan Liu](https://github.com/LiuYangkuan))
- Added the ability to start a replicated table without metadata in ZooKeeper, in `readonly` mode. [\#4691](https://github.com/ClickHouse/ClickHouse/pull/4691) ([alesapin](https://github.com/alesapin))
- Fixed flicker of the progress bar in clickhouse-client. The issue was most noticeable when using `FORMAT Null` with streaming queries. [\#4811](https://github.com/ClickHouse/ClickHouse/pull/4811) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow disabling functions with the `hyperscan` library on a per-user basis, to limit potentially excessive and uncontrolled resource usage. [\#4816](https://github.com/ClickHouse/ClickHouse/pull/4816) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add version number logging in all errors. [\#4824](https://github.com/ClickHouse/ClickHouse/pull/4824) ([proller](https://github.com/proller))
- Added a restriction to the `multiMatch` functions which requires the string size to fit into `unsigned int`. Also added a limit on the number of arguments to the `multiSearch` functions. [\#4834](https://github.com/ClickHouse/ClickHouse/pull/4834) ([Danila Kutenin](https://github.com/danlark1))
- Improved usage of scratch space and error handling in Hyperscan. [\#4866](https://github.com/ClickHouse/ClickHouse/pull/4866) ([Danila Kutenin](https://github.com/danlark1))
- Fill `system.graphite_detentions` from a table config of `*GraphiteMergeTree` engine tables. [\#4584](https://github.com/ClickHouse/ClickHouse/pull/4584) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
- Renamed the `trigramDistance` function to `ngramDistance` and added more functions with `CaseInsensitive` and `UTF`. [\#4602](https://github.com/ClickHouse/ClickHouse/pull/4602) ([Danila Kutenin](https://github.com/danlark1))
- Improved data skipping indices calculation. [\#4640](https://github.com/ClickHouse/ClickHouse/pull/4640) ([Nikita Vasilev](https://github.com/nikvas0))
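Data skipping indices of this family are declared per column expression. A hedged sketch; in the 19.x line they were still experimental, so the example enables the corresponding flag, and the table and column names are made up:

```sql
SET allow_experimental_data_skipping_indices = 1;

CREATE TABLE hits_sample
(
    dt Date,
    user_id UInt64,
    url String,
    INDEX idx_user user_id TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree()
ORDER BY dt;
```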
- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))

#### Bug Fix {#bug-fix-26}

- Avoid `std::terminate` in case of a memory allocation failure. Now a `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed capnproto reading from a buffer. Sometimes files were not loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
- Fixed ignorance of the `UTC` setting (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
- Fix the `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
- Fixed the TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix rechecking parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Arithmetic operations on intermediate aggregate function states did not work for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Always backquote column names in metadata. Otherwise it's impossible to create a table with a column named `index` (the server won't restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
- Fixed a segfault in `JOIN ON` with `enable_optimize_predicate_expression` enabled. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
- Fixed a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix a crash of `JOIN` on a not-nullable vs nullable column. Fix `NULLs` in right keys in `ANY JOIN` + `join_use_nulls`. [\#4815](https://github.com/ClickHouse/ClickHouse/pull/4815) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a rare data race that can happen during `RENAME` of a MergeTree family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a segmentation fault in the `arrayIntersect` function. It could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contains a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix a crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
- Fixed the `arrayIntersect` function returning a wrong result in case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
- Fix an incorrect result in `FULL/RIGHT JOIN` with a const column. [\#4723](https://github.com/ClickHouse/ClickHouse/pull/4723) ([Artem Zuikov](https://github.com/4ertus2))
- Fix duplicates in `GLOBAL JOIN` with asterisk. [\#4705](https://github.com/ClickHouse/ClickHouse/pull/4705) ([Artem Zuikov](https://github.com/4ertus2))
- Fix parameter deduction in `ALTER MODIFY` of a column's `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
- The `cutQueryStringAndFragment()` and `queryStringAndFragment()` functions now work correctly when the `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed a rare bug when the `min_bytes_to_use_direct_io` setting is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix wrong name qualification in `GLOBAL JOIN`. [\#4969](https://github.com/ClickHouse/ClickHouse/pull/4969) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed the `toISOWeek` function result for the year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed `ON CLUSTER` for `ReplicatedMergeTree*` family tables. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))

#### Backward Incompatible Change {#backward-incompatible-change-8}

- Renamed the setting `insert_sample_with_metadata` to `input_format_defaults_for_omitted_fields`. [\#4771](https://github.com/ClickHouse/ClickHouse/pull/4771) ([Artem Zuikov](https://github.com/4ertus2))
- Added the setting `max_partitions_per_insert_block` (with a default value of 100). If an inserted block contains a larger number of partitions, an exception is thrown. Set it to 0 if you want to remove the limit (not recommended). [\#4845](https://github.com/ClickHouse/ClickHouse/pull/4845) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Multi-search functions were renamed (`multiPosition` to `multiSearchAllPositions`, `multiSearch` to `multiSearchAny`, `firstMatch` to `multiSearchFirstIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
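A quick illustration of the renamed multi-search functions (made-up strings; expected return values shown in comments):

```sql
SELECT multiSearchAny('ClickHouse changelog', ['house', 'log']);   -- 1: at least one needle found
SELECT multiSearchAllPositions('hello world', ['l', 'o']);         -- [3, 5]: first position of each needle
SELECT multiSearchFirstIndex('hello world', ['xyz', 'world']);     -- 2: index of the first matching needle
```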
#### Performance Improvement {#performance-improvement-6}

- Optimize the Volnitsky searcher by inlining, giving about a 5-10% search improvement for queries with many needles or many similar bigrams. [\#4862](https://github.com/ClickHouse/ClickHouse/pull/4862) ([Danila Kutenin](https://github.com/danlark1))
- Fixed a performance issue when the `use_uncompressed_cache` setting is greater than zero, which appeared when all read data was contained in the cache. [\#4913](https://github.com/ClickHouse/ClickHouse/pull/4913) ([alesapin](https://github.com/alesapin))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-10}

- Hardening the debug build: more granular memory mappings and ASLR; add memory protection for the mark cache and the index. This allows finding more memory-stomping bugs in cases where ASan and MSan cannot do it. [\#4632](https://github.com/ClickHouse/ClickHouse/pull/4632) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add support for the cmake variables `ENABLE_PROTOBUF`, `ENABLE_PARQUET` and `ENABLE_BROTLI`, which allow enabling/disabling the above features (same as we can do for librdkafka, MySQL, etc.). [\#4669](https://github.com/ClickHouse/ClickHouse/pull/4669) ([Silviu Caragea](https://github.com/silviucpp))
- Add the ability to print the process list and stacktraces of all threads if some queries hang after a test run. [\#4675](https://github.com/ClickHouse/ClickHouse/pull/4675) ([alesapin](https://github.com/alesapin))
- Add retries on the `Connection loss` error in `clickhouse-test`. [\#4682](https://github.com/ClickHouse/ClickHouse/pull/4682) ([alesapin](https://github.com/alesapin))
- Add a FreeBSD build with Vagrant, and a build with the thread sanitizer, to the packager script. [\#4712](https://github.com/ClickHouse/ClickHouse/pull/4712) [\#4748](https://github.com/ClickHouse/ClickHouse/pull/4748) ([alesapin](https://github.com/alesapin))
- Now the user is asked for a password for the `'default'` user during installation. [\#4725](https://github.com/ClickHouse/ClickHouse/pull/4725) ([proller](https://github.com/proller))
- Suppress a warning in the `rdkafka` library. [\#4740](https://github.com/ClickHouse/ClickHouse/pull/4740) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow building without SSL. [\#4750](https://github.com/ClickHouse/ClickHouse/pull/4750) ([proller](https://github.com/proller))
- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
- Upgrade contrib boost to 1.69. [\#4793](https://github.com/ClickHouse/ClickHouse/pull/4793) ([proller](https://github.com/proller))
- Disable usage of `mremap` when compiled with Thread Sanitizer. Surprisingly enough, TSan does not intercept `mremap` (though it does intercept `mmap` and `munmap`), which leads to false positives. Fixed the TSan report in stateful tests. [\#4859](https://github.com/ClickHouse/ClickHouse/pull/4859) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add test checking for using the format schema via the HTTP interface. [\#4864](https://github.com/ClickHouse/ClickHouse/pull/4864) ([Vitaly Baranov](https://github.com/vitlibar))

## ClickHouse Release 19.4 {#clickhouse-release-19-4}

### ClickHouse Release 19.4.4.33, 2019-04-17 {#clickhouse-release-19-4-4-33-2019-04-17}

#### Bug Fixes {#bug-fixes-7}

- Avoid `std::terminate` in case of a memory allocation failure. Now a `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed capnproto reading from a buffer. Sometimes files were not loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
- Fixed ignorance of the `UTC` setting (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
- Fix the `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
- Fixed the TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix rechecking parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Arithmetic operations on intermediate aggregate function states did not work for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Always backquote column names in metadata. Otherwise it's impossible to create a table with a column named `index` (the server won't restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix a crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
- Fixed a segfault in `JOIN ON` with `enable_optimize_predicate_expression` enabled. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
- Fixed a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed a segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a rare data race that can happen during `RENAME` of a MergeTree family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a segmentation fault in the `arrayIntersect` function. It could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contains a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
- Fixed the `arrayIntersect` function returning a wrong result in case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
- Fix parameter deduction in `ALTER MODIFY` of a column's `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
- The `cutQueryStringAndFragment()` and `queryStringAndFragment()` functions now work correctly when the `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
- Fixed a rare bug when the `min_bytes_to_use_direct_io` setting is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed the `toISOWeek` function result for the year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed `ON CLUSTER` for `ReplicatedMergeTree*` family tables. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))

#### Improvements {#improvements-2}

- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
### ClickHouse Release 19.4.3.11, 2019-04-02 {#clickhouse-release-19-4-3-11-2019-04-02}

#### Bug Fixes {#bug-fixes-8}

- Fix a crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-11}

- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))

### ClickHouse Release 19.4.2.7, 2019-03-30 {#clickhouse-release-19-4-2-7-2019-03-30}

#### Bug Fixes {#bug-fixes-9}

- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contains a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

### ClickHouse Release 19.4.1.3, 2019-03-19 {#clickhouse-release-19-4-1-3-2019-03-19}

#### Bug Fixes {#bug-fixes-10}

- Fixed remote queries that contain both `LIMIT BY` and `LIMIT`. Previously, if `LIMIT BY` and `LIMIT` were used for a remote query, `LIMIT` could happen before `LIMIT BY`, which led to an overly filtered result. [\#4708](https://github.com/ClickHouse/ClickHouse/pull/4708) ([Constantin S. Pan](https://github.com/kvap))

### ClickHouse Release 19.4.0.49, 2019-03-09 {#clickhouse-release-19-4-0-49-2019-03-09}

#### New Features {#new-features-5}

- Added full support for the `Protobuf` format (input and output, nested data structures). [\#4174](https://github.com/ClickHouse/ClickHouse/pull/4174) [\#4493](https://github.com/ClickHouse/ClickHouse/pull/4493) ([Vitaly Baranov](https://github.com/vitlibar))
- Added bitmap functions with Roaring Bitmaps. [\#4207](https://github.com/ClickHouse/ClickHouse/pull/4207) ([Andy Yang](https://github.com/andyyzh)) [\#4568](https://github.com/ClickHouse/ClickHouse/pull/4568) ([Vitaly Baranov](https://github.com/vitlibar))
- Parquet format support. [\#4448](https://github.com/ClickHouse/ClickHouse/pull/4448) ([proller](https://github.com/proller))
- N-gram distance was added for fuzzy string comparison. It is similar to q-gram metrics in the R language. [\#4466](https://github.com/ClickHouse/ClickHouse/pull/4466) ([Danila Kutenin](https://github.com/danlark1))
- Combine rules for graphite rollup from dedicated aggregation and retention patterns. [\#4426](https://github.com/ClickHouse/ClickHouse/pull/4426) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
- Added `max_execution_speed` and `max_execution_speed_bytes` to limit resource usage. Added the `min_execution_speed_bytes` setting to complement `min_execution_speed`. [\#4430](https://github.com/ClickHouse/ClickHouse/pull/4430) ([Winter Zhang](https://github.com/zhang2014))
- Implemented the function `flatten`. [\#4555](https://github.com/ClickHouse/ClickHouse/pull/4555) [\#4409](https://github.com/ClickHouse/ClickHouse/pull/4409) ([alexey-milovidov](https://github.com/alexey-milovidov), [kzon](https://github.com/kzon))
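A one-line illustration of `flatten` on a nested array (the values are made up):

```sql
SELECT flatten([[[1, 2], [3]], [[4], [5, 6]]]);  -- [1, 2, 3, 4, 5, 6]
```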
- Added functions `arrayEnumerateDenseRanked` and `arrayEnumerateUniqRanked` (it's like `arrayEnumerateUniq` but allows fine-tuning the array depth to look inside multidimensional arrays). [\#4475](https://github.com/ClickHouse/ClickHouse/pull/4475) ([proller](https://github.com/proller)) [\#4601](https://github.com/ClickHouse/ClickHouse/pull/4601) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Multiple JOINs with some restrictions: no asterisks, no complex aliases in ON/WHERE/GROUP BY/… [\#4462](https://github.com/ClickHouse/ClickHouse/pull/4462) ([Artem Zuikov](https://github.com/4ertus2))

#### Bug Fixes {#bug-fixes-11}

- This release also contains all bug fixes from 19.3 and 19.1.
- Fixed a bug in data skipping indices: the order of granules after INSERT was incorrect. [\#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0))
- Fixed the `set` index for `Nullable` and `LowCardinality` columns. Before this, a `set` index with a `Nullable` or `LowCardinality` column led to the error `Data type must be deserialized with multiple streams` while selecting. [\#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Correctly set update\_time on full `executable` dictionary update. [\#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon))
- Fixed the broken progress bar in 19.3. [\#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov))
- Fixed inconsistent values of MemoryTracker when a memory region was shrunk, in certain cases. [\#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed undefined behaviour in ThreadPool. [\#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [\#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alex Zatelepin](https://github.com/ztlpn))
- ODBC driver compatibility with the `LowCardinality` data type. [\#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller))
- FreeBSD: fix for the `AIOcontextPool: Found io_event with unknown id 0` error. [\#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef))
- The `system.part_log` table was created regardless of configuration. [\#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix undefined behavior in the `dictIsIn` function for cache dictionaries. [\#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin))
- Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. from different threads or when executing multiple subqueries) and there is a concurrent DDL query. [\#4535](https://github.com/ClickHouse/ClickHouse/pull/4535) ([Alex Zatelepin](https://github.com/ztlpn))
- Disable compile\_expressions by default until we get our own `llvm` contrib and can test it with `clang` and `asan`. [\#4579](https://github.com/ClickHouse/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin))
- Prevent `std::terminate` when `invalidate_query` for a `clickhouse` external dictionary source returned a wrong resultset (empty, or more than one row, or more than one column). Fixed the issue where `invalidate_query` was performed every five seconds regardless of the `lifetime`. [\#4583](https://github.com/ClickHouse/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid a deadlock when the `invalidate_query` for a dictionary with a `clickhouse` source involved the `system.dictionaries` table or the `Dictionaries` database (rare case). [\#4599](https://github.com/ClickHouse/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixes for CROSS JOIN with an empty WHERE. [\#4598](https://github.com/ClickHouse/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed a segfault in the "replicate" function when a constant argument is passed. [\#4603](https://github.com/ClickHouse/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix the lambda function with the predicate optimizer. [\#4408](https://github.com/ClickHouse/ClickHouse/pull/4408) ([Winter Zhang](https://github.com/zhang2014))
- Multiple JOINs: multiple fixes. [\#4595](https://github.com/ClickHouse/ClickHouse/pull/4595) ([Artem Zuikov](https://github.com/4ertus2))

#### Improvements {#improvements-3}

- Support aliases in the JOIN ON section for right table columns. [\#4412](https://github.com/ClickHouse/ClickHouse/pull/4412) ([Artem Zuikov](https://github.com/4ertus2))
- The result of multiple JOINs needs correct result names to be used in subselects. Replace flat aliases with source names in the result. [\#4474](https://github.com/ClickHouse/ClickHouse/pull/4474) ([Artem Zuikov](https://github.com/4ertus2))
- Improve push-down logic for joined statements. [\#4387](https://github.com/ClickHouse/ClickHouse/pull/4387) ([Ivan](https://github.com/abyss7))

#### Performance Improvements {#performance-improvements-3}

- Improved heuristics of the "move to PREWHERE" optimization. [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Use proper lookup tables that use the HashTable API for 8-bit and 16-bit keys. [\#4536](https://github.com/ClickHouse/ClickHouse/pull/4536) ([Amos Bird](https://github.com/amosbird))
- Improved performance of string comparison. [\#4564](https://github.com/ClickHouse/ClickHouse/pull/4564) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Clean up the distributed DDL queue in a separate thread so that it doesn't slow down the main loop that processes distributed DDL tasks. [\#4502](https://github.com/ClickHouse/ClickHouse/pull/4502) ([Alex Zatelepin](https://github.com/ztlpn))
- When `min_bytes_to_use_direct_io` is set to 1, not every file was opened in O\_DIRECT mode, because the data size to read was sometimes underestimated by the size of one compressed block. [\#4526](https://github.com/ClickHouse/ClickHouse/pull/4526) ([alexey-milovidov](https://github.com/alexey-milovidov))
- -#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-12} - -- Added support for clang-9. [\#4604](https://github.com/ClickHouse/ClickHouse/pull/4604) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fix wrong `__asm__` instructions (again). [\#4621](https://github.com/ClickHouse/ClickHouse/pull/4621) ([Konstantin Podshumok](https://github.com/podshumok)) -- Add the ability to specify settings for `clickhouse-performance-test` from the command line. [\#4437](https://github.com/ClickHouse/ClickHouse/pull/4437) ([alesapin](https://github.com/alesapin)) -- Add dictionary tests to integration tests. [\#4477](https://github.com/ClickHouse/ClickHouse/pull/4477) ([alesapin](https://github.com/alesapin)) -- Added queries from the benchmark on the website to automated performance tests. [\#4496](https://github.com/ClickHouse/ClickHouse/pull/4496) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- `xxhash.h` does not exist in external lz4 because it is an implementation detail and its symbols are namespaced with the `XXH_NAMESPACE` macro. When lz4 is external, xxHash has to be external too, and the dependents have to link to it. [\#4495](https://github.com/ClickHouse/ClickHouse/pull/4495) ([Orivej Desh](https://github.com/orivej)) -- Fixed a case when the `quantileTiming` aggregate function can be called with a negative or floating-point argument (this fixes a fuzz test with the undefined behaviour sanitizer). [\#4506](https://github.com/ClickHouse/ClickHouse/pull/4506) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Spelling error correction. [\#4531](https://github.com/ClickHouse/ClickHouse/pull/4531) ([sdk2](https://github.com/sdk2)) -- Fix compilation on Mac. [\#4371](https://github.com/ClickHouse/ClickHouse/pull/4371) ([Vitaly Baranov](https://github.com/vitlibar)) -- Build fixes for FreeBSD and various unusual build configurations. [\#4444](https://github.com/ClickHouse/ClickHouse/pull/4444) ([proller](https://github.com/proller)) - -## ClickHouse release 19.3 {#clickhouse-release-19-3} - -### ClickHouse release 19.3.9.1, 2019-04-02 {#clickhouse-release-19-3-9-1-2019-04-02} - -#### Bug Fixes {#bug-fixes-12} - -- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable columns. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2)) -- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller)) -- Fixed reading from an `Array(LowCardinality)` column in a rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) - -#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-13} - -- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid)) - -### ClickHouse release 19.3.7, 2019-03-12 {#clickhouse-release-19-3-7-2019-03-12} - -#### Bug Fixes {#bug-fixes-13} - -- Fixed a bug in \#3920.
This bug manifested itself as random cache corruption (messages `Unknown codec family code`, `Cannot seek through file`) and segfaults. It first appeared in version 19.1 and is present in versions up to 19.1.10 and 19.3.6. [\#4623](https://github.com/ClickHouse/ClickHouse/pull/4623) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.3.6, 2019-03-02 {#clickhouse-release-19-3-6-2019-03-02} - -#### Bug Fixes {#bug-fixes-14} - -- When there are more than 1000 threads in a thread pool, `std::terminate` may happen on thread exit. [Azat Khuzhin](https://github.com/azat) [\#4485](https://github.com/ClickHouse/ClickHouse/pull/4485) [\#4505](https://github.com/ClickHouse/ClickHouse/pull/4505) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Now it's possible to create `ReplicatedMergeTree*` tables with comments on columns without defaults, and tables with column codecs without comments and defaults. Also fixed the comparison of codecs. [\#4523](https://github.com/ClickHouse/ClickHouse/pull/4523) ([alesapin](https://github.com/alesapin)) -- Fixed crash on JOIN with an array or tuple. [\#4552](https://github.com/ClickHouse/ClickHouse/pull/4552) ([Artem Zuikov](https://github.com/4ertus2)) -- Fixed crash in clickhouse-copier with the message `ThreadStatus not created`. [\#4540](https://github.com/ClickHouse/ClickHouse/pull/4540) ([Artem Zuikov](https://github.com/4ertus2)) -- Fixed hangup on server shutdown if distributed DDLs were used. [\#4472](https://github.com/ClickHouse/ClickHouse/pull/4472) ([Alex Zatelepin](https://github.com/ztlpn)) -- Incorrect column numbers were printed in the error message about text format parsing for columns with numbers greater than 10. [\#4484](https://github.com/ClickHouse/ClickHouse/pull/4484) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-3} - -- Fixed build with AVX enabled. [\#4527](https://github.com/ClickHouse/ClickHouse/pull/4527) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Enable extended accounting and IO accounting based on a well-known version instead of the kernel under which it is compiled. [\#4541](https://github.com/ClickHouse/ClickHouse/pull/4541) ([nvartolomei](https://github.com/nvartolomei)) -- Allow skipping the core\_dump.size\_limit setting, with a warning instead of a throw if setting the limit fails. [\#4473](https://github.com/ClickHouse/ClickHouse/pull/4473) ([proller](https://github.com/proller)) -- Removed the `inline` tags of `void readBinary(...)` in `Field.cpp`. Also merged redundant `namespace DB` blocks. [\#4530](https://github.com/ClickHouse/ClickHouse/pull/4530) ([hcz](https://github.com/hczhcz)) - -### ClickHouse release 19.3.5, 2019-02-21 {#clickhouse-release-19-3-5-2019-02-21} - -#### Bug Fixes {#bug-fixes-15} - -- Fixed a bug with large HTTP INSERT query processing. [\#4454](https://github.com/ClickHouse/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin)) -- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed backward incompatibility of the `remote` table function introduced with column comments.
[\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.3.4, 2019-02-16 {#clickhouse-release-19-3-4-2019-02-16} - -#### Improvements {#improvements-4} - -- Table index size is not accounted for memory limits when doing an `ATTACH TABLE` query. Avoided the possibility that a table cannot be attached after being detached. [\#4396](https://github.com/ClickHouse/ClickHouse/pull/4396) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Slightly raised the limit on the max string and array size received from ZooKeeper. It allows continuing to work with an increased `CLIENT_JVMFLAGS=-Djute.maxbuffer=...` size on ZooKeeper. [\#4398](https://github.com/ClickHouse/ClickHouse/pull/4398) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Allow repairing an abandoned replica even if it already has a huge number of nodes in its queue. [\#4399](https://github.com/ClickHouse/ClickHouse/pull/4399) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Add one required argument to the `SET` index (the max number of stored rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0)) - -#### Bug Fixes {#bug-fixes-16} - -- Fixed the `WITH ROLLUP` result for GROUP BY by a single `LowCardinality` key. [\#4384](https://github.com/ClickHouse/ClickHouse/pull/4384) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Fixed a bug in the set index (dropping a granule if it contains more than `max_rows` rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0)) -- A lot of FreeBSD build fixes. [\#4397](https://github.com/ClickHouse/ClickHouse/pull/4397) ([proller](https://github.com/proller)) -- Fixed alias substitution in queries with a subquery containing the same alias (issue [\#4110](https://github.com/ClickHouse/ClickHouse/issues/4110)). [\#4351](https://github.com/ClickHouse/ClickHouse/pull/4351) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-4} - -- Add the ability to run `clickhouse-server` for stateless tests in a docker image. [\#4347](https://github.com/ClickHouse/ClickHouse/pull/4347) ([Vasily Nemkov](https://github.com/Enmk)) - -### ClickHouse release 19.3.3, 2019-02-13 {#clickhouse-release-19-3-3-2019-02-13} - -#### New Features {#new-features-6} - -- Added the `KILL MUTATION` statement that allows removing mutations that are for some reason stuck. Added `latest_failed_part`, `latest_fail_time`, `latest_fail_reason` fields to the `system.mutations` table for easier troubleshooting. [\#4287](https://github.com/ClickHouse/ClickHouse/pull/4287) ([Alex Zatelepin](https://github.com/ztlpn)) -- Added the aggregate function `entropy` which computes Shannon entropy. [\#4238](https://github.com/ClickHouse/ClickHouse/pull/4238) ([Quid37](https://github.com/Quid37)) -- Added the ability to send queries `INSERT INTO tbl VALUES (....` to the server without splitting them into `query` and `data` parts. [\#4301](https://github.com/ClickHouse/ClickHouse/pull/4301) ([alesapin](https://github.com/alesapin)) -- A generic implementation of the `arrayWithConstant` function was added (a usage sketch of it and of `entropy` follows below). [\#4322](https://github.com/ClickHouse/ClickHouse/pull/4322) ([alexey-milovidov](https://github.com/alexey-milovidov))
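For context, a minimal sketch of the two functions mentioned above; the expected results in the comments follow from the functions' descriptions and are not quoted from the pull requests:

```sql
-- arrayWithConstant(length, elem) builds an array of `length` copies of `elem`.
SELECT arrayWithConstant(3, 1) AS arr;               -- [1, 1, 1]

-- entropy computes the Shannon entropy of a column; two equiprobable
-- values (0 and 1 produced by `number % 2`) give exactly 1 bit.
SELECT entropy(number % 2) AS h FROM numbers(10);    -- 1
```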
-- Implemented the `NOT BETWEEN` comparison operator. [\#4228](https://github.com/ClickHouse/ClickHouse/pull/4228) ([Dmitry Naumov](https://github.com/nezed)) -- Implement `sumMapFiltered` in order to be able to limit the number of keys for which values will be summed by `sumMap`. [\#4129](https://github.com/ClickHouse/ClickHouse/pull/4129) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) -- Added support of `Nullable` types in the `mysql` table function. [\#4198](https://github.com/ClickHouse/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin)) -- Support arbitrary constant expressions in the `LIMIT` clause. [\#4246](https://github.com/ClickHouse/ClickHouse/pull/4246) ([k3box](https://github.com/k3box)) -- Added the `topKWeighted` aggregate function that takes an additional argument with an (unsigned integer) weight. [\#4245](https://github.com/ClickHouse/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman)) -- `StorageJoin` now supports the `join_any_take_last_row` setting that allows overwriting existing values of the same key. [\#3973](https://github.com/ClickHouse/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird)) -- Added the `toStartOfInterval` function. [\#4304](https://github.com/ClickHouse/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar)) -- Added the `RowBinaryWithNamesAndTypes` format. [\#4200](https://github.com/ClickHouse/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)) -- Added the `IPv4` and `IPv6` data types. More effective implementations of `IPv*` functions. [\#3669](https://github.com/ClickHouse/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk)) -- Added the `toStartOfTenMinutes()` function. [\#4298](https://github.com/ClickHouse/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar)) -- Added the `Protobuf` output format. [\#4005](https://github.com/ClickHouse/ClickHouse/pull/4005) [\#4158](https://github.com/ClickHouse/ClickHouse/pull/4158) ([Vitaly Baranov](https://github.com/vitlibar)) -- Added brotli support for the HTTP interface for data import (INSERTs). [\#4235](https://github.com/ClickHouse/ClickHouse/pull/4235) ([Mikhail](https://github.com/fandyushin)) -- Added hints while the user makes a typo in a function name or types in the command line client. [\#4239](https://github.com/ClickHouse/ClickHouse/pull/4239) ([Danila Kutenin](https://github.com/danlark1)) -- Added `Query-Id` to the server's HTTP Response header. [\#4231](https://github.com/ClickHouse/ClickHouse/pull/4231) ([Mikhail](https://github.com/fandyushin)) - -#### Experimental Features {#experimental-features-2} - -- Added `minmax` and `set` data skipping indices for the MergeTree table engines family (see the sketch after this list). [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0)) -- Added conversion of `CROSS JOIN` to `INNER JOIN` if possible. [\#4221](https://github.com/ClickHouse/ClickHouse/pull/4221) [\#4266](https://github.com/ClickHouse/ClickHouse/pull/4266) ([Artem Zuikov](https://github.com/4ertus2))
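For context, a minimal sketch of declaring the experimental skipping indices; the table and index names are placeholders, and enabling the feature via `allow_experimental_data_skipping_indices` is an assumption about the setting of that era, not something stated in the entries above:

```sql
SET allow_experimental_data_skipping_indices = 1;   -- needed while the feature was experimental

CREATE TABLE skip_idx_demo
(
    ts  DateTime,
    id  UInt64,
    s   String,
    INDEX idx_id id TYPE minmax GRANULARITY 4,      -- min/max per block of 4 index granules
    INDEX idx_s  s  TYPE set(100) GRANULARITY 4     -- up to 100 distinct values per block
) ENGINE = MergeTree()
ORDER BY ts;
```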
- -#### Bug Fixes {#bug-fixes-17} - -- Fixed `Not found column` for duplicate columns in the `JOIN ON` section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2)) -- Make the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei)) -- Fixed execution of aggregate functions with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query while the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a crash on dictionary reload if the dictionary is not available. This bug appeared in 19.1.6. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller)) -- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2)) -- Fixed segmentation fault with `use_uncompressed_cache=1` and an exception with a wrong uncompressed size. This bug appeared in 19.1.6. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin)) -- Fixed a `compile_expressions` bug with the comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin)) -- Fixed an infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014)) -- Fixed the `Illegal instruction` error when using base64 functions on old CPUs. This error has been reproduced only when ClickHouse was compiled with gcc-8. [\#4275](https://github.com/ClickHouse/ClickHouse/pull/4275) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed the `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixes a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (the `if` function). Added a generic case for the `if` function. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- ClickHouse dictionaries now load within the `clickhouse` process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a race condition when selecting from `system.tables` that may give a `table doesn't exist` error.
[\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- `clickhouse-client` can segfault on exit while loading data for command line suggestions if it was run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn)) -- Fixed a bug: if there is a database with a `Dictionary` engine, all dictionaries are force loaded at server startup, and if there is a dictionary with a ClickHouse source from localhost, the dictionary cannot load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a bug when system logs were tried to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird)) -- Added the `sumMapWithOverflow` function. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) -- Fixed segfault with `allow_experimental_multiple_joins_emulation`. [52de2c](https://github.com/ClickHouse/ClickHouse/commit/52de2cd927f7b5257dd67e175f0a5560a48840d0) ([Artem Zuikov](https://github.com/4ertus2)) -- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey)) -- Fixed a fuzz test under undefined behaviour sanitizer: added a parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a rare race condition when removal of old data parts can fail with the `File not found` error. [\#4378](https://github.com/ClickHouse/ClickHouse/pull/4378) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fix install package with missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller)) - -#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-5} - -- Debian package: correct the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller)) -- Various build fixes for FreeBSD. [\#4225](https://github.com/ClickHouse/ClickHouse/pull/4225) ([proller](https://github.com/proller)) -- Added the ability to create, fill and drop tables in perftest. [\#4220](https://github.com/ClickHouse/ClickHouse/pull/4220) ([alesapin](https://github.com/alesapin)) -- Added a script to check for duplicate includes. [\#4326](https://github.com/ClickHouse/ClickHouse/pull/4326) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added the ability to run queries by index in the performance test.
[\#4264](https://github.com/ClickHouse/ClickHouse/pull/4264) ([alesapin](https://github.com/alesapin)) -- The package with debug symbols is suggested to be installed. [\#4274](https://github.com/ClickHouse/ClickHouse/pull/4274) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Refactoring of performance-test. Better logging and signals handling. [\#4171](https://github.com/ClickHouse/ClickHouse/pull/4171) ([alesapin](https://github.com/alesapin)) -- Added docs to the anonymized Yandex.Metrica datasets. [\#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([alesapin](https://github.com/alesapin)) -- Added tool for converting an old month-partitioned part to the custom-partitioned format. [\#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alex Zatelepin](https://github.com/ztlpn)) -- Added docs about two datasets in s3. [\#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([alesapin](https://github.com/alesapin)) -- Added a script which creates a changelog from pull request descriptions. [\#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [\#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai)) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Added a Puppet module for ClickHouse. [\#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov)) -- Added docs for a group of undocumented functions. [\#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([Winter Zhang](https://github.com/zhang2014)) -- ARM build fixes. [\#4210](https://github.com/ClickHouse/ClickHouse/pull/4210) [\#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [\#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller)) ([proller](https://github.com/proller)) -- Dictionary tests now can run from `ctest`. [\#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller)) -- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added checking of SSE and AVX instructions at start. [\#4234](https://github.com/ClickHouse/ClickHouse/pull/4234) ([Igr](https://github.com/igron99)) -- The init script will wait for the server until startup. [\#4281](https://github.com/ClickHouse/ClickHouse/pull/4281) ([proller](https://github.com/proller)) - -#### Backward Incompatible Changes {#backward-incompatible-changes-1} - -- Removed the `allow_experimental_low_cardinality_type` setting. `LowCardinality` data types are production ready. [\#4323](https://github.com/ClickHouse/ClickHouse/pull/4323) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Reduce the mark cache size and the uncompressed cache size accordingly to the available memory amount. [\#4240](https://github.com/ClickHouse/ClickHouse/pull/4240) ([Lopatin Konstantin](https://github.com/k-lopatin)) -- Added the keyword `INDEX` in the `CREATE TABLE` query. A column with the name `index` has to be quoted with backticks or double quotes: `` `index` ``. [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0)) -- `sumMap` now promotes the result type instead of overflowing. The old `sumMap` behavior can be obtained by using the `sumMapWithOverflow` function (see the sketch after this list). [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
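For context, a minimal sketch of the behavioral difference; the literal values are made up for illustration, and the assumption that unsigned sums are promoted to a wider integer follows from the entry above:

```sql
-- With UInt8 values, 200 + 100 = 300 does not fit into UInt8.
-- sumMap promotes the result type (to a wider integer), while
-- sumMapWithOverflow keeps UInt8 and wraps around: 300 mod 256 = 44.
SELECT
    sumMap(k, v)             AS promoted,
    sumMapWithOverflow(k, v) AS wrapped
FROM
(
    SELECT [1, 2] AS k, CAST([200, 1], 'Array(UInt8)') AS v
    UNION ALL
    SELECT [1, 2] AS k, CAST([100, 1], 'Array(UInt8)') AS v
);
```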
- -#### Performance Improvements {#performance-improvements-4} - -- `std::sort` replaced by `pdqsort` for queries without `LIMIT`. [\#4236](https://github.com/ClickHouse/ClickHouse/pull/4236) ([Evgenii Pravda](https://github.com/kvinty)) -- Now the server reuses threads from the global thread pool. This affects performance in some corner cases. [\#4150](https://github.com/ClickHouse/ClickHouse/pull/4150) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Improvements {#improvements-5} - -- Implemented AIO support for FreeBSD. [\#4305](https://github.com/ClickHouse/ClickHouse/pull/4305) ([urgordeadbeef](https://github.com/urgordeadbeef)) -- `SELECT * FROM a JOIN b USING a, b` now returns the `a` and `b` columns only from the left table. [\#4141](https://github.com/ClickHouse/ClickHouse/pull/4141) ([Artem Zuikov](https://github.com/4ertus2)) -- Allow the `-C` option of the client to work the same as the `-c` option. [\#4232](https://github.com/ClickHouse/ClickHouse/pull/4232) ([syominsergey](https://github.com/syominsergey)) -- Now the `--password` option used without a value requires the password from stdin. [\#4230](https://github.com/ClickHouse/ClickHouse/pull/4230) ([BSD\_Conqueror](https://github.com/bsd-conqueror)) -- Added highlighting of unescaped metacharacters in string literals that contain `LIKE` expressions or regexps. [\#4327](https://github.com/ClickHouse/ClickHouse/pull/4327) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added cancelling of HTTP read-only queries if the client socket goes away. [\#4213](https://github.com/ClickHouse/ClickHouse/pull/4213) ([nvartolomei](https://github.com/nvartolomei)) -- Now the server reports progress to keep client connections alive. [\#4215](https://github.com/ClickHouse/ClickHouse/pull/4215) ([Ivan](https://github.com/abyss7)) -- Slightly better message with a reason for an OPTIMIZE query with the `optimize_throw_if_noop` setting enabled. [\#4294](https://github.com/ClickHouse/ClickHouse/pull/4294) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added support of the `--version` option for the clickhouse server. [\#4251](https://github.com/ClickHouse/ClickHouse/pull/4251) ([Lopatin Konstantin](https://github.com/k-lopatin)) -- Added the `--help/-h` option to `clickhouse-server`. [\#4233](https://github.com/ClickHouse/ClickHouse/pull/4233) ([Yuriy Baranov](https://github.com/yurriy)) -- Added support for scalar subqueries with an aggregate function state result. [\#4348](https://github.com/ClickHouse/ClickHouse/pull/4348) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Improved server shutdown time and ALTERs waiting time. [\#4372](https://github.com/ClickHouse/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added info about the replicated\_can\_become\_leader setting to system.replicas and added logging if the replica won't try to become leader. [\#4379](https://github.com/ClickHouse/ClickHouse/pull/4379) ([Alex Zatelepin](https://github.com/ztlpn)) - -## ClickHouse release 19.1 {#clickhouse-release-19-1} - -### ClickHouse release 19.1.14, 2019-03-14 {#clickhouse-release-19-1-14-2019-03-14} -
-- Fixed error `Column ... queried more than once` that may happen if the setting `asterisk_left_columns_only` is set to 1 in case of using `GLOBAL JOIN` with `SELECT *` (rare case). The issue doesn't exist in 19.3 and newer. [6bac7d8d](https://github.com/ClickHouse/ClickHouse/pull/4692/commits/6bac7d8d11a9b0d6de0b32b53c47eb2f6f8e7062) ([Artem Zuikov](https://github.com/4ertus2)) - -### ClickHouse release 19.1.13, 2019-03-12 {#clickhouse-release-19-1-13-2019-03-12} - This release contains exactly the same set of patches as 19.3.7. - -### ClickHouse release 19.1.10, 2019-03-03 {#clickhouse-release-19-1-10-2019-03-03} - This release contains exactly the same set of patches as 19.3.6. - -## ClickHouse release 19.1 {#clickhouse-release-19-1-1} - -### ClickHouse release 19.1.9, 2019-02-21 {#clickhouse-release-19-1-9-2019-02-21} - -#### Bug Fixes {#bug-fixes-18} - -- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed backward incompatibility of the `remote` table function introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.1.8, 2019-02-16 {#clickhouse-release-19-1-8-2019-02-16} - -#### Bug Fixes {#bug-fixes-19} - -- Fix install package with missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller)) - -## ClickHouse release 19.1 {#clickhouse-release-19-1-2} - -### ClickHouse release 19.1.7, 2019-02-15 {#clickhouse-release-19-1-7-2019-02-15} - -#### Bug Fixes {#bug-fixes-20} - -- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird)) -- Fixed a bug when system logs were tried to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a bug: if there is a database with a `Dictionary` engine, all dictionaries are force loaded at server startup, and if there is a dictionary with a ClickHouse source from localhost, the dictionary cannot load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn)) -- `clickhouse-client` can segfault on exit while loading data for command line suggestions if it was run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a race condition when selecting from `system.tables` that may give a `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error.
[\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed an issue: local ClickHouse dictionaries are loaded via TCP, but should load within the process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed the `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixes a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014)) -- Fixed an infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a `compile_expressions` bug with the comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin)) -- Fixed segmentation fault with `uncompressed_cache=1` and an exception with a wrong uncompressed size. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin)) -- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2)) -- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query while the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed execution of aggregate functions with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Debian package: correct the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller)) -- Fixed a fuzz test under undefined behaviour sanitizer: added a parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Make the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei)) -- Fixed `Not found column` for duplicate columns in the JOIN ON section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2)) -- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a crash on dictionary reload if the dictionary is not available. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller)) -- Fixed a bug with incorrect `Date` and `DateTime` comparison.
[\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey)) -- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (the `if` function). Added a generic case for the `if` function. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -### ClickHouse release 19.1.6, 2019-01-24 {#clickhouse-release-19-1-6-2019-01-24} - -#### New Features {#new-features-7} - -- Custom per-column compression codecs for tables. [\#3899](https://github.com/ClickHouse/ClickHouse/pull/3899) [\#4111](https://github.com/ClickHouse/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag)) -- Added the `Delta` compression codec. [\#4052](https://github.com/ClickHouse/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin)) -- Allow to `ALTER` compression codecs. [\#4054](https://github.com/ClickHouse/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin)) -- Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [\#3826](https://github.com/ClickHouse/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov)) -- Support for writing to an `HDFS` table and the `hdfs` table function. [\#4084](https://github.com/ClickHouse/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) -- Added functions for searching for multiple constant strings in a big haystack: `multiPosition`, `multiSearch`, `firstMatch`, also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [\#4053](https://github.com/ClickHouse/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1)) -- Pruning of unused shards if the `SELECT` query filters by the sharding key (setting `optimize_skip_unused_shards`). [\#3851](https://github.com/ClickHouse/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7)) -- Allow the `Kafka` engine to ignore some number of parsing errors per block. [\#4094](https://github.com/ClickHouse/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7)) -- Added support for `CatBoost` multiclass model evaluation. The `modelEvaluate` function returns a tuple with raw per-class predictions for multiclass models. `libcatboostmodel.so` should be built with [\#607](https://github.com/catboost/catboost/pull/607). [\#3959](https://github.com/ClickHouse/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [\#4097](https://github.com/ClickHouse/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea)) -- Added hashing functions `xxHash64` and `xxHash32`. [\#3905](https://github.com/ClickHouse/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov)) -- Added the `gccMurmurHash` hashing function (GCC flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). [\#4000](https://github.com/ClickHouse/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li)) -- Added hashing functions `javaHash`, `hiveHash` (a usage sketch follows below). [\#3811](https://github.com/ClickHouse/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365))
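For context, a minimal sketch of the new hashing functions; the compatibility comments paraphrase the functions' documented intent and are assumptions, not quotes from the pull requests:

```sql
SELECT
    xxHash32('ClickHouse') AS h32,   -- 32-bit xxHash
    xxHash64('ClickHouse') AS h64,   -- 64-bit xxHash
    javaHash('ClickHouse') AS jh,    -- matches java.lang.String.hashCode()
    hiveHash('ClickHouse') AS hh;    -- javaHash with the sign bit zeroed
```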
-- Added the `remoteSecure` table function. The function works like `remote`, but uses a secure connection. [\#4088](https://github.com/ClickHouse/ClickHouse/pull/4088) ([proller](https://github.com/proller)) - -#### Experimental Features {#experimental-features-3} - -- Added multiple JOINs emulation (the `allow_experimental_multiple_joins_emulation` setting). [\#3946](https://github.com/ClickHouse/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2)) - -#### Bug Fixes {#bug-fixes-21} - -- Made the `compiled_expression_cache_size` setting limited by default to lower memory consumption. [\#4041](https://github.com/ClickHouse/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin)) -- Fixed a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3891](https://github.com/ClickHouse/ClickHouse/issues/3891) [\#3934](https://github.com/ClickHouse/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn)) -- Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [\#3904](https://github.com/ClickHouse/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn)) -- Fix a bug when `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3947](https://github.com/ClickHouse/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn)) -- Fixed a bug with a wrong prefix for IPv4 subnet masks. [\#3945](https://github.com/ClickHouse/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin)) -- Fixed a crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [\#3956](https://github.com/ClickHouse/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [\#4009](https://github.com/ClickHouse/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin)) -- Fix a leak of netlink sockets. They were placed in a pool where they were never deleted, and new sockets were created at the start of a new thread when all current sockets were in use. [\#4017](https://github.com/ClickHouse/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn)) -- Fixed a bug with closing the `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking the `odbc-bridge` subprocess. [\#4120](https://github.com/ClickHouse/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin)) -- Fixed String-to-UInt monotonic conversion in case of usage of String in a primary key. [\#3870](https://github.com/ClickHouse/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014)) -- Fixed a bug in the calculation of integer conversion functions' monotonicity.
[\#3921](https://github.com/ClickHouse/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a segfault in the `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [\#3909](https://github.com/ClickHouse/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fix UB in StorageMerge. [\#3910](https://github.com/ClickHouse/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird)) -- Fixed a segfault in the functions `addDays`, `subtractDays`. [\#3913](https://github.com/ClickHouse/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed an error: the functions `round`, `floor`, `trunc`, `ceil` may return a bogus result when executed on an integer argument with a large negative scale. [\#3914](https://github.com/ClickHouse/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed a bug induced by ‘kill query sync’ which led to a core dump. [\#3916](https://github.com/ClickHouse/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx)) -- Fixed a bug with a long delay after an empty replication queue. [\#3928](https://github.com/ClickHouse/ClickHouse/pull/3928) [\#3932](https://github.com/ClickHouse/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin)) -- Fixed excessive memory usage in case of inserting into a table with a `LowCardinality` primary key. [\#3955](https://github.com/ClickHouse/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Fixed `LowCardinality` serialization for the `Native` format in case of empty arrays. [\#3907](https://github.com/ClickHouse/ClickHouse/issues/3907) [\#4011](https://github.com/ClickHouse/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Fixed an incorrect result while using distinct by a single numeric LowCardinality column. [\#3895](https://github.com/ClickHouse/ClickHouse/issues/3895) [\#4012](https://github.com/ClickHouse/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Fixed specialized aggregation with a LowCardinality key (in case the `compile` setting is enabled). [\#3886](https://github.com/ClickHouse/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Fixed user and password forwarding for replicated tables queries. [\#3957](https://github.com/ClickHouse/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu)) -- Fixed a very rare race condition that can happen when listing tables in a Dictionary database while reloading dictionaries. [\#3970](https://github.com/ClickHouse/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed an incorrect result when ROLLUP or CUBE was used. [\#3756](https://github.com/ClickHouse/ClickHouse/issues/3756) [\#3837](https://github.com/ClickHouse/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection)) -- Fixed column aliases for queries with `JOIN ON` syntax and distributed tables. [\#3980](https://github.com/ClickHouse/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014)) -- Fixed an error in the internal implementation of `quantileTDigest` (found by Artem Vakhrushev).
This error never happens in ClickHouse itself and was relevant only for those who use the ClickHouse codebase directly as a library. [\#3935](https://github.com/ClickHouse/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Improvements {#improvements-6} - -- Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements, along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [\#3900](https://github.com/ClickHouse/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea)) -- Function `parseDateTimeBestEffort`: support for the formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [\#3922](https://github.com/ClickHouse/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- `CapnProtoInputStream` now supports jagged structures. [\#4063](https://github.com/ClickHouse/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer)) -- Usability improvement: added a check that the server process is started from the data directory's owner. Do not allow starting the server from root if the data belongs to a non-root user. [\#3785](https://github.com/ClickHouse/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev)) -- Better logic of checking required columns during analysis of queries with JOINs. [\#3930](https://github.com/ClickHouse/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2)) -- Decreased the number of connections in case of a large number of Distributed tables on a single server. [\#3726](https://github.com/ClickHouse/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014)) -- Supported totals row for a `WITH TOTALS` query for the ODBC driver. [\#3836](https://github.com/ClickHouse/ClickHouse/pull/3836) ([Maksim Koritskiy](https://github.com/nightweb)) -- Allowed to use `Enum`s as integers inside the `if` function. [\#3875](https://github.com/ClickHouse/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7)) -- Added the `low_cardinality_allow_in_native_format` setting. If disabled, the `LowCardinality` type is not used in the `Native` format. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Removed some redundant objects from the compiled expressions cache to lower memory usage. [\#4042](https://github.com/ClickHouse/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin)) -- Added a check that the `SET send_logs_level = 'value'` query accepts an appropriate value. [\#3873](https://github.com/ClickHouse/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx)) -- Fixed the data type check in type conversion functions. [\#3896](https://github.com/ClickHouse/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014)) - -#### Performance Improvements {#performance-improvements-5} - -- Added the MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce the ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it (a usage sketch follows below). [\#3960](https://github.com/ClickHouse/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn))
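For context, a minimal sketch of enabling this MergeTree setting at table creation time; the table name, columns and ZooKeeper path are placeholders, not taken from the referenced change:

```sql
CREATE TABLE events_replica
(
    event_date Date,
    event_id   UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/events', '{replica}')
ORDER BY (event_date, event_id)
SETTINGS use_minimalistic_part_header_in_zookeeper = 1;  -- compact part metadata in a single znode
```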
-- Added a DFA-based implementation for the functions `sequenceMatch` and `sequenceCount` in case the pattern doesn't contain time. [\#4004](https://github.com/ClickHouse/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) -- Performance improvement of integer number serialization. [\#3968](https://github.com/ClickHouse/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird)) -- Zero left padding of PODArray so that the -1 element is always valid and zeroed. It is used for branchless calculation of offsets. [\#3920](https://github.com/ClickHouse/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird)) -- Reverted the `jemalloc` version which led to performance degradation. [\#4018](https://github.com/ClickHouse/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Backward Incompatible Changes {#backward-incompatible-changes-2} - -- Removed the undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [\#3887](https://github.com/ClickHouse/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn)) -- Removed the `shardByHash` function. [\#3833](https://github.com/ClickHouse/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Forbid using scalar subqueries with a result of type `AggregateFunction`. [\#3865](https://github.com/ClickHouse/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7)) - -#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-6} - -- Added support for the PowerPC (`ppc64le`) build. [\#4132](https://github.com/ClickHouse/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1)) -- Stateful functional tests are run on a publicly available dataset. [\#3969](https://github.com/ClickHouse/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed an error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [\#4136](https://github.com/ClickHouse/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Updated the `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of the raw C interface. [\#4025](https://github.com/ClickHouse/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7)) -- Updated the `mariadb-client` library. Fixed one of the issues found by UBSan. [\#3924](https://github.com/ClickHouse/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Some fixes for UBSan builds. [\#3926](https://github.com/ClickHouse/ClickHouse/pull/3926) [\#3021](https://github.com/ClickHouse/ClickHouse/pull/3021) [\#3948](https://github.com/ClickHouse/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added per-commit runs of tests with the UBSan build. -- Added per-commit runs of the PVS-Studio static analyzer. -- Fixed bugs found by PVS-Studio. [\#4013](https://github.com/ClickHouse/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed glibc compatibility issues.
[\#4100](https://github.com/ClickHouse/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Moved Docker images to 18.10 and added a compatibility file for glibc \>= 2.28. [\#3965](https://github.com/ClickHouse/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin)) -- Added an env variable for the case when the user does not want to chown directories in the server Docker image. [\#3967](https://github.com/ClickHouse/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin)) -- Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [\#3986](https://github.com/ClickHouse/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Added a few more warnings that are available only in clang 8. [\#3993](https://github.com/ClickHouse/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [\#3989](https://github.com/ClickHouse/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej)) -- Added sanitizer variables for test images. [\#4072](https://github.com/ClickHouse/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin)) -- The `clickhouse-server` debian package will recommend the `libcap2-bin` package to use the `setcap` tool for setting capabilities. This is optional. [\#4093](https://github.com/ClickHouse/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Improved compilation time, fixed includes. [\#3898](https://github.com/ClickHouse/ClickHouse/pull/3898) ([proller](https://github.com/proller)) -- Added performance tests for hash functions. [\#3918](https://github.com/ClickHouse/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov)) -- Fixed cyclic library dependencies. [\#3958](https://github.com/ClickHouse/ClickHouse/pull/3958) ([proller](https://github.com/proller)) -- Improved compilation with low available memory. [\#4030](https://github.com/ClickHouse/ClickHouse/pull/4030) ([proller](https://github.com/proller)) -- Added a test script to reproduce performance degradation in `jemalloc`. [\#4036](https://github.com/ClickHouse/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Fixed misspells in comments and string literals under `dbms`. [\#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha)) -- Fixed typos in comments.
[\#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty)) - -## [Changelog for 2018](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2018.md) {#changelog-for-2018} diff --git a/docs/ru/whats_new/changelog/2019.md b/docs/ru/whats_new/changelog/2019.md new file mode 120000 index 00000000000..905836eef7a --- /dev/null +++ b/docs/ru/whats_new/changelog/2019.md @@ -0,0 +1 @@ +en/whats_new/changelog/2019.md \ No newline at end of file diff --git a/docs/ru/whats_new/roadmap.md b/docs/ru/whats_new/roadmap.md deleted file mode 100644 index 3994ed4ac29..00000000000 --- a/docs/ru/whats_new/roadmap.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 1cd5f0028d917696daf71ac1c9ee849c99c1d5c8 --- - -# Roadmap {#roadmap} - -## Q1 2020 {#q1-2020} - -- Role-based access control - -## Q2 2020 {#q2-2020} - -- Integration with external authentication services -- Resource pools for more precise distribution of cluster capacity between users - -{## [Original article](https://clickhouse.tech/docs/en/roadmap/) ##} diff --git a/docs/ru/whats_new/roadmap.md b/docs/ru/whats_new/roadmap.md new file mode 120000 index 00000000000..81184f9c26c --- /dev/null +++ b/docs/ru/whats_new/roadmap.md @@ -0,0 +1 @@ +en/whats_new/roadmap.md \ No newline at end of file From b00d9c78550840735a13aba1a47c50cd061c9f91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Apr 2020 17:56:54 +0300 Subject: [PATCH 273/752] Fix bad translation, step 1: remove files #10191 --- docs/ru/development/architecture.md | 1 - docs/ru/development/build.md | 1 - docs/ru/development/build_cross_arm.md | 1 - docs/ru/development/build_cross_osx.md | 1 - docs/ru/development/build_osx.md | 1 - docs/ru/development/index.md | 1 - docs/ru/development/tests.md | 1 - docs/ru/engines/table_engines/special/generate.md | 1 - docs/ru/getting_started/tutorial.md | 1 - docs/ru/introduction/adopters.md | 1 - .../operations/optimizing_performance/sampling_query_profiler.md | 1 - docs/ru/operations/performance_test.md | 1 - docs/ru/operations/utilities/clickhouse-benchmark.md | 1 - docs/ru/whats_new/changelog/2017.md | 1 - docs/ru/whats_new/changelog/2018.md | 1 - docs/ru/whats_new/changelog/2019.md | 1 - docs/ru/whats_new/roadmap.md | 1 - 17 files changed, 17 deletions(-) delete mode 120000 docs/ru/development/architecture.md delete mode 120000 docs/ru/development/build.md delete mode 120000 docs/ru/development/build_cross_arm.md delete mode 120000 docs/ru/development/build_cross_osx.md delete mode 120000 docs/ru/development/build_osx.md delete mode 120000 docs/ru/development/index.md delete mode 120000 docs/ru/development/tests.md delete mode 120000 docs/ru/engines/table_engines/special/generate.md delete mode 120000 docs/ru/getting_started/tutorial.md delete mode 120000 docs/ru/introduction/adopters.md delete mode 120000 docs/ru/operations/optimizing_performance/sampling_query_profiler.md delete mode 120000 docs/ru/operations/performance_test.md delete mode 120000 docs/ru/operations/utilities/clickhouse-benchmark.md delete mode 120000 docs/ru/whats_new/changelog/2017.md delete mode 120000 docs/ru/whats_new/changelog/2018.md delete mode 120000 docs/ru/whats_new/changelog/2019.md delete mode 120000 docs/ru/whats_new/roadmap.md diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md deleted file mode 120000 index 61968e46da2..00000000000 --- a/docs/ru/development/architecture.md +++ /dev/null @@ -1 +0,0
@@ -en/development/architecture.md \ No newline at end of file diff --git a/docs/ru/development/build.md b/docs/ru/development/build.md deleted file mode 120000 index 156d8382515..00000000000 --- a/docs/ru/development/build.md +++ /dev/null @@ -1 +0,0 @@ -en/development/build.md \ No newline at end of file diff --git a/docs/ru/development/build_cross_arm.md b/docs/ru/development/build_cross_arm.md deleted file mode 120000 index ea33bb61837..00000000000 --- a/docs/ru/development/build_cross_arm.md +++ /dev/null @@ -1 +0,0 @@ -en/development/build_cross_arm.md \ No newline at end of file diff --git a/docs/ru/development/build_cross_osx.md b/docs/ru/development/build_cross_osx.md deleted file mode 120000 index d4dc16f2fbc..00000000000 --- a/docs/ru/development/build_cross_osx.md +++ /dev/null @@ -1 +0,0 @@ -en/development/build_cross_osx.md \ No newline at end of file diff --git a/docs/ru/development/build_osx.md b/docs/ru/development/build_osx.md deleted file mode 120000 index 5c38a2b001a..00000000000 --- a/docs/ru/development/build_osx.md +++ /dev/null @@ -1 +0,0 @@ -en/development/build_osx.md \ No newline at end of file diff --git a/docs/ru/development/index.md b/docs/ru/development/index.md deleted file mode 120000 index 754385a9f4b..00000000000 --- a/docs/ru/development/index.md +++ /dev/null @@ -1 +0,0 @@ -en/development/index.md \ No newline at end of file diff --git a/docs/ru/development/tests.md b/docs/ru/development/tests.md deleted file mode 120000 index ce23c881f32..00000000000 --- a/docs/ru/development/tests.md +++ /dev/null @@ -1 +0,0 @@ -en/development/tests.md \ No newline at end of file diff --git a/docs/ru/engines/table_engines/special/generate.md b/docs/ru/engines/table_engines/special/generate.md deleted file mode 120000 index 631f9bbba66..00000000000 --- a/docs/ru/engines/table_engines/special/generate.md +++ /dev/null @@ -1 +0,0 @@ -en/engines/table_engines/special/generate.md \ No newline at end of file diff --git a/docs/ru/getting_started/tutorial.md b/docs/ru/getting_started/tutorial.md deleted file mode 120000 index 18b86bb2e9c..00000000000 --- a/docs/ru/getting_started/tutorial.md +++ /dev/null @@ -1 +0,0 @@ -en/getting_started/tutorial.md \ No newline at end of file diff --git a/docs/ru/introduction/adopters.md b/docs/ru/introduction/adopters.md deleted file mode 120000 index b9b77a27eb9..00000000000 --- a/docs/ru/introduction/adopters.md +++ /dev/null @@ -1 +0,0 @@ -en/introduction/adopters.md \ No newline at end of file diff --git a/docs/ru/operations/optimizing_performance/sampling_query_profiler.md b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md deleted file mode 120000 index 565f39130fb..00000000000 --- a/docs/ru/operations/optimizing_performance/sampling_query_profiler.md +++ /dev/null @@ -1 +0,0 @@ -en/operations/optimizing_performance/sampling_query_profiler.md \ No newline at end of file diff --git a/docs/ru/operations/performance_test.md b/docs/ru/operations/performance_test.md deleted file mode 120000 index 3787adb92bd..00000000000 --- a/docs/ru/operations/performance_test.md +++ /dev/null @@ -1 +0,0 @@ -en/operations/performance_test.md \ No newline at end of file diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md deleted file mode 120000 index fda8b1a50c7..00000000000 --- a/docs/ru/operations/utilities/clickhouse-benchmark.md +++ /dev/null @@ -1 +0,0 @@ -en/operations/utilities/clickhouse-benchmark.md \ No newline at end of file diff --git 
a/docs/ru/whats_new/changelog/2017.md b/docs/ru/whats_new/changelog/2017.md deleted file mode 120000 index f278c42f170..00000000000 --- a/docs/ru/whats_new/changelog/2017.md +++ /dev/null @@ -1 +0,0 @@ -en/whats_new/changelog/2017.md \ No newline at end of file diff --git a/docs/ru/whats_new/changelog/2018.md b/docs/ru/whats_new/changelog/2018.md deleted file mode 120000 index 675c07e8bbb..00000000000 --- a/docs/ru/whats_new/changelog/2018.md +++ /dev/null @@ -1 +0,0 @@ -en/whats_new/changelog/2018.md \ No newline at end of file diff --git a/docs/ru/whats_new/changelog/2019.md b/docs/ru/whats_new/changelog/2019.md deleted file mode 120000 index 905836eef7a..00000000000 --- a/docs/ru/whats_new/changelog/2019.md +++ /dev/null @@ -1 +0,0 @@ -en/whats_new/changelog/2019.md \ No newline at end of file diff --git a/docs/ru/whats_new/roadmap.md b/docs/ru/whats_new/roadmap.md deleted file mode 120000 index 81184f9c26c..00000000000 --- a/docs/ru/whats_new/roadmap.md +++ /dev/null @@ -1 +0,0 @@ -en/whats_new/roadmap.md \ No newline at end of file From ab8900ecff65150e803c50984cde78e634e095c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Apr 2020 18:03:35 +0300 Subject: [PATCH 274/752] Fix bad translation, step 2: replace with symlinks #10191 --- docs/ru/development/architecture.md | 1 + docs/ru/development/build.md | 1 + docs/ru/development/build_cross_arm.md | 1 + docs/ru/development/build_cross_osx.md | 1 + docs/ru/development/build_osx.md | 1 + docs/ru/development/index.md | 1 + docs/ru/development/tests.md | 1 + docs/ru/engines/table_engines/special/generate.md | 1 + docs/ru/getting_started/tutorial.md | 1 + docs/ru/introduction/adopters.md | 1 + .../operations/optimizing_performance/sampling_query_profiler.md | 1 + docs/ru/operations/performance_test.md | 1 + docs/ru/operations/utilities/clickhouse-benchmark.md | 1 + docs/ru/whats_new/changelog/2017.md | 1 + docs/ru/whats_new/changelog/2018.md | 1 + docs/ru/whats_new/changelog/2019.md | 1 + docs/ru/whats_new/roadmap.md | 1 + 17 files changed, 17 insertions(+) create mode 120000 docs/ru/development/architecture.md create mode 120000 docs/ru/development/build.md create mode 120000 docs/ru/development/build_cross_arm.md create mode 120000 docs/ru/development/build_cross_osx.md create mode 120000 docs/ru/development/build_osx.md create mode 120000 docs/ru/development/index.md create mode 120000 docs/ru/development/tests.md create mode 120000 docs/ru/engines/table_engines/special/generate.md create mode 120000 docs/ru/getting_started/tutorial.md create mode 120000 docs/ru/introduction/adopters.md create mode 120000 docs/ru/operations/optimizing_performance/sampling_query_profiler.md create mode 120000 docs/ru/operations/performance_test.md create mode 120000 docs/ru/operations/utilities/clickhouse-benchmark.md create mode 120000 docs/ru/whats_new/changelog/2017.md create mode 120000 docs/ru/whats_new/changelog/2018.md create mode 120000 docs/ru/whats_new/changelog/2019.md create mode 120000 docs/ru/whats_new/roadmap.md diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md new file mode 120000 index 00000000000..abda4dd48a8 --- /dev/null +++ b/docs/ru/development/architecture.md @@ -0,0 +1 @@ +../../en/development/architecture.md \ No newline at end of file diff --git a/docs/ru/development/build.md b/docs/ru/development/build.md new file mode 120000 index 00000000000..480dbc2e9f5 --- /dev/null +++ b/docs/ru/development/build.md @@ -0,0 +1 @@ +../../en/development/build.md \ No newline at end of file 
diff --git a/docs/ru/development/build_cross_arm.md b/docs/ru/development/build_cross_arm.md new file mode 120000 index 00000000000..983a9872dc1 --- /dev/null +++ b/docs/ru/development/build_cross_arm.md @@ -0,0 +1 @@ +../../en/development/build_cross_arm.md \ No newline at end of file diff --git a/docs/ru/development/build_cross_osx.md b/docs/ru/development/build_cross_osx.md new file mode 120000 index 00000000000..72e64e8631f --- /dev/null +++ b/docs/ru/development/build_cross_osx.md @@ -0,0 +1 @@ +../../en/development/build_cross_osx.md \ No newline at end of file diff --git a/docs/ru/development/build_osx.md b/docs/ru/development/build_osx.md new file mode 120000 index 00000000000..f9adaf24584 --- /dev/null +++ b/docs/ru/development/build_osx.md @@ -0,0 +1 @@ +../../en/development/build_osx.md \ No newline at end of file diff --git a/docs/ru/development/index.md b/docs/ru/development/index.md new file mode 120000 index 00000000000..1e2ad97dcc5 --- /dev/null +++ b/docs/ru/development/index.md @@ -0,0 +1 @@ +../../en/development/index.md \ No newline at end of file diff --git a/docs/ru/development/tests.md b/docs/ru/development/tests.md new file mode 120000 index 00000000000..c03d36c3916 --- /dev/null +++ b/docs/ru/development/tests.md @@ -0,0 +1 @@ +../../en/development/tests.md \ No newline at end of file diff --git a/docs/ru/engines/table_engines/special/generate.md b/docs/ru/engines/table_engines/special/generate.md new file mode 120000 index 00000000000..566dc4e5382 --- /dev/null +++ b/docs/ru/engines/table_engines/special/generate.md @@ -0,0 +1 @@ +../../../../en/engines/table_engines/special/generate.md \ No newline at end of file diff --git a/docs/ru/getting_started/tutorial.md b/docs/ru/getting_started/tutorial.md new file mode 120000 index 00000000000..8bc40816ab2 --- /dev/null +++ b/docs/ru/getting_started/tutorial.md @@ -0,0 +1 @@ +../../en/getting_started/tutorial.md \ No newline at end of file diff --git a/docs/ru/introduction/adopters.md b/docs/ru/introduction/adopters.md new file mode 120000 index 00000000000..659153d5f6c --- /dev/null +++ b/docs/ru/introduction/adopters.md @@ -0,0 +1 @@ +../../en/introduction/adopters.md \ No newline at end of file diff --git a/docs/ru/operations/optimizing_performance/sampling_query_profiler.md b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md new file mode 120000 index 00000000000..9f3b57cd086 --- /dev/null +++ b/docs/ru/operations/optimizing_performance/sampling_query_profiler.md @@ -0,0 +1 @@ +../../../en/operations/optimizing_performance/sampling_query_profiler.md \ No newline at end of file diff --git a/docs/ru/operations/performance_test.md b/docs/ru/operations/performance_test.md new file mode 120000 index 00000000000..a74c126c63f --- /dev/null +++ b/docs/ru/operations/performance_test.md @@ -0,0 +1 @@ +../../en/operations/performance_test.md \ No newline at end of file diff --git a/docs/ru/operations/utilities/clickhouse-benchmark.md b/docs/ru/operations/utilities/clickhouse-benchmark.md new file mode 120000 index 00000000000..3695c9fbdd3 --- /dev/null +++ b/docs/ru/operations/utilities/clickhouse-benchmark.md @@ -0,0 +1 @@ +../../../en/operations/utilities/clickhouse-benchmark.md \ No newline at end of file diff --git a/docs/ru/whats_new/changelog/2017.md b/docs/ru/whats_new/changelog/2017.md new file mode 120000 index 00000000000..a098eddf1d8 --- /dev/null +++ b/docs/ru/whats_new/changelog/2017.md @@ -0,0 +1 @@ +../../../en/whats_new/changelog/2017.md \ No newline at end of file diff --git 
a/docs/ru/whats_new/changelog/2018.md b/docs/ru/whats_new/changelog/2018.md new file mode 120000 index 00000000000..124fb19e175 --- /dev/null +++ b/docs/ru/whats_new/changelog/2018.md @@ -0,0 +1 @@ +../../../en/whats_new/changelog/2018.md \ No newline at end of file diff --git a/docs/ru/whats_new/changelog/2019.md b/docs/ru/whats_new/changelog/2019.md new file mode 120000 index 00000000000..740d1edd238 --- /dev/null +++ b/docs/ru/whats_new/changelog/2019.md @@ -0,0 +1 @@ +../../../en/whats_new/changelog/2019.md \ No newline at end of file diff --git a/docs/ru/whats_new/roadmap.md b/docs/ru/whats_new/roadmap.md new file mode 120000 index 00000000000..5ef0ebdb1bb --- /dev/null +++ b/docs/ru/whats_new/roadmap.md @@ -0,0 +1 @@ +../../en/whats_new/roadmap.md \ No newline at end of file From 218b9b3c6ca0c67526449f12741f0db89117e2ec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Apr 2020 18:40:11 +0300 Subject: [PATCH 275/752] Remove garbage --- docs/en/operations/performance_test.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/operations/performance_test.md b/docs/en/operations/performance_test.md index d955b50fa02..8c93f4e5f19 100644 --- a/docs/en/operations/performance_test.md +++ b/docs/en/operations/performance_test.md @@ -24,7 +24,7 @@ With this instruction you can run basic ClickHouse performance test on any serve # Then do: chmod a+x clickhouse -1. Download configs: +5. Download configs: @@ -34,7 +34,7 @@ With this instruction you can run basic ClickHouse performance test on any serve wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml -1. Download benchmark files: +6. Download benchmark files: @@ -42,7 +42,7 @@ With this instruction you can run basic ClickHouse performance test on any serve chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql -1. Download test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction (“hits” table containing 100 million rows). +7. Download test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction (“hits” table containing 100 million rows). @@ -50,31 +50,31 @@ With this instruction you can run basic ClickHouse performance test on any serve tar xvf hits_100m_obfuscated_v1.tar.xz -C . mv hits_100m_obfuscated_v1/* . -1. Run the server: +8. Run the server: ./clickhouse server -1. Check the data: ssh to the server in another terminal +9. Check the data: ssh to the server in another terminal ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated" 100000000 -1. Edit the benchmark-new.sh, change “clickhouse-client” to “./clickhouse client” and add “–max\_memory\_usage 100000000000” parameter. +10. Edit the benchmark-new.sh, change “clickhouse-client” to “./clickhouse client” and add “–max\_memory\_usage 100000000000” parameter. mcedit benchmark-new.sh -1. Run the benchmark: +11. Run the benchmark: ./benchmark-new.sh hits_100m_obfuscated -1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com +12. 
Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com -All the results are published here: https://clickhouse.tech/benchmark\_hardware.html +All the results are published here: https://clickhouse.tech/benchmark_hardware.html From 59b5f88099dae933372b7eb439ecb66cd480bc9a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 11 Apr 2020 18:43:13 +0300 Subject: [PATCH 276/752] Eliminate the rot. --- docs/en/operations/performance_test.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/performance_test.md b/docs/en/operations/performance_test.md index 8c93f4e5f19..6b59ec6fedd 100644 --- a/docs/en/operations/performance_test.md +++ b/docs/en/operations/performance_test.md @@ -63,7 +63,7 @@ With this instruction you can run basic ClickHouse performance test on any serve ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated" 100000000 -10. Edit the benchmark-new.sh, change “clickhouse-client” to “./clickhouse client” and add “–max\_memory\_usage 100000000000” parameter. +10. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `--max_memory_usage 100000000000` parameter. From 1526722333d46e75aa3cc4f327bbb4b2b072744c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Apr 2020 18:54:16 +0300 Subject: [PATCH 277/752] Enforce that there is no machine translation to Russian #10191 --- utils/check-style/check-style | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 08b0e81c123..2a2e9dab42d 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -56,3 +56,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | while # Broken XML files (requires libxml2-utils) find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet + +# Machine translation to Russian is strictly prohibited +find $ROOT_PATH/docs/ru -name '*.md' | xargs grep -l -F 'machine_translated: true' From 3d1e5b4bc998d9bf4a62097673368d2d93da9158 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Apr 2020 19:34:24 +0300 Subject: [PATCH 278/752] Changed Slack Link (tnx. lnuynxa) --- website/templates/index/community.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/index/community.html b/website/templates/index/community.html index 47bcbd67218..e230cac8da9 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -69,7 +69,7 @@
Date: Sat, 11 Apr 2020 01:23:27 +0300 Subject: [PATCH 279/752] Fix using the current database for access checking when the database isn't specified. --- src/Access/ContextAccess.cpp | 33 ++++++++++++++++++++------------- src/Access/ContextAccess.h | 8 +++++++- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index cf788a0a63e..915593f58f0 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -196,7 +196,7 @@ bool ContextAccess::isClientHostAllowed() const template -bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const +bool ContextAccess::calculateResultAccessAndCheck(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const { auto access = calculateResultAccess(grant_option); bool is_granted = access->isGranted(flags, args...); @@ -267,6 +267,22 @@ bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & fla } +template +bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags) const +{ + return calculateResultAccessAndCheck(log_, flags); +} + +template +bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const Args &... args) const +{ + if (database.empty()) + return calculateResultAccessAndCheck(log_, flags, params.current_database, args...); + else + return calculateResultAccessAndCheck(log_, flags, database, args...); +} + + template bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessRightsElement & element) const { @@ -276,24 +292,15 @@ bool ContextAccess::checkAccessImpl(Poco::Logger * log_, const AccessRightsEleme } else if (element.any_table) { - if (element.database.empty()) - return checkAccessImpl(log_, element.access_flags, params.current_database); - else - return checkAccessImpl(log_, element.access_flags, element.database); + return checkAccessImpl(log_, element.access_flags, element.database); } else if (element.any_column) { - if (element.database.empty()) - return checkAccessImpl(log_, element.access_flags, params.current_database, element.table); - else - return checkAccessImpl(log_, element.access_flags, element.database, element.table); + return checkAccessImpl(log_, element.access_flags, element.database, element.table); } else { - if (element.database.empty()) - return checkAccessImpl(log_, element.access_flags, params.current_database, element.table, element.columns); - else - return checkAccessImpl(log_, element.access_flags, element.database, element.table, element.columns); + return checkAccessImpl(log_, element.access_flags, element.database, element.table, element.columns); } } diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index bee63103793..e0fbf58dbe8 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -130,8 +130,11 @@ private: void setRolesInfo(const std::shared_ptr & roles_info_) const; void setSettingsAndConstraints() const; + template + bool checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags) const; + template - bool checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const; + bool checkAccessImpl(Poco::Logger * log_, const AccessFlags & flags, const std::string_view & database, const Args &... 
args) const; template bool checkAccessImpl(Poco::Logger * log_, const AccessRightsElement & element) const; @@ -139,6 +142,9 @@ private: template bool checkAccessImpl(Poco::Logger * log_, const AccessRightsElements & elements) const; + template + bool calculateResultAccessAndCheck(Poco::Logger * log_, const AccessFlags & flags, const Args &... args) const; + boost::shared_ptr calculateResultAccess(bool grant_option) const; boost::shared_ptr calculateResultAccess(bool grant_option, UInt64 readonly_, bool allow_ddl_, bool allow_introspection_) const; From 1e2206bdd9cf4c025853151a25a247a619a29562 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 11 Apr 2020 20:54:10 +0300 Subject: [PATCH 280/752] Update security_changelog.md --- docs/ru/whats_new/security_changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/ru/whats_new/security_changelog.md b/docs/ru/whats_new/security_changelog.md index c8f66bf0475..9a2dab8ba14 100644 --- a/docs/ru/whats_new/security_changelog.md +++ b/docs/ru/whats_new/security_changelog.md @@ -1,3 +1,5 @@ +# Security Changelog + ## Исправлено в релизе 19.14.3.3, 2019-09-10 {#ispravleno-v-relize-19-14-3-3-2019-09-10} ### CVE-2019-15024 {#cve-2019-15024} From 53199ae546ae20381a0c0c1fea19534364745311 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Apr 2020 22:51:04 +0300 Subject: [PATCH 281/752] Fix various small issues in interactive mode of clickhouse-client #10189 #5908 --- programs/client/Client.cpp | 45 +++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 9cd1332b513..fef89d9df35 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -82,16 +82,8 @@ #endif /// http://en.wikipedia.org/wiki/ANSI_escape_code - -/// Similar codes \e[s, \e[u don't work in VT100 and Mosh. -#define SAVE_CURSOR_POSITION "\033""7" -#define RESTORE_CURSOR_POSITION "\033""8" - #define CLEAR_TO_END_OF_LINE "\033[K" -/// This codes are possibly not supported everywhere. -#define DISABLE_LINE_WRAPPING "\033[?7l" -#define ENABLE_LINE_WRAPPING "\033[?7h" namespace DB { @@ -133,8 +125,6 @@ private: bool stdin_is_a_tty = false; /// stdin is a terminal. bool stdout_is_a_tty = false; /// stdout is a terminal. - uint16_t terminal_width = 0; /// Terminal width is needed to render progress bar. - std::unique_ptr connection; /// Connection to DB. String query_id; /// Current query_id. String query; /// Current query. @@ -1122,11 +1112,16 @@ private: /// to avoid losing sync. if (!cancelled) { - auto cancel_query = [&] { + auto cancel_query = [&] + { connection->sendCancel(); cancelled = true; if (is_interactive) + { + if (written_progress_chars) + clearProgress(); std::cout << "Cancelling query." << std::endl; + } /// Pressing Ctrl+C twice results in shut down. 
interrupt_listener.unblock(); @@ -1436,7 +1431,7 @@ private: { written_progress_chars = 0; if (!send_logs) - std::cerr << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE; + std::cerr << "\r" CLEAR_TO_END_OF_LINE; } @@ -1461,20 +1456,14 @@ private: "\033[1m↗\033[0m", }; - if (!send_logs) - { - if (written_progress_chars) - message << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE; - else - message << SAVE_CURSOR_POSITION; - } + auto indicator = indicators[increment % 8]; - message << DISABLE_LINE_WRAPPING; + if (!send_logs && written_progress_chars) + message << '\r'; size_t prefix_size = message.count(); - message << indicators[increment % 8] - << " Progress: "; + message << indicator << " Progress: "; message << formatReadableQuantity(progress.read_rows) << " rows, " @@ -1488,7 +1477,7 @@ private: else message << ". "; - written_progress_chars = message.count() - prefix_size - (increment % 8 == 7 ? 10 : 13); /// Don't count invisible output (escape sequences). + written_progress_chars = message.count() - prefix_size - (strlen(indicator) - 2); /// Don't count invisible output (escape sequences). /// If the approximate number of rows to process is known, we can display a progress bar and percentage. if (progress.total_rows_to_read > 0) @@ -1506,7 +1495,7 @@ private: if (show_progress_bar) { - ssize_t width_of_progress_bar = static_cast(terminal_width) - written_progress_chars - strlen(" 99%"); + ssize_t width_of_progress_bar = static_cast(getTerminalWidth()) - written_progress_chars - strlen(" 99%"); if (width_of_progress_bar > 0) { std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.read_rows, 0, total_rows_corrected, width_of_progress_bar)); @@ -1521,7 +1510,8 @@ private: message << ' ' << (99 * progress.read_rows / total_rows_corrected) << '%'; } - message << ENABLE_LINE_WRAPPING; + message << CLEAR_TO_END_OF_LINE; + if (send_logs) message << '\n'; @@ -1589,7 +1579,11 @@ private: resetOutput(); if (is_interactive && !written_first_block) + { + if (written_progress_chars) + clearProgress(); std::cout << "Ok." << std::endl; + } } static void showClientVersion() @@ -1687,6 +1681,7 @@ public: stdin_is_a_tty = isatty(STDIN_FILENO); stdout_is_a_tty = isatty(STDOUT_FILENO); + uint64_t terminal_width = 0; if (stdin_is_a_tty) terminal_width = getTerminalWidth(); From e63fe6da8404f29a26da91fdc12753feaf31996e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 11 Apr 2020 23:07:11 +0300 Subject: [PATCH 282/752] Update msgpack.cmake --- cmake/find/msgpack.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find/msgpack.cmake b/cmake/find/msgpack.cmake index 093555bebc0..46344fc162f 100644 --- a/cmake/find/msgpack.cmake +++ b/cmake/find/msgpack.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_MSGPACK_LIBRARY "Set to FALSE to use system msgpack library if (USE_INTERNAL_MSGPACK_LIBRARY) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include/msgpack.hpp") - message(WARNING "submodule contrib/msgpack-c is missing. To fix try run: \n git submodule update --init --recursive") + message(WARNING "Submodule contrib/msgpack-c is missing. 
To fix try run: \n git submodule update --init --recursive") set(USE_INTERNAL_MSGPACK_LIBRARY 0) set(MISSING_INTERNAL_MSGPACK_LIBRARY 1) endif() From 121bf7b8c2bbfc2f09379afa2232a8b1b8754844 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 00:37:19 +0300 Subject: [PATCH 283/752] Whitespace #9968 --- src/Storages/MergeTree/MergeTreeIndexFullText.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 93553e0619e..e42ac942362 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -627,7 +627,7 @@ bool SplitTokenExtractor::next(const char * data, size_t len, size_t * pos, size // With the help of https://www.strchr.com/strcmp_and_strlen_using_sse_4.2 const auto alnum_chars_ranges = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, '\xFF', '\x80', 'z', 'a', 'Z', 'A', '9', '0'); - // Every bit represents if `haystack` character is in the ranges (1) or not(0) + // Every bit represents if `haystack` character is in the ranges (1) or not (0) const int result_bitmask = _mm_cvtsi128_si32(_mm_cmpestrm(alnum_chars_ranges, 8, haystack, haystack_length, _SIDD_CMP_RANGES)); #else // NOTE: -1 and +1 required since SSE2 has no `>=` and `<=` instructions on packed 8-bit integers (epi8). From 85448f4b133527e60af4ff56500f9d8fc1181dc7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 12 Apr 2020 00:41:52 +0300 Subject: [PATCH 284/752] Add test from the #2610 (closes: #2610) --- .../01227_distributed_global_in_issue_2610.reference | 3 +++ .../0_stateless/01227_distributed_global_in_issue_2610.sql | 6 ++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/01227_distributed_global_in_issue_2610.reference create mode 100644 tests/queries/0_stateless/01227_distributed_global_in_issue_2610.sql diff --git a/tests/queries/0_stateless/01227_distributed_global_in_issue_2610.reference b/tests/queries/0_stateless/01227_distributed_global_in_issue_2610.reference new file mode 100644 index 00000000000..083edaac248 --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_global_in_issue_2610.reference @@ -0,0 +1,3 @@ +2 +2 +2 diff --git a/tests/queries/0_stateless/01227_distributed_global_in_issue_2610.sql b/tests/queries/0_stateless/01227_distributed_global_in_issue_2610.sql new file mode 100644 index 00000000000..a063e417e3a --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_global_in_issue_2610.sql @@ -0,0 +1,6 @@ +-- Test from the issue https://github.com/ClickHouse/ClickHouse/issues/2610 +drop table if exists data_01227; +create table data_01227 (key Int) Engine=MergeTree() order by key; +insert into data_01227 select * from numbers(10); +select * from remote('127.1', currentDatabase(), data_01227) prewhere key global in (select key from data_01227 prewhere key = 2); +select * from cluster('test_cluster_two_shards', currentDatabase(), data_01227) prewhere key global in (select key from data_01227 prewhere key = 2); From cf9f00644e2a4365eeb8386deb1d5ecc06aad5ff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 01:13:06 +0300 Subject: [PATCH 285/752] Fix the issue with arrayJoin and PREWHERE optimization #10092 --- src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 
fa29494d1c9..749c0d64525 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -202,10 +202,10 @@ void MergeTreeWhereOptimizer::optimize(ASTSelectQuery & select) const prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, cond_it); total_size_of_moved_conditions += cond_it->columns_size; - /// Move all other conditions that depend on the same set of columns. + /// Move all other viable conditions that depend on the same set of columns. for (auto jt = where_conditions.begin(); jt != where_conditions.end();) { - if (jt->columns_size == cond_it->columns_size && jt->identifiers == cond_it->identifiers) + if (jt->viable && jt->columns_size == cond_it->columns_size && jt->identifiers == cond_it->identifiers) prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, jt++); else ++jt; From cf483b7ecc95d9f0cf1bccd11e53ceb0291c11af Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 01:14:55 +0300 Subject: [PATCH 286/752] Added a test --- .../1_stateful/00093_prewhere_array_join.reference | 0 tests/queries/1_stateful/00093_prewhere_array_join.sql | 9 +++++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/queries/1_stateful/00093_prewhere_array_join.reference create mode 100644 tests/queries/1_stateful/00093_prewhere_array_join.sql diff --git a/tests/queries/1_stateful/00093_prewhere_array_join.reference b/tests/queries/1_stateful/00093_prewhere_array_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/1_stateful/00093_prewhere_array_join.sql b/tests/queries/1_stateful/00093_prewhere_array_join.sql new file mode 100644 index 00000000000..a1263144bb1 --- /dev/null +++ b/tests/queries/1_stateful/00093_prewhere_array_join.sql @@ -0,0 +1,9 @@ +SELECT arrayJoin([SearchEngineID]) AS search_engine, URL FROM test.hits WHERE SearchEngineID != 0 AND search_engine != 0 FORMAT Null; + +SELECT + arrayJoin([0]) AS browser, + arrayJoin([SearchEngineID]) AS search_engine, + URL +FROM test.hits +WHERE 1 AND (SearchEngineID != 0) AND (browser != 0) AND (search_engine != 0) +FORMAT Null; From a332d8b01ecdd52b06abdd9f630597870a28b9fb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 12 Apr 2020 01:22:10 +0300 Subject: [PATCH 287/752] Cover GLOBAL IN for Distributed over Distributed --- tests/queries/0_stateless/01223_dist_on_dist.reference | 2 ++ tests/queries/0_stateless/01223_dist_on_dist.sql | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/01223_dist_on_dist.reference b/tests/queries/0_stateless/01223_dist_on_dist.reference index 4a5dd8f316c..aca2f070db4 100644 --- a/tests/queries/0_stateless/01223_dist_on_dist.reference +++ b/tests/queries/0_stateless/01223_dist_on_dist.reference @@ -99,3 +99,5 @@ merge() distributed_group_by_no_merge 33 33 +GLOBAL IN +1 diff --git a/tests/queries/0_stateless/01223_dist_on_dist.sql b/tests/queries/0_stateless/01223_dist_on_dist.sql index 1b9175f622e..65a240fd48b 100644 --- a/tests/queries/0_stateless/01223_dist_on_dist.sql +++ b/tests/queries/0_stateless/01223_dist_on_dist.sql @@ -82,6 +82,10 @@ select count() from merge_dist_01223; select 'distributed_group_by_no_merge'; select count() from merge_dist_01223 settings distributed_group_by_no_merge=1; +-- global in +select 'GLOBAL IN'; +select distinct * from dist_01223 where key global in (select toInt32(1)); + drop table merge_dist_01223; drop table dist_01223; drop table dist_layer_01223; From 
29189d427604a49b3ce6eb1bd7210efb85f41d55 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 01:33:16 +0300 Subject: [PATCH 288/752] Also add stateless test to illustrate the issue more clear --- .../0_stateless/01115_prewhere_array_join.reference | 0 tests/queries/0_stateless/01115_prewhere_array_join.sql | 7 +++++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/01115_prewhere_array_join.reference create mode 100644 tests/queries/0_stateless/01115_prewhere_array_join.sql diff --git a/tests/queries/0_stateless/01115_prewhere_array_join.reference b/tests/queries/0_stateless/01115_prewhere_array_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01115_prewhere_array_join.sql b/tests/queries/0_stateless/01115_prewhere_array_join.sql new file mode 100644 index 00000000000..e614bdf402b --- /dev/null +++ b/tests/queries/0_stateless/01115_prewhere_array_join.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS prewhere; + +CREATE TABLE prewhere (light UInt8, heavy String) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO prewhere SELECT 0, randomPrintableASCII(10000) FROM numbers(10000); +SELECT arrayJoin([light]) != 0 AS cond, length(heavy) FROM prewhere WHERE light != 0 AND cond != 0; + +DROP TABLE prewhere; From 2adeabd3c77b63b89b17ef00ec1e182c53aeb106 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Apr 2020 01:58:45 +0300 Subject: [PATCH 289/752] Update Settings.h --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 342d9bef58e..8138af31d5f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -406,7 +406,7 @@ struct Settings : public SettingsCollection M(SettingBool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). 
If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(SettingUInt64, max_parser_depth, 1000, "Maximum parser depth.", 0) \ M(SettingSeconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ - M(SettingBool, transform_null_in, false, "Enable null verification of the 'IN' operator.", 0) \ + M(SettingBool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ M(SettingBool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \ M(SettingSeconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \ \ From 194dcc01fb96f5e808d7ba3b22523037b7d2e98a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 12 Apr 2020 00:28:04 +0300 Subject: [PATCH 290/752] Allow literals for GLOBAL IN --- src/Interpreters/GlobalSubqueriesVisitor.h | 15 ++++++++++++++- .../01226_dist_on_dist_global_in.reference | 6 ++++++ .../01226_dist_on_dist_global_in.sql | 10 ++++++++++ .../01224_dist_on_dist_global_in.reference | 4 ---- .../bugs/01224_dist_on_dist_global_in.sql | 18 ------------------ 5 files changed, 30 insertions(+), 23 deletions(-) create mode 100644 tests/queries/0_stateless/01226_dist_on_dist_global_in.reference create mode 100644 tests/queries/0_stateless/01226_dist_on_dist_global_in.sql delete mode 100644 tests/queries/bugs/01224_dist_on_dist_global_in.reference delete mode 100644 tests/queries/bugs/01224_dist_on_dist_global_in.sql diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index e577219629c..78d98805814 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -166,7 +167,19 @@ private: { if (func.name == "globalIn" || func.name == "globalNotIn") { - data.addExternalStorage(func.arguments->children[1]); + ASTPtr & ast = func.arguments->children[1]; + + /// Literal can use regular IN + if (ast->as()) + { + if (func.name == "globalIn") + func.name = "in"; + else + func.name = "notIn"; + return; + } + + data.addExternalStorage(ast); data.has_global_subqueries = true; } } diff --git a/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference b/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference new file mode 100644 index 00000000000..3d8d7fb770d --- /dev/null +++ b/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference @@ -0,0 +1,6 @@ +GLOBAL IN +0 +0 +0 +0 +GLOBAL NOT IN diff --git a/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql b/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql new file mode 100644 index 00000000000..588ea9c1048 --- /dev/null +++ b/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql @@ -0,0 +1,10 @@ +SELECT 'GLOBAL IN'; +select * from remote('localhost', system.one) where dummy global in (0); +select * from remote('localhost', system.one) where toUInt64(dummy) global in numbers(1); +select * from remote('localhost', system.one) where dummy global in system.one; +select * from remote('localhost', system.one) where dummy global in (select 0); +SELECT 'GLOBAL NOT IN'; +select * from remote('localhost', system.one) where dummy global not in (0); +select * from remote('localhost', system.one) where toUInt64(dummy) 
global not in numbers(1); +select * from remote('localhost', system.one) where dummy global not in system.one; +select * from remote('localhost', system.one) where dummy global not in (select 0); diff --git a/tests/queries/bugs/01224_dist_on_dist_global_in.reference b/tests/queries/bugs/01224_dist_on_dist_global_in.reference deleted file mode 100644 index 7f75aa873cb..00000000000 --- a/tests/queries/bugs/01224_dist_on_dist_global_in.reference +++ /dev/null @@ -1,4 +0,0 @@ -GLOBAL IN distributed_group_by_no_merge -1 -GLOBAL IN -1 diff --git a/tests/queries/bugs/01224_dist_on_dist_global_in.sql b/tests/queries/bugs/01224_dist_on_dist_global_in.sql deleted file mode 100644 index e363fef2d2b..00000000000 --- a/tests/queries/bugs/01224_dist_on_dist_global_in.sql +++ /dev/null @@ -1,18 +0,0 @@ -create table if not exists data_01224 (key Int) Engine=Memory(); -create table if not exists dist_layer_01224 as data_01224 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01224); -create table if not exists dist_01224 as data_01224 Engine=Distributed(test_cluster_two_shards, currentDatabase(), dist_layer_01224); - -select * from dist_01224; -insert into data_01224 select * from numbers(3); - --- "Table expression is undefined, Method: ExpressionAnalyzer::interpretSubquery" -select 'GLOBAL IN distributed_group_by_no_merge'; -select distinct * from dist_01224 where key global in (1) settings distributed_group_by_no_merge=1; - --- requires #9923 -select 'GLOBAL IN'; -select distinct * from dist_01224 where key global in (1); - -drop table dist_01224; -drop table dist_layer_01224; -drop table data_01224; From c5c1a8def75e1b47fd8e77553fdbdf980631ffc9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 03:03:05 +0300 Subject: [PATCH 291/752] Added a test from Andrey #2641 --- .../01116_asof_join_dolbyzerr.reference | 3 +++ .../0_stateless/01116_asof_join_dolbyzerr.sql | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference create mode 100644 tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference new file mode 100644 index 00000000000..1055a67ea5b --- /dev/null +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference @@ -0,0 +1,3 @@ +v1 o1 ['s2','s1'] +v1 o2 ['s4'] +v2 o3 ['s5','s3'] diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql new file mode 100644 index 00000000000..8a94b6ddd24 --- /dev/null +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql @@ -0,0 +1,18 @@ +CREATE TEMPORARY TABLE sessions (date DateTime, visitorId String, sessionId String); +CREATE TEMPORARY TABLE orders (date DateTime, visitorId String, orderId String); + +INSERT INTO sessions VALUES ('2018-01-01 00:00:00', 'v1', 's1'), ('2018-01-02 00:00:00', 'v1', 's2'), ('2018-01-03 00:00:00', 'v2', 's3'), ('2018-01-04 00:00:00', 'v1', 's4'), ('2018-01-05 00:00:00', 'v2', 's5'), ('2018-01-06 00:00:00', 'v3', 's6'); +INSERT INTO orders VALUES ('2018-01-03 00:00:00', 'v1', 'o1'), ('2018-01-05 00:00:00', 'v1', 'o2'), ('2018-01-06 00:00:00', 'v2', 'o3'); + +SELECT + visitorId, + orderId, + groupUniqArray(sessionId) +FROM sessions +ASOF INNER JOIN orders ON (sessions.visitorId = orders.visitorId) AND (sessions.date <= orders.date) +GROUP BY + visitorId, + orderId +ORDER BY + visitorId ASC, + orderId 
ASC; From dec3e0f9861f7876ee8a9dd3a97b9f88240cd284 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 03:38:25 +0300 Subject: [PATCH 292/752] Make least and greatest functions case insensitive for compatibility with MySQL --- src/Functions/greatest.cpp | 2 +- src/Functions/least.cpp | 2 +- tests/queries/0_stateless/01117_greatest_least_case.reference | 2 ++ tests/queries/0_stateless/01117_greatest_least_case.sql | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01117_greatest_least_case.reference create mode 100644 tests/queries/0_stateless/01117_greatest_least_case.sql diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 6eb123708a4..9abf85e751b 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -57,7 +57,7 @@ using FunctionGreatest = FunctionBinaryArithmetic; void registerFunctionGreatest(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index 47af759c956..f2e7c1f15d2 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -57,7 +57,7 @@ using FunctionLeast = FunctionBinaryArithmetic; void registerFunctionLeast(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(FunctionFactory::CaseInsensitive); } } diff --git a/tests/queries/0_stateless/01117_greatest_least_case.reference b/tests/queries/0_stateless/01117_greatest_least_case.reference new file mode 100644 index 00000000000..4bbcfcf5682 --- /dev/null +++ b/tests/queries/0_stateless/01117_greatest_least_case.reference @@ -0,0 +1,2 @@ +2 +-1 diff --git a/tests/queries/0_stateless/01117_greatest_least_case.sql b/tests/queries/0_stateless/01117_greatest_least_case.sql new file mode 100644 index 00000000000..21bfd240f5a --- /dev/null +++ b/tests/queries/0_stateless/01117_greatest_least_case.sql @@ -0,0 +1,2 @@ +SELECT GREATEST(1, 2); +SELECT LEAST(1, -1); From 754967bde6178ef7fd358b0fd7c37b92ce264b94 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 04:24:00 +0300 Subject: [PATCH 293/752] Add function "isConstant" --- src/Functions/isConstant.cpp | 52 +++++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 + .../0_stateless/01118_is_constant.reference | 9 ++++ .../queries/0_stateless/01118_is_constant.sql | 10 ++++ 4 files changed, 73 insertions(+) create mode 100644 src/Functions/isConstant.cpp create mode 100644 tests/queries/0_stateless/01118_is_constant.reference create mode 100644 tests/queries/0_stateless/01118_is_constant.sql diff --git a/src/Functions/isConstant.cpp b/src/Functions/isConstant.cpp new file mode 100644 index 00000000000..705b4eaac78 --- /dev/null +++ b/src/Functions/isConstant.cpp @@ -0,0 +1,52 @@ +#include +#include +#include +#include + + +namespace DB +{ + +/// Returns 1 if and only if the argument is constant expression. +/// This function is exists for development, debugging and demonstration purposes. 
+class FunctionIsConstant : public IFunction +{ +public: + static constexpr auto name = "isConstant"; + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + bool useDefaultImplementationForNulls() const override { return false; } + + size_t getNumberOfArguments() const override + { + return 1; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const auto & elem = block.getByPosition(arguments[0]); + block.getByPosition(result).column = ColumnUInt8::create(input_rows_count, isColumnConst(*elem.column)); + } +}; + + +void registerFunctionIsConstant(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 44e26542c7d..30cab4cc53a 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -56,6 +56,7 @@ void registerFunctionBasename(FunctionFactory &); void registerFunctionTransform(FunctionFactory &); void registerFunctionGetMacro(FunctionFactory &); void registerFunctionGetScalar(FunctionFactory &); +void registerFunctionIsConstant(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -114,6 +115,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionTransform(factory); registerFunctionGetMacro(factory); registerFunctionGetScalar(factory); + registerFunctionIsConstant(factory); #if USE_ICU registerFunctionConvertCharset(factory); diff --git a/tests/queries/0_stateless/01118_is_constant.reference b/tests/queries/0_stateless/01118_is_constant.reference new file mode 100644 index 00000000000..aba2b912a08 --- /dev/null +++ b/tests/queries/0_stateless/01118_is_constant.reference @@ -0,0 +1,9 @@ +1 +1 +0 +1 +1 +--- +0 +0 +--- diff --git a/tests/queries/0_stateless/01118_is_constant.sql b/tests/queries/0_stateless/01118_is_constant.sql new file mode 100644 index 00000000000..5cbff986dd2 --- /dev/null +++ b/tests/queries/0_stateless/01118_is_constant.sql @@ -0,0 +1,10 @@ +select isConstant(1); +select isConstant([1]); +select isConstant(arrayJoin([1])); +SELECT isConstant((SELECT 1)); +SELECT isConstant(x) FROM (SELECT 1 x); +SELECT '---'; +SELECT isConstant(x) FROM (SELECT 1 x UNION ALL SELECT 2); +SELECT '---'; +select isConstant(); -- { serverError 42 } +select isConstant(1, 2); -- { serverError 42 } From 716ddc4580381ca173824f3d5733c898f65b1777 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Apr 2020 04:26:11 +0300 Subject: [PATCH 294/752] Update isConstant.cpp --- src/Functions/isConstant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/isConstant.cpp b/src/Functions/isConstant.cpp index 705b4eaac78..5416fbd2d3e 100644 --- a/src/Functions/isConstant.cpp +++ b/src/Functions/isConstant.cpp @@ -8,7 +8,7 @@ namespace DB { /// Returns 1 if and only if the argument is constant expression. -/// This function is exists for development, debugging and demonstration purposes. +/// This function exists for development, debugging and demonstration purposes. 
class FunctionIsConstant : public IFunction { public: From 01bc88a85113cf7b47a2026a06a090ec86c4e230 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 05:05:30 +0300 Subject: [PATCH 295/752] Fix wrong whitespaces in debug output --- src/Interpreters/ColumnNamesContext.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ColumnNamesContext.cpp b/src/Interpreters/ColumnNamesContext.cpp index 4d23c6f0e8b..380d5f9ebc3 100644 --- a/src/Interpreters/ColumnNamesContext.cpp +++ b/src/Interpreters/ColumnNamesContext.cpp @@ -87,6 +87,7 @@ std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols) os << "'" << pr.first << "'"; for (auto & alias : pr.second.aliases) os << "/'" << alias << "'"; + os << ", "; } os << " source_tables: "; for (const auto & x : cols.tables) @@ -94,24 +95,24 @@ std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols) auto alias = x.alias(); auto name = x.name(); if (alias && name) - os << "'" << *alias << "'/'" << *name << "' "; + os << "'" << *alias << "'/'" << *name << "', "; else if (alias) - os << "'" << *alias << "' "; + os << "'" << *alias << "', "; else if (name) - os << "'" << *name << "' "; + os << "'" << *name << "', "; } os << "table_aliases: "; for (const auto & x : cols.table_aliases) - os << "'" << x << "' "; + os << "'" << x << "', "; os << "complex_aliases: "; for (const auto & x : cols.complex_aliases) - os << "'" << x << "' "; + os << "'" << x << "', "; os << "masked_columns: "; for (const auto & x : cols.masked_columns) - os << "'" << x << "' "; + os << "'" << x << "', "; os << "array_join_columns: "; for (const auto & x : cols.array_join_columns) - os << "'" << x << "' "; + os << "'" << x << "', "; return os; } From f8e1f1b69bd565111a7c8b748ef6d96380485ad7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 05:06:58 +0300 Subject: [PATCH 296/752] Fix wrong whitespaces in debug output --- src/Interpreters/ColumnNamesContext.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ColumnNamesContext.cpp b/src/Interpreters/ColumnNamesContext.cpp index 380d5f9ebc3..c8fde183d96 100644 --- a/src/Interpreters/ColumnNamesContext.cpp +++ b/src/Interpreters/ColumnNamesContext.cpp @@ -89,7 +89,7 @@ std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols) os << "/'" << alias << "'"; os << ", "; } - os << " source_tables: "; + os << "source_tables: "; for (const auto & x : cols.tables) { auto alias = x.alias(); From d43903211aad13b46ca2cf45b036b30a30fb5983 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 05:51:56 +0300 Subject: [PATCH 297/752] Better exception message #9810 --- src/Functions/FunctionsConversion.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index d201b967fb1..b493aef4cac 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2394,10 +2394,17 @@ protected: DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override { - const auto type_col = checkAndGetColumnConst(arguments.back().column.get()); + const auto & column = arguments.back().column; + if (!column) + throw Exception("Second argument to " + getName() + " must be a constant string describing type." 
+ " Instead there is non-constant column of type " + arguments.back().type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto type_col = checkAndGetColumnConst(column.get()); if (!type_col) - throw Exception("Second argument to " + getName() + " must be a constant string describing type", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception("Second argument to " + getName() + " must be a constant string describing type." + " Instead there is a column with the following structure: " + column->dumpStructure(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return DataTypeFactory::instance().get(type_col->getValue()); } From d6544159ba78930b268229a810a44581c3e6b035 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Apr 2020 06:18:21 +0300 Subject: [PATCH 298/752] Update HashJoin.cpp --- src/Interpreters/HashJoin.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index d8c0d239c96..16d4932bb14 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1162,15 +1162,15 @@ DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null if (!sample_block_with_columns_to_add.has(column_name)) throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::LOGICAL_ERROR); - auto ctn = sample_block_with_columns_to_add.getByName(column_name); + auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null) { if (!ctn.type->canBeInsideNullable()) - throw Exception("Type " + ctn.type->getName() + "cannot be inside Nullable", ErrorCodes::LOGICAL_ERROR); + throw Exception("Type " + elem.type->getName() + " cannot be inside Nullable", ErrorCodes::LOGICAL_ERROR); else - ctn.type = makeNullable(ctn.type); + elem.type = makeNullable(elem.type); } - return ctn.type; + return elem.type; } From a2418f94df9c58e8eba9704215d1f04e299b919f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Apr 2020 06:19:13 +0300 Subject: [PATCH 299/752] Update HashJoin.cpp --- src/Interpreters/HashJoin.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 16d4932bb14..5845fd131d2 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1165,7 +1165,7 @@ DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null) { - if (!ctn.type->canBeInsideNullable()) + if (!elem.type->canBeInsideNullable()) throw Exception("Type " + elem.type->getName() + " cannot be inside Nullable", ErrorCodes::LOGICAL_ERROR); else elem.type = makeNullable(elem.type); @@ -1194,15 +1194,15 @@ void HashJoin::joinGet(Block & block, const String & column_name, bool or_null) checkTypeOfKey(block, right_table_keys); - auto ctn = sample_block_with_columns_to_add.getByName(column_name); + auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null) - ctn.type = makeNullable(ctn.type); - ctn.column = ctn.type->createColumn(); + elem.type = makeNullable(elem.type); + elem.column = elem.type->createColumn(); if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) && kind == ASTTableJoin::Kind::Left) { - joinGetImpl(block, {ctn}, std::get(data->maps)); + joinGetImpl(block, {elem}, std::get(data->maps)); } else throw Exception("joinGet only supports StorageJoin of type Left Any", 
ErrorCodes::LOGICAL_ERROR); From ca5172cc63fd8ec0c6e28e5c511f2bba86991b3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 06:25:47 +0300 Subject: [PATCH 300/752] Merging #10094 --- src/Interpreters/HashJoin.cpp | 7 +------ .../0_stateless/01240_join_get_or_null.reference | 1 + tests/queries/0_stateless/01240_join_get_or_null.sql | 10 ++++++++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 5845fd131d2..b8da03acb8b 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1164,12 +1164,7 @@ DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::LOGICAL_ERROR); auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null) - { - if (!elem.type->canBeInsideNullable()) - throw Exception("Type " + elem.type->getName() + " cannot be inside Nullable", ErrorCodes::LOGICAL_ERROR); - else - elem.type = makeNullable(elem.type); - } + elem.type = makeNullable(elem.type); return elem.type; } diff --git a/tests/queries/0_stateless/01240_join_get_or_null.reference b/tests/queries/0_stateless/01240_join_get_or_null.reference index dec7d2fabd2..96e34d5a44c 100644 --- a/tests/queries/0_stateless/01240_join_get_or_null.reference +++ b/tests/queries/0_stateless/01240_join_get_or_null.reference @@ -1 +1,2 @@ \N +\N diff --git a/tests/queries/0_stateless/01240_join_get_or_null.sql b/tests/queries/0_stateless/01240_join_get_or_null.sql index d1b9a07540a..48fd8228b55 100644 --- a/tests/queries/0_stateless/01240_join_get_or_null.sql +++ b/tests/queries/0_stateless/01240_join_get_or_null.sql @@ -1,7 +1,13 @@ DROP TABLE IF EXISTS join_test; CREATE TABLE join_test (id UInt16, num UInt16) engine = Join(ANY, LEFT, id); - SELECT joinGetOrNull('join_test', 'num', 500); - +DROP TABLE join_test; + +CREATE TABLE join_test (id UInt16, num Nullable(UInt16)) engine = Join(ANY, LEFT, id); +SELECT joinGetOrNull('join_test', 'num', 500); +DROP TABLE join_test; + +CREATE TABLE join_test (id UInt16, num Array(UInt16)) engine = Join(ANY, LEFT, id); +SELECT joinGetOrNull('join_test', 'num', 500); -- { serverError 43 } DROP TABLE join_test; From e7f399f6527841b4352dd1e480bac3a15587fef6 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Sun, 12 Apr 2020 13:37:39 +0800 Subject: [PATCH 301/752] MySQLHandler: max_allowed_packet returned by default when server setup with select variables --- programs/server/MySQLHandler.cpp | 6 ++++-- .../test_mysql_protocol/clients/java/0.reference | 4 ++-- .../integration/test_mysql_protocol/clients/java/Test.java | 7 ++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/programs/server/MySQLHandler.cpp b/programs/server/MySQLHandler.cpp index b72aa8104d3..bfab19061ce 100644 --- a/programs/server/MySQLHandler.cpp +++ b/programs/server/MySQLHandler.cpp @@ -284,15 +284,17 @@ void MySQLHandler::comQuery(ReadBuffer & payload) } else { - String replacement_query = "select ''"; + String replacement_query = "SELECT ''"; bool should_replace = false; bool with_output = false; // Translate query from MySQL to ClickHouse. - // This is a temporary workaround until ClickHouse supports the syntax "@@var_name". 
+ // Required parameters when setup: + // * max_allowed_packet, default 64MB, https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_max_allowed_packet if (isFederatedServerSetupSelectVarCommand(query)) { should_replace = true; + replacement_query = "SELECT 67108864 AS max_allowed_packet"; } // This is a workaround in order to support adding ClickHouse to MySQL using federated server. diff --git a/tests/integration/test_mysql_protocol/clients/java/0.reference b/tests/integration/test_mysql_protocol/clients/java/0.reference index bcf9e3dde94..3e3e20d1ebb 100644 --- a/tests/integration/test_mysql_protocol/clients/java/0.reference +++ b/tests/integration/test_mysql_protocol/clients/java/0.reference @@ -1,5 +1,5 @@ -33jdbc -44ck +33jdbcnull +44cknull 0 1 2 diff --git a/tests/integration/test_mysql_protocol/clients/java/Test.java b/tests/integration/test_mysql_protocol/clients/java/Test.java index 50ce824f67c..89659529679 100644 --- a/tests/integration/test_mysql_protocol/clients/java/Test.java +++ b/tests/integration/test_mysql_protocol/clients/java/Test.java @@ -5,8 +5,8 @@ import java.sql.SQLException; import java.sql.Statement; class JavaConnectorTest { - private static final String CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS default.test1 (age Int32, name String) Engine = Memory"; - private static final String INSERT_SQL = "INSERT INTO default.test1 VALUES(33, 'jdbc'),(44, 'ck')"; + private static final String CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS default.test1 (`age` Int32, `name` String, `int_nullable` Nullable(Int32)) Engine = Memory"; + private static final String INSERT_SQL = "INSERT INTO default.test1(`age`, `name`) VALUES(33, 'jdbc'),(44, 'ck')"; private static final String SELECT_SQL = "SELECT * FROM default.test1"; private static final String SELECT_NUMBER_SQL = "SELECT * FROM system.numbers LIMIT 13"; private static final String DROP_TABLE_SQL = "DROP TABLE default.test1"; @@ -41,7 +41,7 @@ class JavaConnectorTest { } } - String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?maxAllowedPacket=67108864&useSSL=false", host, port, database); + String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s", host, port, database); Connection conn = null; Statement stmt = null; @@ -55,6 +55,7 @@ class JavaConnectorTest { while (rs.next()) { System.out.print(rs.getString("age")); System.out.print(rs.getString("name")); + System.out.print(rs.getString("int_nullable")); System.out.println(); } From 365b5207b7d02384f4c3e28402c4b3d748f443df Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 5 Apr 2020 00:07:00 +0300 Subject: [PATCH 302/752] Add log_queries_min_type to filter which entries will be written to query_log Can be used to write into query_log only failed queries (i.e. 
on memory exceeded error), by using:

    set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
---
 docs/en/operations/settings/settings.md  | 18 +++++++++++++++++
 src/Core/Settings.h                      |  2 +-
 src/Core/SettingsCollection.cpp          |  7 +++++++
 src/Core/SettingsCollection.h            | 10 ++++++++++
 src/Interpreters/QueryLog.h              |  9 ++-------
 src/Interpreters/executeQuery.cpp        | 20 +++++++++----------
 .../01231_log_queries_min_type.reference |  5 +++++
 .../01231_log_queries_min_type.sql       | 15 ++++++++++++++
 8 files changed, 68 insertions(+), 18 deletions(-)
 create mode 100644 tests/queries/0_stateless/01231_log_queries_min_type.reference
 create mode 100644 tests/queries/0_stateless/01231_log_queries_min_type.sql

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 69c444ebaef..37b4c713f91 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -507,6 +507,24 @@ Example:
 log_queries=1
 ```

+## log\_queries\_min\_type {#settings-log-queries-min-type}
+
+`query_log` minimal type to log.
+
+Possible values:
+- `QUERY_START` (`=1`)
+- `QUERY_FINISH` (`=2`)
+- `EXCEPTION_BEFORE_START` (`=3`)
+- `EXCEPTION_WHILE_PROCESSING` (`=4`)
+
+Default value: `QUERY_START`.
+
+Can be used to limit which entries will go to `query_log`. For example, if you are interested only in errors, you can use `EXCEPTION_WHILE_PROCESSING`:
+
+``` text
+log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
+```
+
 ## log\_query\_threads {#settings-log-query-threads}

 Setting up query threads logging.

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 8138af31d5f..725171d4a1b 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -149,7 +149,7 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingInt64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \
     \
     M(SettingBool, log_queries, 0, "Log requests and write the log to the system table.", 0) \
-    \
+    M(SettingLogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "query_log minimal type to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
     M(SettingUInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log.
Also limit length of printed query in ordinary text log.", 0) \ \ M(SettingDistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \ diff --git a/src/Core/SettingsCollection.cpp b/src/Core/SettingsCollection.cpp index 6d879b27181..238ac1c3c62 100644 --- a/src/Core/SettingsCollection.cpp +++ b/src/Core/SettingsCollection.cpp @@ -542,6 +542,13 @@ IMPLEMENT_SETTING_ENUM(FormatSettings::DateTimeInputFormat, DATE_TIME_INPUT_FORM M(trace, "trace") IMPLEMENT_SETTING_ENUM(LogsLevel, LOGS_LEVEL_LIST_OF_NAMES, ErrorCodes::BAD_ARGUMENTS) +#define LOG_QUERIES_TYPE_LIST_OF_NAMES(M) \ + M(QUERY_START, "QUERY_START") \ + M(QUERY_FINISH, "QUERY_FINISH") \ + M(EXCEPTION_BEFORE_START, "EXCEPTION_BEFORE_START") \ + M(EXCEPTION_WHILE_PROCESSING, "EXCEPTION_WHILE_PROCESSING") +IMPLEMENT_SETTING_ENUM(QueryLogElementType, LOG_QUERIES_TYPE_LIST_OF_NAMES, ErrorCodes::BAD_ARGUMENTS) + namespace details { diff --git a/src/Core/SettingsCollection.h b/src/Core/SettingsCollection.h index da21412b7c1..d93772e86ed 100644 --- a/src/Core/SettingsCollection.h +++ b/src/Core/SettingsCollection.h @@ -298,6 +298,16 @@ enum class LogsLevel }; using SettingLogsLevel = SettingEnum; +// Make it signed for compatibility with DataTypeEnum8 +enum QueryLogElementType : int8_t +{ + QUERY_START = 1, + QUERY_FINISH = 2, + EXCEPTION_BEFORE_START = 3, + EXCEPTION_WHILE_PROCESSING = 4, +}; +using SettingLogQueriesType = SettingEnum; + enum class SettingsBinaryFormat { diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 836b37095e9..ec14f5e97fb 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace ProfileEvents @@ -22,13 +23,7 @@ namespace DB /// A struct which will be inserted as row into query_log table struct QueryLogElement { - enum Type : int8_t // Make it signed for compatibility with DataTypeEnum8 - { - QUERY_START = 1, - QUERY_FINISH = 2, - EXCEPTION_BEFORE_START = 3, - EXCEPTION_WHILE_PROCESSING = 4, - }; + using Type = QueryLogElementType; Type type = QUERY_START; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c9c66832f08..68bebb83619 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -157,7 +157,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c /// Log the start of query execution into the table if necessary. QueryLogElement elem; - elem.type = QueryLogElement::EXCEPTION_BEFORE_START; + elem.type = QueryLogElementType::EXCEPTION_BEFORE_START; elem.event_time = current_time; elem.query_start_time = current_time; @@ -175,7 +175,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c /// Update performance counters before logging to query_log CurrentThread::finalizePerformanceCounters(); - if (settings.log_queries) + if (settings.log_queries && elem.type >= settings.log_queries_min_type) if (auto query_log = context.getQueryLog()) query_log->add(elem); } @@ -400,7 +400,7 @@ static std::tuple executeQueryImpl( { QueryLogElement elem; - elem.type = QueryLogElement::QUERY_START; + elem.type = QueryLogElementType::QUERY_START; elem.event_time = current_time; elem.query_start_time = current_time; @@ -412,7 +412,7 @@ static std::tuple executeQueryImpl( bool log_queries = settings.log_queries && !internal; /// Log into system table start of query execution, if need. 
- if (log_queries) + if (log_queries && elem.type >= settings.log_queries_min_type) { if (settings.log_query_settings) elem.query_settings = std::make_shared(context.getSettingsRef()); @@ -422,7 +422,7 @@ static std::tuple executeQueryImpl( } /// Also make possible for caller to log successful query finish and exception during execution. - auto finish_callback = [elem, &context, log_queries] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable + auto finish_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable { QueryStatus * process_list_elem = context.getProcessListElement(); @@ -436,7 +436,7 @@ static std::tuple executeQueryImpl( double elapsed_seconds = info.elapsed_seconds; - elem.type = QueryLogElement::QUERY_FINISH; + elem.type = QueryLogElementType::QUERY_FINISH; elem.event_time = time(nullptr); elem.query_duration_ms = elapsed_seconds * 1000; @@ -484,19 +484,19 @@ static std::tuple executeQueryImpl( elem.thread_ids = std::move(info.thread_ids); elem.profile_counters = std::move(info.profile_counters); - if (log_queries) + if (log_queries && elem.type >= log_queries_min_type) { if (auto query_log = context.getQueryLog()) query_log->add(elem); } }; - auto exception_callback = [elem, &context, log_queries, quota(quota)] () mutable + auto exception_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type, quota(quota)] () mutable { if (quota) quota->used(Quota::ERRORS, 1, /* check_exceeded = */ false); - elem.type = QueryLogElement::EXCEPTION_WHILE_PROCESSING; + elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; elem.event_time = time(nullptr); elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); @@ -529,7 +529,7 @@ static std::tuple executeQueryImpl( logException(context, elem); /// In case of exception we log internal queries also - if (log_queries) + if (log_queries && elem.type >= log_queries_min_type) { if (auto query_log = context.getQueryLog()) query_log->add(elem); diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.reference b/tests/queries/0_stateless/01231_log_queries_min_type.reference new file mode 100644 index 00000000000..a358d022033 --- /dev/null +++ b/tests/queries/0_stateless/01231_log_queries_min_type.reference @@ -0,0 +1,5 @@ +01231_log_queries_min_type/QUERY_START +2 +01231_log_queries_min_type/EXCEPTION_BEFORE_START +2 +3 diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql new file mode 100644 index 00000000000..f2229c94a8a --- /dev/null +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -0,0 +1,15 @@ +set log_queries=1; + +select '01231_log_queries_min_type/QUERY_START'; +system flush logs; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; + +set log_queries_min_type='EXCEPTION_BEFORE_START'; +select '01231_log_queries_min_type/EXCEPTION_BEFORE_START'; +system flush logs; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; + +set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'; +select '01231_log_queries_min_type/', max(number) from system.numbers limit 1e6 settings 
max_rows_to_read='100K'; -- { serverError 158; } +system flush logs; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; From 676964de658b21e56a49408d708dcad689c14616 Mon Sep 17 00:00:00 2001 From: Maroun Maroun Date: Sun, 12 Apr 2020 12:26:06 +0300 Subject: [PATCH 303/752] Fix typo in the getting started tutorial: "it's" -> "its" (#10201) --- docs/en/getting_started/tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting_started/tutorial.md b/docs/en/getting_started/tutorial.md index 08cca45d21d..9763f814d59 100644 --- a/docs/en/getting_started/tutorial.md +++ b/docs/en/getting_started/tutorial.md @@ -108,7 +108,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r 1. Name of table to create. 2. Table schema, i.e. list of columns and their [data types](../sql_reference/data_types/index.md). -3. [Table engine](../engines/table_engines/index.md) and it’s settings, which determines all the details on how queries to this table will be physically executed. +3. [Table engine](../engines/table_engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed. Yandex.Metrica is a web analytics service, and sample dataset doesn’t cover its full functionality, so there are only two tables to create: From f5c463e9adccea74507720c71f0456d44c4a54d6 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Sun, 12 Apr 2020 21:28:36 +0800 Subject: [PATCH 304/752] Fix path and typo in the tests.md --- docs/en/development/tests.md | 12 ++++++------ docs/es/development/tests.md | 6 +++--- docs/fa/development/tests.md | 6 +++--- docs/fr/development/tests.md | 6 +++--- docs/ja/development/tests.md | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 02620b92367..45adb221b5b 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -15,7 +15,7 @@ Tests are located in `queries` directory. There are two subdirectories: `statele Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. -To run all tests, use `testskhouse-test` tool. Look `--help` for the list of possible options. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. +To run all tests, use `clickhouse-test` tool. Look `--help` for the list of possible options. You can simply run all tests or run subset of tests filtered by substring in test name: `./clickhouse-test substring`. The most simple way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server` and then run `./clickhouse-test` from its own directory. @@ -34,13 +34,13 @@ disable these groups of tests using `--no-zookeeper`, `--no-shard` and ## Known Bugs {#known-bugs} -If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `queries/bugs` directory. These tests will be moved to `teststests_stateless` when bugs are fixed. +If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `tests/queries/bugs` directory. 
These tests will be moved to `tests/queries/0_stateless` when bugs are fixed.

 ## Integration Tests {#integration-tests}

 Integration tests allow to test ClickHouse in clustered configuration and ClickHouse interaction with other servers like MySQL, Postgres, MongoDB. They are useful to emulate network splits, packet drops, etc. These tests are run under Docker and create multiple containers with various software.

-See `testsgration/README.md` on how to run these tests.
+See `tests/integration/README.md` on how to run these tests.

 Note that integration of ClickHouse with third-party drivers is not tested. Also we currently don’t have integration tests with our JDBC and ODBC drivers.

@@ -54,7 +54,7 @@ It’s not necessarily to have unit tests if the code is already covered by func

 Performance tests allow to measure and compare performance of some isolated part of ClickHouse on synthetic queries. Tests are located at `tests/performance`. Each test is represented by `.xml` file with description of test case. Tests are run with `clickhouse performance-test` tool (that is embedded in `clickhouse` binary). See `--help` for invocation.

-Each test run one or miltiple queries (possibly with combinations of parameters) in a loop with some conditions for stop (like “maximum execution speed is not changing in three seconds”) and measure some metrics about query performance (like “maximum execution speed”). Some tests can contain preconditions on preloaded test dataset.
+Each test runs one or multiple queries (possibly with combinations of parameters) in a loop with some conditions for stop (like “maximum execution speed is not changing in three seconds”) and measures some metrics about query performance (like “maximum execution speed”). Some tests can contain preconditions on preloaded test dataset.

 If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other perf tools during your tests.

@@ -64,13 +64,13 @@ Some programs in `tests` directory are not prepared tests, but are test tools. F

 You can also place pair of files `.sh` and `.reference` along with the tool to run it on some predefined input - then script result can be compared to `.reference` file. These kind of tests are not automated.

-## Miscellanous Tests {#miscellanous-tests}
+## Miscellaneous Tests {#miscellaneous-tests}

 There are tests for external dictionaries located at `tests/external_dictionaries` and for machine learned models in `tests/external_models`. These tests are not updated and must be transferred to integration tests.

 There is separate test for quorum inserts. This test run ClickHouse cluster on separate servers and emulate various failure cases: network split, packet drop (between ClickHouse nodes, between ClickHouse and ZooKeeper, between ClickHouse server and client, etc.), `kill -9`, `kill -STOP` and `kill -CONT` , like [Jepsen](https://aphyr.com/tags/Jepsen). Then the test checks that all acknowledged inserts was written and all rejected inserts was not.

-Quorum test was written by separate team before ClickHouse was open-sourced. This team no longer work with ClickHouse. Test was accidentially written in Java. For these reasons, quorum test must be rewritten and moved to integration tests.
+Quorum test was written by separate team before ClickHouse was open-sourced. This team no longer works with ClickHouse. Test was accidentally written in Java.
For these reasons, quorum test must be rewritten and moved to integration tests. ## Manual Testing {#manual-testing} diff --git a/docs/es/development/tests.md b/docs/es/development/tests.md index 388931e9436..12afbb68f2e 100644 --- a/docs/es/development/tests.md +++ b/docs/es/development/tests.md @@ -17,7 +17,7 @@ Las pruebas se encuentran en `queries` directorio. Hay dos subdirectorios: `stat Cada prueba puede ser de dos tipos: `.sql` y `.sh`. `.sql` test es el script SQL simple que se canaliza a `clickhouse-client --multiquery --testmode`. `.sh` test es un script que se ejecuta por sí mismo. -Para ejecutar todas las pruebas, use `testskhouse-test` herramienta. Mira `--help` para la lista de posibles opciones. Simplemente puede ejecutar todas las pruebas o ejecutar un subconjunto de pruebas filtradas por subcadena en el nombre de la prueba: `./clickhouse-test substring`. +Para ejecutar todas las pruebas, use `clickhouse-test` herramienta. Mira `--help` para la lista de posibles opciones. Simplemente puede ejecutar todas las pruebas o ejecutar un subconjunto de pruebas filtradas por subcadena en el nombre de la prueba: `./clickhouse-test substring`. La forma más sencilla de invocar pruebas funcionales es copiar `clickhouse-client` a `/usr/bin/`, ejecutar `clickhouse-server` y luego ejecutar `./clickhouse-test` de su propio directorio. @@ -36,13 +36,13 @@ deshabilitar estos grupos de pruebas utilizando `--no-zookeeper`, `--no-shard` y ## Bugs conocidos {#known-bugs} -Si conocemos algunos errores que se pueden reproducir fácilmente mediante pruebas funcionales, colocamos pruebas funcionales preparadas en `queries/bugs` directorio. Estas pruebas se moverán a `teststests_stateless` cuando se corrigen errores. +Si conocemos algunos errores que se pueden reproducir fácilmente mediante pruebas funcionales, colocamos pruebas funcionales preparadas en `tests/queries/bugs` directorio. Estas pruebas se moverán a `tests/queries/0_stateless` cuando se corrigen errores. ## Pruebas de integración {#integration-tests} Las pruebas de integración permiten probar ClickHouse en la configuración agrupada y la interacción de ClickHouse con otros servidores como MySQL, Postgres, MongoDB. Son útiles para emular divisiones de red, caídas de paquetes, etc. Estas pruebas se ejecutan bajo Docker y crean múltiples contenedores con varios software. -Ver `testsgration/README.md` sobre cómo ejecutar estas pruebas. +Ver `tests/integration/README.md` sobre cómo ejecutar estas pruebas. Tenga en cuenta que la integración de ClickHouse con controladores de terceros no se ha probado. Además, actualmente no tenemos pruebas de integración con nuestros controladores JDBC y ODBC. diff --git a/docs/fa/development/tests.md b/docs/fa/development/tests.md index 874ac3063b9..922bc43fd46 100644 --- a/docs/fa/development/tests.md +++ b/docs/fa/development/tests.md @@ -18,7 +18,7 @@ toc_title: "\u0646\u062D\u0648\u0647 \u0627\u062C\u0631\u0627\u06CC \u062A\u0633 هر تست می تواند یکی از دو نوع باشد: `.sql` و `.sh`. `.sql` تست اسکریپت ساده مربع است که به لوله کشی است `clickhouse-client --multiquery --testmode`. `.sh` تست یک اسکریپت است که به خودی خود اجرا است. -برای اجرای تمام تست ها استفاده کنید `testskhouse-test` ابزار. نگاه کن `--help` برای لیستی از گزینه های ممکن. شما به سادگی می توانید تمام تست ها را اجرا کنید یا زیر مجموعه ای از تست های فیلتر شده توسط زیر رشته را در نام تست اجرا کنید: `./clickhouse-test substring`. +برای اجرای تمام تست ها استفاده کنید `clickhouse-test` ابزار. نگاه کن `--help` برای لیستی از گزینه های ممکن. 
شما به سادگی می توانید تمام تست ها را اجرا کنید یا زیر مجموعه ای از تست های فیلتر شده توسط زیر رشته را در نام تست اجرا کنید: `./clickhouse-test substring`. ساده ترین راه برای فراخوانی تست های کاربردی کپی است `clickhouse-client` به `/usr/bin/` فرار کن `clickhouse-server` و سپس اجرا کنید `./clickhouse-test` از دایرکتوری خود را. @@ -37,13 +37,13 @@ toc_title: "\u0646\u062D\u0648\u0647 \u0627\u062C\u0631\u0627\u06CC \u062A\u0633 ## اشکالات شناخته شده {#known-bugs} -اگر ما می دانیم برخی از اشکالات است که می تواند به راحتی توسط تست های کاربردی تکثیر, ما تست های عملکردی تهیه شده در `queries/bugs` فهرست راهنما. این تست خواهد شد به نقل مکان کرد `teststests_stateless` هنگامی که اشکالات ثابت هستند. +اگر ما می دانیم برخی از اشکالات است که می تواند به راحتی توسط تست های کاربردی تکثیر, ما تست های عملکردی تهیه شده در `tests/queries/bugs` فهرست راهنما. این تست خواهد شد به نقل مکان کرد `tests/queries/0_stateless` هنگامی که اشکالات ثابت هستند. ## تست های ادغام {#integration-tests} ادغام آزمون اجازه می دهد برای تست clickhouse در خوشه پیکربندی و clickhouse تعامل با سرور های دیگر مانند mysql, postgres, mongodb. مفید برای تقلید انشعابات شبکه قطره بسته و غیره هستند. این تست ها تحت کارگر بارانداز اجرا و ایجاد ظروف متعدد با نرم افزار های مختلف. -ببینید `testsgration/README.md` در مورد چگونگی اجرای این تست. +ببینید `tests/integration/README.md` در مورد چگونگی اجرای این تست. توجه داشته باشید که ادغام کلیک با رانندگان شخص ثالث تست نشده است. همچنین ما در حال حاضر تست های یکپارچه سازی با رانندگان جی بی سی و بی سی ما ندارد. diff --git a/docs/fr/development/tests.md b/docs/fr/development/tests.md index e5c8a50fa31..6637e9546fe 100644 --- a/docs/fr/development/tests.md +++ b/docs/fr/development/tests.md @@ -17,7 +17,7 @@ Les Tests sont situés dans `queries` répertoire. Il y a deux sous-répertoires Chaque test peut être de deux types: `.sql` et `.sh`. `.sql` test est le script SQL simple qui est canalisé vers `clickhouse-client --multiquery --testmode`. `.sh` test est un script qui est exécuté par lui-même. -Pour exécuter tous les tests, utilisez `testskhouse-test` outil. Regarder `--help` pour la liste des options possibles. Vous pouvez simplement exécuter tous les tests ou exécuter un sous ensemble de tests filtrés par sous chaîne dans le nom du test: `./clickhouse-test substring`. +Pour exécuter tous les tests, utilisez `clickhouse-test` outil. Regarder `--help` pour la liste des options possibles. Vous pouvez simplement exécuter tous les tests ou exécuter un sous ensemble de tests filtrés par sous chaîne dans le nom du test: `./clickhouse-test substring`. Le moyen le plus simple d'invoquer des tests fonctionnels est de copier `clickhouse-client` de `/usr/bin/`, exécuter `clickhouse-server` et puis exécutez `./clickhouse-test` à partir de son propre répertoire. @@ -36,13 +36,13 @@ désactivez ces groupes de tests en utilisant `--no-zookeeper`, `--no-shard` et ## Bugs connus {#known-bugs} -Si nous connaissons des bugs qui peuvent être facilement reproduits par des tests fonctionnels, nous plaçons des tests fonctionnels préparés dans `queries/bugs` répertoire. Ces tests seront déplacés à `teststests_stateless` quand les bugs sont corrigés. +Si nous connaissons des bugs qui peuvent être facilement reproduits par des tests fonctionnels, nous plaçons des tests fonctionnels préparés dans `tests/queries/bugs` répertoire. Ces tests seront déplacés à `tests/queries/0_stateless` quand les bugs sont corrigés. 
## Les Tests D'Intégration {#integration-tests} Les tests d'intégration permettent de tester ClickHouse en configuration cluster et clickhouse interaction avec D'autres serveurs comme MySQL, Postgres, MongoDB. Ils sont utiles pour émuler les splits réseau, les chutes de paquets, etc. Ces tests sont exécutés sous Docker et créent plusieurs conteneurs avec divers logiciels. -Voir `testsgration/README.md` sur la façon d'exécuter ces tests. +Voir `tests/integration/README.md` sur la façon d'exécuter ces tests. Notez que l'intégration de ClickHouse avec des pilotes tiers n'est pas testée. De plus, nous n'avons actuellement pas de tests d'intégration avec nos pilotes JDBC et ODBC. diff --git a/docs/ja/development/tests.md b/docs/ja/development/tests.md index 27b8870461e..b6c5abea621 100644 --- a/docs/ja/development/tests.md +++ b/docs/ja/development/tests.md @@ -17,7 +17,7 @@ toc_title: "ClickHouse\u30C6\u30B9\u30C8\u3092\u5B9F\u884C\u3059\u308B\u65B9\u6C それぞれの試験できるの種類: `.sql` と `.sh`. `.sql` testは、パイプ処理される単純なSQLスクリプトです `clickhouse-client --multiquery --testmode`. `.sh` テストは、単独で実行されるスクリプトです。 -すべてのテストを実行するには、 `testskhouse-test` ツール。 見て! `--help` 可能なオプションのリストについて。 できるだけ実行すべての試験または実行のサブセットの試験フィルター部分文字列の試験名: `./clickhouse-test substring`. +すべてのテストを実行するには、 `clickhouse-test` ツール。 見て! `--help` 可能なオプションのリストについて。 できるだけ実行すべての試験または実行のサブセットの試験フィルター部分文字列の試験名: `./clickhouse-test substring`. 機能テストを呼び出す最も簡単な方法は、コピーすることです `clickhouse-client` に `/usr/bin/`、実行 `clickhouse-server` そして、実行 `./clickhouse-test` 独自のディレクトリから。 @@ -36,13 +36,13 @@ toc_title: "ClickHouse\u30C6\u30B9\u30C8\u3092\u5B9F\u884C\u3059\u308B\u65B9\u6C ## 既知のバグ {#known-bugs} -機能テストで簡単に再現できるいくつかのバグを知っていれば、準備された機能テストを `queries/bugs` ディレクトリ。 これらのテストはに移動されます `teststests_stateless` バグが修正されたとき。 +機能テストで簡単に再現できるいくつかのバグを知っていれば、準備された機能テストを `tests/queries/bugs` ディレクトリ。 これらのテストはに移動されます `tests/queries/0_stateless` バグが修正されたとき。 ## 統合テスト {#integration-tests} 統合テストでは、クラスター化された設定でclickhouseをテストし、mysql、postgres、mongodbのような他のサーバーとのclickhouseの相互作用を可能にします。 それらはネットワークの割れ目、包みの低下、等を競争して有用である。 これらの試験する方向に作用しdockerを複数の容器を様々なソフトウェアです。 -見る `testsgration/README.md` これらのテストを実行する方法について。 +見る `tests/integration/README.md` これらのテストを実行する方法について。 ClickHouseとサードパーティドライバの統合はテストされていません。 また、現在、JDBCおよびODBCドライバとの統合テストはありません。 From 82ef20d6dce2ddac4a40c956a43d0dbed5f76bcd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 12 Apr 2020 17:17:34 +0300 Subject: [PATCH 305/752] Fix build. 
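
The previous commit registered the new Processors/Merges directory under the
already-used clickhouse_processors_sources target name, so CMake saw two object
libraries with the same name and the build failed. A minimal sketch of the
intended pair of registrations (mirroring the src/CMakeLists.txt hunk below):

    add_object_library(clickhouse_processors_sources Processors/Sources)
    add_object_library(clickhouse_processors_merges Processors/Merges)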
--- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cb8172993a5..6549105318e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -311,7 +311,7 @@ add_object_library(clickhouse_processors_formats Processors/Formats) add_object_library(clickhouse_processors_formats_impl Processors/Formats/Impl) add_object_library(clickhouse_processors_transforms Processors/Transforms) add_object_library(clickhouse_processors_sources Processors/Sources) -add_object_library(clickhouse_processors_sources Processors/Merges) +add_object_library(clickhouse_processors_merges Processors/Merges) if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) From eaba5c6c73f06c96856743e50dd9a47694418e7f Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev Date: Sat, 11 Apr 2020 13:57:13 +0300 Subject: [PATCH 306/752] Remove mutable defaults from helpers/cluster.py --- tests/integration/helpers/cluster.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5dc93cb338a..717fab11449 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -139,12 +139,12 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, + def add_instance(self, name, config_dir=None, main_configs=None, user_configs=None, macros=None, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, with_minio=False, - hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", - stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=[]): + hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", + stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -161,13 +161,14 @@ class ClickHouseCluster: raise Exception("Can\'t add instance `%s': there is already an instance with the same name!" 
% name) instance = ClickHouseInstance( - self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, + self, self.base_dir, name, config_dir, main_configs or [], user_configs or [], macros or {}, + with_zookeeper, self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, self.base_configs_dir, self.server_bin_path, self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, - env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, + env_variables=env_variables or {}, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address, - with_installed_binary=with_installed_binary, tmpfs=tmpfs) + with_installed_binary=with_installed_binary, tmpfs=tmpfs or []) self.instances[name] = instance if ipv4_address is not None or ipv6_address is not None: @@ -580,17 +581,17 @@ class ClickHouseInstance: self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, base_configs_dir, server_bin_path, odbc_bridge_bin_path, - clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, + clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", - stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=[]): + stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None): self.name = name - self.base_cmd = cluster.base_cmd[:] + self.base_cmd = cluster.base_cmd self.docker_id = cluster.get_instance_docker_id(self.name) self.cluster = cluster self.hostname = hostname if hostname is not None else self.name - self.tmpfs = tmpfs[:] + self.tmpfs = tmpfs or [] self.custom_config_dir = p.abspath(p.join(base_path, custom_config_dir)) if custom_config_dir else None self.custom_main_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_main_configs] self.custom_user_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_user_configs] @@ -611,7 +612,7 @@ class ClickHouseInstance: self.path = p.join(self.cluster.instances_dir, name) self.docker_compose_path = p.join(self.path, 'docker_compose.yml') - self.env_variables = env_variables + self.env_variables = env_variables or {} if with_odbc_drivers: self.odbc_ini_path = os.path.dirname(self.docker_compose_path) + "/odbc.ini:/etc/odbc.ini" self.with_mysql = True @@ -1041,4 +1042,4 @@ class ClickHouseKiller(object): self.clickhouse_node.kill_clickhouse() def __exit__(self, exc_type, exc_val, exc_tb): - self.clickhouse_node.restore_clickhouse() \ No newline at end of file + self.clickhouse_node.restore_clickhouse() From 4da19d122d7532ff4a68cf39ed2147029ef5ace3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 12 Apr 2020 18:02:17 +0300 Subject: [PATCH 307/752] Added IProcessor::onUpdatePorts --- src/Processors/Executors/PipelineExecutor.cpp | 2 ++ src/Processors/IProcessor.h | 4 ++++ src/Processors/Sources/SourceFromInputStream.h | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index f2d2477991e..78229e4d379 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -263,6 +263,8 @@ bool PipelineExecutor::tryAddProcessorToStackIfUpdated(Edge & edge, Queue & queu 
node.status = ExecStatus::Preparing; return prepareProcessor(edge.to, thread_number, queue, std::move(lock)); } + else + graph[edge.to].processor->onUpdatePorts(); return true; } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index a613e8008d0..8f43a5e149b 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -233,6 +233,10 @@ public: onCancel(); } + /// Additional method which is called in case if ports were updated while work() method. + /// May be used to stop execution in rare cases. + virtual void onUpdatePorts() {}; + virtual ~IProcessor() = default; auto & getInputs() { return inputs; } diff --git a/src/Processors/Sources/SourceFromInputStream.h b/src/Processors/Sources/SourceFromInputStream.h index b547e6a6d1f..13d42c937f3 100644 --- a/src/Processors/Sources/SourceFromInputStream.h +++ b/src/Processors/Sources/SourceFromInputStream.h @@ -37,6 +37,13 @@ public: void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); } void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); } + /// Stop reading from stream if output port is finished. + void onUpdatePorts() override + { + if (getPort().isFinished()) + onCancel(); + } + protected: void onCancel() override { stream->cancel(false); } From bff1f24cf717e004a2b04abaea28a6d82bd2c721 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 12 Apr 2020 18:21:21 +0300 Subject: [PATCH 308/752] Added IProcessor::onUpdatePorts --- src/Processors/Sources/SourceFromInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Sources/SourceFromInputStream.h b/src/Processors/Sources/SourceFromInputStream.h index 13d42c937f3..88a045e65a2 100644 --- a/src/Processors/Sources/SourceFromInputStream.h +++ b/src/Processors/Sources/SourceFromInputStream.h @@ -41,7 +41,7 @@ public: void onUpdatePorts() override { if (getPort().isFinished()) - onCancel(); + cancel(); } protected: From 2b052a44d98bb91981f97ec8b0664283e9dafbbc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 12 Apr 2020 18:25:51 +0300 Subject: [PATCH 309/752] Added test --- .../01245_limit_infinite_sources.reference | 1 + .../0_stateless/01245_limit_infinite_sources.sql | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/01245_limit_infinite_sources.reference create mode 100644 tests/queries/0_stateless/01245_limit_infinite_sources.sql diff --git a/tests/queries/0_stateless/01245_limit_infinite_sources.reference b/tests/queries/0_stateless/01245_limit_infinite_sources.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01245_limit_infinite_sources.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01245_limit_infinite_sources.sql b/tests/queries/0_stateless/01245_limit_infinite_sources.sql new file mode 100644 index 00000000000..803a2d14c39 --- /dev/null +++ b/tests/queries/0_stateless/01245_limit_infinite_sources.sql @@ -0,0 +1,11 @@ +SELECT number +FROM +( + SELECT zero AS number + FROM remote('127.0.0.2', system.zeros) + UNION ALL + SELECT number + sleep(0.5) + FROM system.numbers +) +WHERE number = 1 +LIMIT 1 From d4a3ef2fdc342cf0951022f9183844505548a5b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 19:15:53 +0300 Subject: [PATCH 310/752] Fixed wrong code (no changes in behaviour) --- src/IO/parseDateTimeBestEffort.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 6e747b13b3f..84a40144155 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -69,7 +69,6 @@ template inline void readDecimalNumber(T & res, size_t num_digits, const char * src) { #define READ_DECIMAL_NUMBER(N) do { res *= common::exp10_i32(N); readDecimalNumber(res, src); src += (N); num_digits -= (N); } while (false) - while (num_digits) { switch (num_digits) @@ -80,7 +79,7 @@ inline void readDecimalNumber(T & res, size_t num_digits, const char * src) default: READ_DECIMAL_NUMBER(4); break; } } -#undef DECIMAL_NUMBER_CASE +#undef READ_DECIMAL_NUMBER } struct DateTimeSubsecondPart From 6de712f0f4a7e854c9c881b2121b88d4ee450ea2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 06:20:15 +0300 Subject: [PATCH 311/752] Fix joined constants, development --- src/Interpreters/ColumnNamesContext.cpp | 4 ++++ src/Interpreters/ExpressionAnalyzer.cpp | 3 +++ src/Interpreters/RequiredSourceColumnsVisitor.cpp | 2 ++ src/Interpreters/SyntaxAnalyzer.cpp | 7 +++++++ src/Interpreters/TableJoin.cpp | 10 +++++++++- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 9 +++++++-- 6 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ColumnNamesContext.cpp b/src/Interpreters/ColumnNamesContext.cpp index c8fde183d96..d577fea97ae 100644 --- a/src/Interpreters/ColumnNamesContext.cpp +++ b/src/Interpreters/ColumnNamesContext.cpp @@ -24,6 +24,8 @@ bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast) if (required_names.count(alias)) masked_columns.insert(alias); + std::cerr << "Alias: " << alias << "\n"; + complex_aliases.insert(alias); return true; } @@ -33,6 +35,8 @@ void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node) if (!IdentifierSemantic::getColumnName(node)) return; + std::cerr << "Identifier: " << node.name << "\n"; + /// There should be no complex cases after query normalization. Names to aliases: one-to-many. String alias = node.tryGetAlias(); required_names[node.name].addInclusion(alias); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9ec32737fdc..4ea762c0d6e 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -559,6 +559,9 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer /// Actions which need to be calculated on joined block. 
ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin()); + std::cerr << "Joined block actions: " << joined_block_actions->getSampleBlock().dumpStructure() + << "\n\n" << toString(joined_block_actions->getRequiredColumns()) << "\n"; + if (!subquery_for_join.source) { NamesWithAliases required_columns_with_aliases = diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index 5a740805560..469a5852fa5 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -88,12 +88,14 @@ void RequiredSourceColumnsMatcher::visit(const ASTPtr & ast, Data & data) visit(*t, ast, data); return; } + if (auto * t = ast->as()) { data.addTableAliasIfAny(*ast); visit(*t, ast, data); return; } + if (ast->as()) { data.addTableAliasIfAny(*ast); diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index 5c1b6c7e62b..bd317d61668 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -638,9 +638,13 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query) /// We calculate required_source_columns with source_columns modifications and swap them on exit required_source_columns = source_columns; + std::cerr << queryToString(query) << "\n"; + RequiredSourceColumnsVisitor::Data columns_context; RequiredSourceColumnsVisitor(columns_context).visit(query); + std::cerr << columns_context << "\n"; + NameSet source_column_names; for (const auto & column : source_columns) source_column_names.insert(column.name); @@ -922,6 +926,9 @@ void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings /// Creates a dictionary `aliases`: alias -> ASTPtr QueryAliasesVisitor(aliases).visit(query); + for (const auto & alias : aliases) + std::cerr << "Alias: " << alias.first << ": " << queryToString(alias.second) << "\n"; + /// Mark table ASTIdentifiers with not a column marker MarkTableIdentifiersVisitor::Data identifiers_data{aliases}; MarkTableIdentifiersVisitor(identifiers_data).visit(query); diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 30b5e8e4483..3286bbbefd1 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -5,6 +7,8 @@ #include #include +#include + #include @@ -79,7 +83,9 @@ void TableJoin::deduplicateAndQualifyColumnNames(const NameSet & left_table_colu dedup_columns.push_back(column); auto & inserted = dedup_columns.back(); - if (left_table_columns.count(column.name)) + /// Also qualify unusual column names - that does not look like identifiers. + + if (left_table_columns.count(column.name) || !isValidIdentifierBegin(column.name.at(0))) inserted.name = right_table_prefix + column.name; original_names[inserted.name] = column.name; @@ -157,6 +163,8 @@ NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column) { + std::cerr << "Adding " << joined_column.name << "\n"; + if (join_use_nulls && isLeftOrFull(table_join.kind)) { auto type = joined_column.type->canBeInsideNullable() ? 
makeNullable(joined_column.type) : joined_column.type; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 17b1bc004f8..b97aa01826c 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include namespace DB @@ -107,8 +109,9 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, IdentifierSemantic::setMembership(identifier, table_pos); /// In case if column from the joined table are in source columns, change it's name to qualified. + /// Also always leave unusual identifiers qualified. auto & table = data.tables[table_pos].table; - if (table_pos && data.hasColumn(short_name)) + if (table_pos && (data.hasColumn(short_name) || !isValidIdentifierBegin(short_name.at(0)))) IdentifierSemantic::setColumnLongName(identifier, table); else IdentifierSemantic::setColumnShortName(identifier, table); @@ -128,7 +131,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D func_arguments->children.clear(); } -void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) +void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk &, const ASTPtr & ast, Data & data) { if (ast->children.size() != 1) throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); @@ -174,6 +177,8 @@ static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, String table_name = table.getQualifiedNamePrefix(false); auto identifier = std::make_shared(std::vector{table_name, column_name}); + std::cerr << "Expanded identifier: " << queryToString(identifier) << "\n"; + bool added = false; if (aliases && aliases->count(identifier->name)) { From ec4889e43e4c564de279c0af61e0d61fb98533bf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 19:20:12 +0300 Subject: [PATCH 312/752] Remove debug output --- src/Interpreters/ColumnNamesContext.cpp | 4 ---- src/Interpreters/ExpressionAnalyzer.cpp | 3 --- src/Interpreters/SyntaxAnalyzer.cpp | 7 ------- src/Interpreters/TableJoin.cpp | 4 ---- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 3 --- 5 files changed, 21 deletions(-) diff --git a/src/Interpreters/ColumnNamesContext.cpp b/src/Interpreters/ColumnNamesContext.cpp index d577fea97ae..c8fde183d96 100644 --- a/src/Interpreters/ColumnNamesContext.cpp +++ b/src/Interpreters/ColumnNamesContext.cpp @@ -24,8 +24,6 @@ bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast) if (required_names.count(alias)) masked_columns.insert(alias); - std::cerr << "Alias: " << alias << "\n"; - complex_aliases.insert(alias); return true; } @@ -35,8 +33,6 @@ void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node) if (!IdentifierSemantic::getColumnName(node)) return; - std::cerr << "Identifier: " << node.name << "\n"; - /// There should be no complex cases after query normalization. Names to aliases: one-to-many. 
String alias = node.tryGetAlias(); required_names[node.name].addInclusion(alias); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 4ea762c0d6e..9ec32737fdc 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -559,9 +559,6 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer /// Actions which need to be calculated on joined block. ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin()); - std::cerr << "Joined block actions: " << joined_block_actions->getSampleBlock().dumpStructure() - << "\n\n" << toString(joined_block_actions->getRequiredColumns()) << "\n"; - if (!subquery_for_join.source) { NamesWithAliases required_columns_with_aliases = diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index bd317d61668..5c1b6c7e62b 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -638,13 +638,9 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query) /// We calculate required_source_columns with source_columns modifications and swap them on exit required_source_columns = source_columns; - std::cerr << queryToString(query) << "\n"; - RequiredSourceColumnsVisitor::Data columns_context; RequiredSourceColumnsVisitor(columns_context).visit(query); - std::cerr << columns_context << "\n"; - NameSet source_column_names; for (const auto & column : source_columns) source_column_names.insert(column.name); @@ -926,9 +922,6 @@ void SyntaxAnalyzer::normalize(ASTPtr & query, Aliases & aliases, const Settings /// Creates a dictionary `aliases`: alias -> ASTPtr QueryAliasesVisitor(aliases).visit(query); - for (const auto & alias : aliases) - std::cerr << "Alias: " << alias.first << ": " << queryToString(alias.second) << "\n"; - /// Mark table ASTIdentifiers with not a column marker MarkTableIdentifiersVisitor::Data identifiers_data{aliases}; MarkTableIdentifiersVisitor(identifiers_data).visit(query); diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 3286bbbefd1..339fe2dceb3 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -1,5 +1,3 @@ -#include - #include #include @@ -163,8 +161,6 @@ NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column) { - std::cerr << "Adding " << joined_column.name << "\n"; - if (join_use_nulls && isLeftOrFull(table_join.kind)) { auto type = joined_column.type->canBeInsideNullable() ? 
makeNullable(joined_column.type) : joined_column.type; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index b97aa01826c..7c31a6db546 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -18,7 +18,6 @@ #include #include #include -#include namespace DB @@ -177,8 +176,6 @@ static void addIdentifier(ASTs & nodes, const DatabaseAndTableWithAlias & table, String table_name = table.getQualifiedNamePrefix(false); auto identifier = std::make_shared(std::vector{table_name, column_name}); - std::cerr << "Expanded identifier: " << queryToString(identifier) << "\n"; - bool added = false; if (aliases && aliases->count(identifier->name)) { From 66d9ba93894eac80d316da1e1f4ce04e5c2d9d98 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 19:24:16 +0300 Subject: [PATCH 313/752] Added a test --- .../0_stateless/01120_join_constants.reference | 2 ++ .../0_stateless/01120_join_constants.sql | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/01120_join_constants.reference create mode 100644 tests/queries/0_stateless/01120_join_constants.sql diff --git a/tests/queries/0_stateless/01120_join_constants.reference b/tests/queries/0_stateless/01120_join_constants.reference new file mode 100644 index 00000000000..a16427fbdf7 --- /dev/null +++ b/tests/queries/0_stateless/01120_join_constants.reference @@ -0,0 +1,2 @@ +1 hello 1 world world 1 +2 hello 0 world 1 diff --git a/tests/queries/0_stateless/01120_join_constants.sql b/tests/queries/0_stateless/01120_join_constants.sql new file mode 100644 index 00000000000..443559c3ea1 --- /dev/null +++ b/tests/queries/0_stateless/01120_join_constants.sql @@ -0,0 +1,17 @@ +SELECT + t1.*, + t2.*, + 'world', + isConstant('world') +FROM +( + SELECT + arrayJoin([1, 2]) AS k, + 'hello' +) AS t1 +LEFT JOIN +( + SELECT + arrayJoin([1, 3]) AS k, + 'world' +) AS t2 ON t1.k = t2.k; From b00330b5db5e95ba8be818885a95d2c7eee322b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 19:25:20 +0300 Subject: [PATCH 314/752] Added bug --- tests/queries/bugs/join_constants_on.sql | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/queries/bugs/join_constants_on.sql diff --git a/tests/queries/bugs/join_constants_on.sql b/tests/queries/bugs/join_constants_on.sql new file mode 100644 index 00000000000..ae967e07adb --- /dev/null +++ b/tests/queries/bugs/join_constants_on.sql @@ -0,0 +1,2 @@ +select cast(1, 'UInt8') from (select arrayJoin([1, 2]) as a) t1 left join (select 1 as b) t2 on b = ignore('UInt8'); +select isConstant('UInt8'), toFixedString('hello', toUInt8(substring('UInt8', 5, 1))) from (select arrayJoin([1, 2]) as a) t1 left join (select 1 as b) t2 on b = ignore('UInt8'); From ea7eb2f4afae6890bf23a7f74c19391d4cb67a7f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 19:33:44 +0300 Subject: [PATCH 315/752] Removed old command line option for client --- programs/client/Client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index fef89d9df35..e01eef98006 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1710,7 +1710,6 @@ public: ("database,d", po::value(), "database") ("pager", po::value(), "pager") ("disable_suggestion,A", "Disable loading suggestion data. 
Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.") - ("always_load_suggestion_data", "Load suggestion data even if clickhouse-client is run in non-interactive mode. Used for testing.") ("suggestion_limit", po::value()->default_value(10000), "Suggestion limit for how many databases, tables and columns to fetch.") ("multiline,m", "multiline") From d252c59513db55439ee912690427e1a2f6041ff6 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Sun, 12 Apr 2020 20:04:52 +0300 Subject: [PATCH 316/752] Added a test that produces segfault in StorageSystemTables --- ...ecreate_reattach_and_show_tables.reference | 1 + ...rrent_recreate_reattach_and_show_tables.sh | 109 ++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100755 tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.reference create mode 100755 tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh diff --git a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.reference b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.reference new file mode 100755 index 00000000000..678f9a34e6f --- /dev/null +++ b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.reference @@ -0,0 +1 @@ +Test OK diff --git a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh new file mode 100755 index 00000000000..8bf21d3cb02 --- /dev/null +++ b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +export CURR_DATABASE="test_lazy_01014_concurrent_${CLICKHOUSE_DATABASE}" + + +function recreate_lazy_func1() +{ + $CLICKHOUSE_CLIENT -q " + CREATE TABLE $CURR_DATABASE.log (a UInt64, b UInt64) ENGINE = Log; + "; + + while true; do + $CLICKHOUSE_CLIENT -q " + DETACH TABLE $CURR_DATABASE.log; + "; + + $CLICKHOUSE_CLIENT -q " + ATTACH TABLE $CURR_DATABASE.log; + "; + done +} + +function recreate_lazy_func2() +{ + while true; do + $CLICKHOUSE_CLIENT -q " + CREATE TABLE $CURR_DATABASE.tlog (a UInt64, b UInt64) ENGINE = TinyLog; + "; + + $CLICKHOUSE_CLIENT -q " + DROP TABLE $CURR_DATABASE.tlog; + "; + done +} + +function recreate_lazy_func3() +{ + $CLICKHOUSE_CLIENT -q " + CREATE TABLE $CURR_DATABASE.slog (a UInt64, b UInt64) ENGINE = StripeLog; + "; + + while true; do + $CLICKHOUSE_CLIENT -q " + ATTACH TABLE $CURR_DATABASE.slog; + "; + + $CLICKHOUSE_CLIENT -q " + DETACH TABLE $CURR_DATABASE.slog; + "; + done +} + +function recreate_lazy_func4() +{ + while true; do + $CLICKHOUSE_CLIENT -q " + CREATE TABLE $CURR_DATABASE.tlog2 (a UInt64, b UInt64) ENGINE = TinyLog; + "; + + $CLICKHOUSE_CLIENT -q " + DROP TABLE $CURR_DATABASE.tlog2; + "; + done +} + +function show_tables_func() +{ + while true; do + $CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE database = '$CURR_DATABASE' FORMAT Null"; + done +} + + +export -f recreate_lazy_func1; +export -f recreate_lazy_func2; +export -f recreate_lazy_func3; +export -f recreate_lazy_func4; +export -f show_tables_func; + + +${CLICKHOUSE_CLIENT} -n -q " + DROP DATABASE IF EXISTS $CURR_DATABASE; + CREATE DATABASE $CURR_DATABASE ENGINE = Lazy(1); +" + + +TIMEOUT=30 + +timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null & +timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null & +timeout $TIMEOUT bash -c recreate_lazy_func3 2> /dev/null & +timeout $TIMEOUT bash -c recreate_lazy_func4 2> /dev/null & +timeout $TIMEOUT bash -c show_tables_func 2> /dev/null & + +wait +sleep 1 + +${CLICKHOUSE_CLIENT} -n -q " + DROP TABLE IF EXISTS $CURR_DATABASE.log; + DROP TABLE IF EXISTS $CURR_DATABASE.slog; + DROP TABLE IF EXISTS $CURR_DATABASE.tlog; + DROP TABLE IF EXISTS $CURR_DATABASE.tlog2; +" +# DROP DATABASE $CURR_DATABASE; -- This fails for some reason + +echo "Test OK" From 2eb2e4cf41909dc82ccf5cd30c02f81941e40e36 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Sun, 12 Apr 2020 20:17:41 +0300 Subject: [PATCH 317/752] Added proper nullptr check --- src/Storages/System/StorageSystemTables.cpp | 23 +++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index f4ce4a8b717..81ff6a03e12 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -239,20 +239,25 @@ protected: StoragePtr table = nullptr; TableStructureReadLockHolder lock; - try + if (need_lock_structure) { - if (need_lock_structure) + table = tables_it->table(); + if (table == nullptr) + { + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) + continue; + } + try { - table = tables_it->table(); lock = table->lockStructureForShare( false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); } - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::TABLE_IS_DROPPED) - continue; - throw; + catch (const Exception & e) + { + if (e.code() == ErrorCodes::TABLE_IS_DROPPED) + continue; + throw; + } } ++rows_count; From 
718e4bcdf89c96375ff655b233241eee9e12fda4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Apr 2020 22:01:02 +0300 Subject: [PATCH 318/752] Update IProcessor.h --- src/Processors/IProcessor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 8f43a5e149b..b7c230cb6de 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -235,7 +235,7 @@ public: /// Additional method which is called in case if ports were updated while work() method. /// May be used to stop execution in rare cases. - virtual void onUpdatePorts() {}; + virtual void onUpdatePorts() {} virtual ~IProcessor() = default; From b56945fa1b4d5cd74aa9daf0b98f15645c19899f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 22:07:16 +0300 Subject: [PATCH 319/752] Remove some bugs --- .../0_stateless/01121_remote_scalar_subquery.reference | 2 ++ .../01121_remote_scalar_subquery.sql} | 0 .../01122_totals_rollup_having_block_header.reference | 0 .../01122_totals_rollup_having_block_header.sql} | 6 ++---- tests/queries/bugs/00938_client_suggestions.sh | 6 ------ 5 files changed, 4 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/01121_remote_scalar_subquery.reference rename tests/queries/{bugs/remote_scalar_subquery.sql => 0_stateless/01121_remote_scalar_subquery.sql} (100%) create mode 100644 tests/queries/0_stateless/01122_totals_rollup_having_block_header.reference rename tests/queries/{bugs/totals_rollup_having_block_header.sql => 0_stateless/01122_totals_rollup_having_block_header.sql} (75%) delete mode 100755 tests/queries/bugs/00938_client_suggestions.sh diff --git a/tests/queries/0_stateless/01121_remote_scalar_subquery.reference b/tests/queries/0_stateless/01121_remote_scalar_subquery.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/01121_remote_scalar_subquery.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/bugs/remote_scalar_subquery.sql b/tests/queries/0_stateless/01121_remote_scalar_subquery.sql similarity index 100% rename from tests/queries/bugs/remote_scalar_subquery.sql rename to tests/queries/0_stateless/01121_remote_scalar_subquery.sql diff --git a/tests/queries/0_stateless/01122_totals_rollup_having_block_header.reference b/tests/queries/0_stateless/01122_totals_rollup_having_block_header.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/bugs/totals_rollup_having_block_header.sql b/tests/queries/0_stateless/01122_totals_rollup_having_block_header.sql similarity index 75% rename from tests/queries/bugs/totals_rollup_having_block_header.sql rename to tests/queries/0_stateless/01122_totals_rollup_having_block_header.sql index 4f7f9692fd0..4f4f3355912 100644 --- a/tests/queries/bugs/totals_rollup_having_block_header.sql +++ b/tests/queries/0_stateless/01122_totals_rollup_having_block_header.sql @@ -1,5 +1,3 @@ --- triggers assertion in debug build - DROP TABLE IF EXISTS test.rollup_having; CREATE TABLE test.rollup_having ( a Nullable(String), @@ -10,7 +8,7 @@ INSERT INTO test.rollup_having VALUES (NULL, NULL); INSERT INTO test.rollup_having VALUES ('a', NULL); INSERT INTO test.rollup_having VALUES ('a', 'b'); -SELECT a, b, count(*) FROM test.rollup_having GROUP BY a, b WITH ROLLUP WITH TOTALS HAVING a IS NOT NULL; -SELECT a, b, count(*) FROM test.rollup_having GROUP BY a, b WITH ROLLUP WITH TOTALS HAVING a IS NOT NULL and b IS NOT NULL; +SELECT a, b, count(*) 
FROM test.rollup_having GROUP BY a, b WITH ROLLUP WITH TOTALS HAVING a IS NOT NULL; -- { serverError 48 } +SELECT a, b, count(*) FROM test.rollup_having GROUP BY a, b WITH ROLLUP WITH TOTALS HAVING a IS NOT NULL and b IS NOT NULL; -- { serverError 48 } DROP TABLE test.rollup_having; diff --git a/tests/queries/bugs/00938_client_suggestions.sh b/tests/queries/bugs/00938_client_suggestions.sh deleted file mode 100755 index b4bd9e4480d..00000000000 --- a/tests/queries/bugs/00938_client_suggestions.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. $CURDIR/../shell_config.sh - -for i in {1..100}; do $CLICKHOUSE_CLIENT --always_load_suggestion_data --query="SELECT 1 FORMAT Null"; done From 8cb4dd275a1554f2f995dd27472f42f7b4359b53 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 22:23:59 +0300 Subject: [PATCH 320/752] Remove default argument (harmful) #10082 --- src/IO/parseDateTimeBestEffort.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 84a40144155..2924ad88506 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -89,7 +89,12 @@ struct DateTimeSubsecondPart }; template -ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, DateTimeSubsecondPart * fractional = nullptr) +ReturnType parseDateTimeBestEffortImpl( + time_t & res, + ReadBuffer & in, + const DateLUTImpl & local_time_zone, + const DateLUTImpl & utc_time_zone, + DateTimeSubsecondPart * fractional) { auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]]) { @@ -581,12 +586,12 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { - parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone); + parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } bool tryParseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { - return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone); + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) From 142087c4f7538d42a652294524f7351b71a9d0c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 22:34:04 +0300 Subject: [PATCH 321/752] Fix "parseDateTimeBestEffort" for strings in RFC-2822 when day of week is Tuesday or Thursday #10082 --- src/IO/parseDateTimeBestEffort.cpp | 5 ++++- .../01123_parse_date_time_best_effort_even_more.reference | 2 ++ .../01123_parse_date_time_best_effort_even_more.sql | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.reference create mode 100644 tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.sql diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 2924ad88506..68565782edf 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -371,7 +371,10 @@ ReturnType
parseDateTimeBestEffortImpl( { char c = *in.position(); - if (c == ' ' || c == 'T') + /// 'T' is a separator between date and time according to ISO 8601. + /// But don't skip it if we didn't read the date part yet, because 'T' is also a prefix or 'Tue' and 'Thu'. + + if (c == ' ' || (c == 'T' && year && !has_time)) { ++in.position(); } diff --git a/tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.reference b/tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.reference new file mode 100644 index 00000000000..558ba34abcd --- /dev/null +++ b/tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.reference @@ -0,0 +1,2 @@ +2018-08-18 07:22:16 +2018-08-16 07:22:16 diff --git a/tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.sql b/tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.sql new file mode 100644 index 00000000000..a4f6f173402 --- /dev/null +++ b/tests/queries/0_stateless/01123_parse_date_time_best_effort_even_more.sql @@ -0,0 +1,2 @@ +SELECT toTimeZone(parseDateTimeBestEffort('Thu, 18 Aug 2018 07:22:16 GMT'), 'UTC'); +SELECT toTimeZone(parseDateTimeBestEffort('Tue, 16 Aug 2018 07:22:16 GMT'), 'UTC'); From 3f1658c0e931e8c7467ac959ec2ab175a90d3663 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Apr 2020 22:35:23 +0300 Subject: [PATCH 322/752] Update parseDateTimeBestEffort.cpp --- src/IO/parseDateTimeBestEffort.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 68565782edf..7e40909226c 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -372,7 +372,7 @@ ReturnType parseDateTimeBestEffortImpl( char c = *in.position(); /// 'T' is a separator between date and time according to ISO 8601. - /// But don't skip it if we didn't read the date part yet, because 'T' is also a prefix or 'Tue' and 'Thu'. + /// But don't skip it if we didn't read the date part yet, because 'T' is also a prefix for 'Tue' and 'Thu'. 
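/// Editor's note, an illustration rather than part of the original patches: with the old bare
/// check `if (c == ' ' || c == 'T')`, an RFC-2822 string such as 'Tue, 16 Aug 2018 07:22:16 GMT'
/// had its leading 'T' consumed as an ISO 8601 date/time separator before any date digits were
/// read, so day-of-week tokens starting with 'T' (Tuesday, Thursday) failed to parse. The
/// guarded condition below only treats 'T' as a separator once the date (`year`) is known and
/// no time component has been read yet; the two new test queries above exercise exactly this.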
if (c == ' ' || (c == 'T' && year && !has_time)) { From 20dcc4decd4baf058cc9e754516c659f98dcf2cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 23:05:58 +0300 Subject: [PATCH 323/752] Fixed build on FreeBSD according to the advice from Vitaly @hellvesper --- src/Processors/RowsBeforeLimitCounter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/RowsBeforeLimitCounter.h b/src/Processors/RowsBeforeLimitCounter.h index abee5a09405..36ea4a557a8 100644 --- a/src/Processors/RowsBeforeLimitCounter.h +++ b/src/Processors/RowsBeforeLimitCounter.h @@ -17,7 +17,7 @@ public: uint64_t get() const { return rows_before_limit.load(std::memory_order_acquire); } - void setAppliedLimit() { has_applied_limit.store(true, std::memory_order::release); } + void setAppliedLimit() { has_applied_limit.store(true, std::memory_order_release); } bool hasAppliedLimit() const { return has_applied_limit.load(std::memory_order_acquire); } private: From ceb5c1964af484bfdfdab15a6e385cd73049b9c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Apr 2020 23:48:51 +0300 Subject: [PATCH 324/752] Update cctz just in case #10211 --- contrib/cctz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz b/contrib/cctz index 4f9776a310f..44541cf2b85 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 4f9776a310f4952454636363def82c2bf6641d5f +Subproject commit 44541cf2b85ced2a6e5ad4276183a9812d1a54ab From b9931863eff3b528109b89d94a555ed81575cc07 Mon Sep 17 00:00:00 2001 From: Avogar Date: Mon, 13 Apr 2020 00:01:17 +0300 Subject: [PATCH 325/752] Fix FixedString packing --- src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index 7c5e2c5b522..cef7b001505 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -85,7 +85,6 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_uint64(assert_cast(column).getElement(row_num)); return; } - case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: { const StringRef & string = assert_cast(column).getDataAt(row_num); @@ -93,6 +92,13 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr packer.pack_str_body(string.data, string.size); return; } + case TypeIndex::FixedString: + { + const StringRef & string = assert_cast(column).getDataAt(row_num); + packer.pack_str(string.size); + packer.pack_str_body(string.data, string.size); + return; + } case TypeIndex::Array: { auto nested_type = assert_cast(*data_type).getNestedType(); From 983950d4ec79cc07c945424d687e72a97b0c979c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 00:07:11 +0300 Subject: [PATCH 326/752] Convert types in Views --- src/Storages/StorageView.cpp | 10 ++++++++-- .../queries/0_stateless/01124_view_bad_types.reference | 10 ++++++++++ .../01124_view_bad_types.sql} | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01124_view_bad_types.reference rename tests/queries/{bugs/view_bad_types.sql => 0_stateless/01124_view_bad_types.sql} (84%) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 05feeb7d786..78e3c50a879 100644 --- a/src/Storages/StorageView.cpp +++ 
b/src/Storages/StorageView.cpp @@ -12,13 +12,12 @@ #include #include -#include - #include #include #include #include +#include namespace DB @@ -78,8 +77,15 @@ Pipes StorageView::read( /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. for (auto & pipe : pipes) + { pipe.addSimpleTransform(std::make_shared(pipe.getHeader())); + /// And also convert to expected structure. + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), getSampleBlockForColumns(column_names), + ConvertingTransform::MatchColumnsMode::Name, context)); + } + return pipes; } diff --git a/tests/queries/0_stateless/01124_view_bad_types.reference b/tests/queries/0_stateless/01124_view_bad_types.reference new file mode 100644 index 00000000000..af98bcd6397 --- /dev/null +++ b/tests/queries/0_stateless/01124_view_bad_types.reference @@ -0,0 +1,10 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 diff --git a/tests/queries/bugs/view_bad_types.sql b/tests/queries/0_stateless/01124_view_bad_types.sql similarity index 84% rename from tests/queries/bugs/view_bad_types.sql rename to tests/queries/0_stateless/01124_view_bad_types.sql index 38daabfd6b8..81fc53930c1 100644 --- a/tests/queries/bugs/view_bad_types.sql +++ b/tests/queries/0_stateless/01124_view_bad_types.sql @@ -5,7 +5,7 @@ INSERT INTO test.table SELECT * FROM system.numbers LIMIT 10; DROP TABLE IF EXISTS test.view; CREATE VIEW test.view (x UInt64) AS SELECT * FROM test.table; -SELECT x, any(x) FROM test.view GROUP BY x; +SELECT x, any(x) FROM test.view GROUP BY x ORDER BY x; DROP TABLE test.view; DROP TABLE test.table; From d49dc5c008cbd3802dd35066f4607e5f3e21fde9 Mon Sep 17 00:00:00 2001 From: Avogar Date: Mon, 13 Apr 2020 00:16:27 +0300 Subject: [PATCH 327/752] Add test --- tests/queries/0_stateless/01098_msgpack_format.reference | 3 +++ tests/queries/0_stateless/01098_msgpack_format.sh | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/tests/queries/0_stateless/01098_msgpack_format.reference b/tests/queries/0_stateless/01098_msgpack_format.reference index 8059526a38f..ad116a5ba91 100644 --- a/tests/queries/0_stateless/01098_msgpack_format.reference +++ b/tests/queries/0_stateless/01098_msgpack_format.reference @@ -8,3 +8,6 @@ [[1,2,3],[1001,2002],[3167]] [[['one'],['two']],[['three']],[['four'],['five']]] [0,1,2,3,42,253,254,255] [255,254,253,42,3,2,1,0] +2020-01-01 +2020-01-02 +2020-01-02 diff --git a/tests/queries/0_stateless/01098_msgpack_format.sh b/tests/queries/0_stateless/01098_msgpack_format.sh index afebd6de3dc..233399570bb 100755 --- a/tests/queries/0_stateless/01098_msgpack_format.sh +++ b/tests/queries/0_stateless/01098_msgpack_format.sh @@ -52,3 +52,11 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack"; $CLICKHOUSE_CLIENT --query="DROP TABLE msgpack"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (date FixedString(10)) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO msgpack VALUES ('2020-01-01'), ('2020-01-02'), ('2020-01-02')"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM msgpack"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE msgpack"; + From 3215257a90e914cf00a8399336493252e66056d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 00:27:11 +0300 Subject: [PATCH 328/752] Fixed "generateRandom" function for Date type #9973 --- src/Storages/StorageGenerateRandom.cpp | 5 ++++- .../0_stateless/01125_generate_random_qoega.reference | 1 + 
tests/queries/0_stateless/01125_generate_random_qoega.sql | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01125_generate_random_qoega.reference create mode 100644 tests/queries/0_stateless/01125_generate_random_qoega.sql diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 1fd2d4ec2d8..d0772254045 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -205,7 +205,10 @@ ColumnPtr fillColumnWithRandomData( { auto column = ColumnUInt16::create(); column->getData().resize(limit); - fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(UInt16), rng); + + for (size_t i = 0; i < limit; ++i) + column->getData()[i] = rng() % (DATE_LUT_MAX_DAY_NUM + 1); /// Slow + return column; } case TypeIndex::UInt32: [[fallthrough]]; diff --git a/tests/queries/0_stateless/01125_generate_random_qoega.reference b/tests/queries/0_stateless/01125_generate_random_qoega.reference new file mode 100644 index 00000000000..1cb416a722b --- /dev/null +++ b/tests/queries/0_stateless/01125_generate_random_qoega.reference @@ -0,0 +1 @@ +100 4456446406473339606 diff --git a/tests/queries/0_stateless/01125_generate_random_qoega.sql b/tests/queries/0_stateless/01125_generate_random_qoega.sql new file mode 100644 index 00000000000..7fb586ad2b5 --- /dev/null +++ b/tests/queries/0_stateless/01125_generate_random_qoega.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS mass_table_117; +CREATE TABLE mass_table_117 (`dt` Date, `site_id` Int32, `site_key` String) ENGINE = MergeTree(dt, (site_id, site_key, dt), 8192); +INSERT INTO mass_table_117 SELECT * FROM generateRandom('`dt` Date,`site_id` Int32,`site_key` String', 1, 10, 2) LIMIT 100; +SELECT count(), sum(cityHash64(*)) FROM mass_table_117; +DROP TABLE mass_table_117; From 9860ffee5189189b7285dc5641c92c35bae49591 Mon Sep 17 00:00:00 2001 From: Avogar Date: Mon, 13 Apr 2020 00:59:28 +0300 Subject: [PATCH 329/752] Add MsgPack performance test --- tests/performance/parse_engine_file.xml | 1 + tests/performance/select_format.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/performance/parse_engine_file.xml b/tests/performance/parse_engine_file.xml index fb10fa97915..c96f4e537ff 100644 --- a/tests/performance/parse_engine_file.xml +++ b/tests/performance/parse_engine_file.xml @@ -34,6 +34,7 @@ RowBinary Native Avro + MsgPack diff --git a/tests/performance/select_format.xml b/tests/performance/select_format.xml index b8df874304f..e47d981c4d7 100644 --- a/tests/performance/select_format.xml +++ b/tests/performance/select_format.xml @@ -44,6 +44,7 @@ ODBCDriver2 MySQLWire Avro + MsgPack From 4788eb3423a575dc23f207963bf22c79acac1088 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 01:00:03 +0300 Subject: [PATCH 330/752] Make the assertion in code consistent with the real partition expression --- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 34c615994f0..23a60ddab78 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -210,8 +210,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa const auto & date_lut = DateLUT::instance(); - DayNum min_month = date_lut.toFirstDayNumOfMonth(DayNum(min_date)); - DayNum max_month = 
date_lut.toFirstDayNumOfMonth(DayNum(max_date)); + auto min_month = date_lut.toNumYYYYMM(min_date); + auto max_month = date_lut.toNumYYYYMM(max_date); if (min_month != max_month) throw Exception("Logical error: part spans more than one month.", ErrorCodes::LOGICAL_ERROR); From d1eaa34cd9f24341fb3e9e96a09d38a5b894eb4b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 01:00:10 +0300 Subject: [PATCH 331/752] Added a test --- .../01126_month_partitioning_consistent_code.reference | 0 .../0_stateless/01126_month_partitioning_consistent_code.sql | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/01126_month_partitioning_consistent_code.reference create mode 100644 tests/queries/0_stateless/01126_month_partitioning_consistent_code.sql diff --git a/tests/queries/0_stateless/01126_month_partitioning_consistent_code.reference b/tests/queries/0_stateless/01126_month_partitioning_consistent_code.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01126_month_partitioning_consistent_code.sql b/tests/queries/0_stateless/01126_month_partitioning_consistent_code.sql new file mode 100644 index 00000000000..c9bfbbe5111 --- /dev/null +++ b/tests/queries/0_stateless/01126_month_partitioning_consistent_code.sql @@ -0,0 +1,4 @@ +DROP TABLE IF EXISTS mt; +CREATE TABLE mt (d Date, x UInt8) ENGINE = MergeTree(d, x, 8192); +INSERT INTO mt VALUES (52392, 1), (62677, 2); +DROP TABLE mt; From 860e9092f19b379c4dbb53174c513159507aced5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 01:25:41 +0300 Subject: [PATCH 332/752] Fixed another inconsistency in partition names --- base/common/DateLUTImpl.cpp | 16 ++++++++++++++-- base/common/DateLUTImpl.h | 2 +- src/Storages/MergeTree/MergeTreePartInfo.cpp | 4 ++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/base/common/DateLUTImpl.cpp b/base/common/DateLUTImpl.cpp index d7ab0046992..a7ca21c984e 100644 --- a/base/common/DateLUTImpl.cpp +++ b/base/common/DateLUTImpl.cpp @@ -133,7 +133,10 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_) } /// Fill lookup table for years and months. - for (size_t day = 0; day < DATE_LUT_SIZE && lut[day].year <= DATE_LUT_MAX_YEAR; ++day) + size_t year_months_lut_index = 0; + size_t first_day_of_last_month = 0; + + for (size_t day = 0; day < DATE_LUT_SIZE; ++day) { const Values & values = lut[day]; @@ -141,7 +144,16 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_) { if (values.month == 1) years_lut[values.year - DATE_LUT_MIN_YEAR] = day; - years_months_lut[(values.year - DATE_LUT_MIN_YEAR) * 12 + values.month - 1] = day; + + year_months_lut_index = (values.year - DATE_LUT_MIN_YEAR) * 12 + values.month - 1; + years_months_lut[year_months_lut_index] = day; + first_day_of_last_month = day; } } + + /// Fill the rest of lookup table with the same last month (2106-02-01). + for (; year_months_lut_index < DATE_LUT_YEARS * 12; ++year_months_lut_index) + { + years_months_lut[year_months_lut_index] = first_day_of_last_month; + } } diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index d9d27c56ee3..ec32d62bcad 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -12,7 +12,7 @@ /// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows to remove extra check. 
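/// Editor's note (an inference from the patch, not part of it): the last supported day number
/// maps to 2106-02-07, which falls inside an incomplete month. Before this change the
/// year/month loop stopped at year 2105, so the tail of years_months_lut stayed uninitialized;
/// padding it with the first day of the last month (2106-02-01), as the .cpp hunk above does,
/// keeps month lookups such as toFirstDayNumOfMonth() and toNumYYYYMM() well defined for
/// extreme dates like '2106-02-07' that the partition-name code and the new tests exercise.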
#define DATE_LUT_SIZE 0x10000 #define DATE_LUT_MIN_YEAR 1970 -#define DATE_LUT_MAX_YEAR 2105 /// Last supported year +#define DATE_LUT_MAX_YEAR 2106 /// Last supported year (incomplete) #define DATE_LUT_YEARS (1 + DATE_LUT_MAX_YEAR - DATE_LUT_MIN_YEAR) /// Number of years in lookup table #if defined(__PPC__) diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 43bd9538e3e..d30f6470bb1 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -120,8 +120,8 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, D min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd); max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd); - DayNum min_month = date_lut.toFirstDayNumOfMonth(min_date); - DayNum max_month = date_lut.toFirstDayNumOfMonth(max_date); + auto min_month = date_lut.toNumYYYYMM(min_date); + auto max_month = date_lut.toNumYYYYMM(max_date); if (min_month != max_month) throw Exception("Part name " + part_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME); From a517111259af9a7a03d9e94045335d72d01286f3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 01:30:10 +0300 Subject: [PATCH 333/752] Added one more test --- ...month_partitioning_consistency_select.reference | 4 ++++ ...01127_month_partitioning_consistency_select.sql | 14 ++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 tests/queries/0_stateless/01127_month_partitioning_consistency_select.reference create mode 100644 tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql diff --git a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.reference b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.reference new file mode 100644 index 00000000000..1b08e7f2d6f --- /dev/null +++ b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.reference @@ -0,0 +1,4 @@ +Q1 2106-02-07 Hello +Q2 0000-00-00 World +Q1 2106-02-07 Hello +Q2 0000-00-00 World diff --git a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql new file mode 100644 index 00000000000..59edd0c37b8 --- /dev/null +++ b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS mt; +CREATE TABLE mt (d Date, x String) ENGINE = MergeTree(d, x, 8192); +INSERT INTO mt VALUES ('2106-02-07', 'Hello'), ('1970-01-01', 'World'); + +SELECT 'Q1', * FROM mt WHERE d = '2106-02-07'; +SELECT 'Q2', * FROM mt WHERE d = '1970-01-01'; + +DETACH TABLE mt; +ATTACH TABLE mt; + +SELECT 'Q1', * FROM mt WHERE d = '2106-02-07'; +SELECT 'Q2', * FROM mt WHERE d = '1970-01-01'; + +DROP TABLE mt; From 8ad89a82d4522ab84d32068ef3d8d9a063165d3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 02:08:32 +0300 Subject: [PATCH 334/752] Support Nested types in "generateRandom" --- src/Storages/StorageGenerateRandom.cpp | 28 +++++++++++++++---- .../01128_generate_random_nested.reference | 2 ++ .../01128_generate_random_nested.sql | 8 ++++++ 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/01128_generate_random_nested.reference create mode 100644 tests/queries/0_stateless/01128_generate_random_nested.sql diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index d0772254045..70b84c076b7 100644 --- 
a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -57,7 +58,12 @@ void fillBufferWithRandomData(char * __restrict data, size_t size, pcg64 & rng) ColumnPtr fillColumnWithRandomData( - const DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, pcg64 & rng, const Context & context) + const DataTypePtr type, + UInt64 limit, + UInt64 max_array_length, + UInt64 max_string_length, + pcg64 & rng, + const Context & context) { TypeIndex idx = type->getTypeId(); @@ -340,14 +346,24 @@ public: protected: Chunk generate() override { + /// To support Nested types, we will collect them to single Array of Tuple. + auto names_and_types = Nested::collect(block_header.getNamesAndTypesList()); + Columns columns; - columns.reserve(block_header.columns()); - DataTypes types = block_header.getDataTypes(); + columns.reserve(names_and_types.size()); - for (const auto & type : types) - columns.emplace_back(fillColumnWithRandomData(type, block_size, max_array_length, max_string_length, rng, context)); + Block compact_block; + for (const auto & elem : names_and_types) + { + compact_block.insert( + { + fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context), + elem.type, + elem.name + }); + } - return {std::move(columns), block_size}; + return {Nested::flatten(compact_block).getColumns(), block_size}; } private: diff --git a/tests/queries/0_stateless/01128_generate_random_nested.reference b/tests/queries/0_stateless/01128_generate_random_nested.reference new file mode 100644 index 00000000000..d9d2b251702 --- /dev/null +++ b/tests/queries/0_stateless/01128_generate_random_nested.reference @@ -0,0 +1,2 @@ +100 12366141706519416319 +109 2990700419202507835 diff --git a/tests/queries/0_stateless/01128_generate_random_nested.sql b/tests/queries/0_stateless/01128_generate_random_nested.sql new file mode 100644 index 00000000000..2af52e69893 --- /dev/null +++ b/tests/queries/0_stateless/01128_generate_random_nested.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS mass_table_312; +CREATE TABLE mass_table_312 (d Date DEFAULT '2000-01-01', x UInt64, n Nested(a String, b String)) ENGINE = MergeTree(d, x, 1); +INSERT INTO mass_table_312 SELECT * FROM generateRandom('`d` Date,`x` UInt64,`n.a` Array(String),`n.b` Array(String)', 1, 10, 2) LIMIT 100; + +SELECT count(), sum(cityHash64(*)) FROM mass_table_312; +SELECT count(), sum(cityHash64(*)) FROM mass_table_312 ARRAY JOIN n; + +DROP TABLE mass_table_312; From 0cf882f4dfb09790e09635601b210c8e5814d0d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 02:20:49 +0300 Subject: [PATCH 335/752] Update test --- .../01087_table_function_generate.reference | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/tests/queries/0_stateless/01087_table_function_generate.reference b/tests/queries/0_stateless/01087_table_function_generate.reference index 68238faee48..d7cc6b0a933 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.reference +++ b/tests/queries/0_stateless/01087_table_function_generate.reference @@ -1,14 +1,14 @@ UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8 -2804162938822577320 -2776833771540858 3467776823 1163715250 23903 13655 137 -41 -7885388429666205427 -1363628932535403038 484159052 -308788249 56810 -22227 51 -41 -4357435422797280898 1355609803008819271 4126129912 -852056475 64304 -11401 139 86 -5935810273536892891 
-804738887697332962 3109335413 -80126721 258 12889 18 88 -368066018677693974 -4927165984347126295 1015254922 2026080544 44305 21973 16 0 -8124171311239967992 -1179703908046100129 1720727300 -138469036 61343 10573 252 -32 -15657812979985370729 -5733276247123822513 3254757884 -500590428 45913 19153 105 -102 -18371568619324220532 -6793779541583578394 1686821450 -455892108 49050 -28603 248 80 -821735343441964030 3148260644406230976 256251035 -885069056 58858 -29361 58 61 -9558594037060121162 -2907172753635797124 4276198376 1947296644 26801 -13531 204 -66 +2804162938822577320 -2776833771540858 3467776823 1163715250 31161 -2916 220 -117 +7885388429666205427 -1363628932535403038 484159052 -308788249 43346 13638 143 -105 +4357435422797280898 1355609803008819271 4126129912 -852056475 34184 9166 49 33 +5935810273536892891 -804738887697332962 3109335413 -80126721 47877 -31421 186 -77 +368066018677693974 -4927165984347126295 1015254922 2026080544 46037 -29626 240 108 +8124171311239967992 -1179703908046100129 1720727300 -138469036 33028 -12819 138 16 +15657812979985370729 -5733276247123822513 3254757884 -500590428 3829 30527 3 -81 +18371568619324220532 -6793779541583578394 1686821450 -455892108 43475 2284 252 -90 +821735343441964030 3148260644406230976 256251035 -885069056 11643 11455 176 90 +9558594037060121162 -2907172753635797124 4276198376 1947296644 45922 26632 97 43 - Enum8(\'hello\' = 1, \'world\' = 5) hello @@ -47,16 +47,16 @@ h o - Date DateTime DateTime(\'Europe/Moscow\') -2106-02-07 2050-12-17 02:46:35 2096-02-16 22:18:22 -2106-02-07 2013-10-17 23:35:26 1976-01-24 12:52:48 -2039-08-16 1974-11-17 23:22:46 1980-03-04 21:02:50 -1997-04-11 1972-09-18 23:44:08 2040-07-10 14:46:42 -2103-11-03 2044-11-23 20:57:12 1970-10-09 02:30:14 -2066-11-19 2029-12-10 03:13:55 2106-01-30 21:52:44 -2064-08-14 2016-07-14 11:33:45 2096-12-12 00:40:50 -2046-09-13 2085-07-10 18:51:14 2096-01-15 16:31:33 -2008-03-16 2047-05-16 23:28:36 2103-02-11 16:44:39 -2000-07-07 2105-07-19 19:29:06 1980-01-02 05:18:22 +2077-09-17 1970-10-09 02:30:14 2074-08-12 11:31:27 +2005-11-19 2106-01-30 21:52:44 2097-05-25 07:54:35 +2007-02-24 2096-12-12 00:40:50 1988-08-10 11:16:31 +2019-06-30 2096-01-15 16:31:33 2063-10-20 08:48:17 +2039-01-16 2103-02-11 16:44:39 2036-10-09 04:29:10 +1994-11-03 1980-01-02 05:18:22 2055-12-23 12:33:52 +2083-08-20 2079-06-11 16:29:02 2000-12-05 17:46:24 +2030-06-25 2100-03-01 18:50:22 1993-03-25 01:19:12 +2087-03-16 2034-08-25 19:46:33 2045-12-10 16:47:40 +2006-04-30 2069-09-30 16:07:48 2084-08-26 03:33:12 - DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\') 1978-06-07 23:50:57.320 2013-08-28 10:21:54.010758 1991-08-25 16:23:26.140215 @@ -225,14 +225,14 @@ RL,{Xs\\tw [114] -84125.1554 ('2023-06-06 06:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea') [124] -114719.5228 ('2010-11-11 22:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7') - -[] 1900051923 { -189530.5846 h -5.6279699579452485e47 ('1984-12-06','2028-08-17 06:05:01','2036-04-02 23:52:28.468','4b3d498c-dd44-95c1-5b75-921504ec5d8d') F743 -[-102,-118] 392272782 Eb -14818.0200 o -2.664492247169164e59 ('2082-12-26','2052-09-09 06:50:50','2088-04-21 05:07:08.245','aeb9c26e-0ee7-2b8e-802b-2a96319b8e60') CBF4 -[-71] 775049089 \N -158115.1178 w 4.1323844687113747e-305 ('2106-02-07','2090-07-31 16:45:26','2076-07-10 09:11:06.385','57c69bc6-dddd-0975-e932-a7b5173a1304') EB1D -[-28,100] 3675466147 { -146685.1749 h 3.6676044396877755e142 ('2017-10-25','2100-02-28 18:07:18','2055-10-14 06:36:20.056','14949dae-dfa8-a124-af83-887348b2f609') 6D88 -[-23] 
2514120753 (`u, -119659.6174 w 1.3231258347475906e34 ('2106-02-07','2074-08-10 06:25:12','1976-12-04 18:31:55.745','86a9b3c1-4593-4d56-7762-3aa1dd22cbbf') AD43 +[11,-36] 3308237300 \N 171205.1896 \N 5.634708707075817e195 ('1974-10-31','1993-12-24 09:38:45','2038-07-15 05:22:51.805','63d999b8-8cca-e237-c4a4-4dd7d0096f65') 609E +[39] 1614362420 `4A8P 157144.0630 o -1.1843143253872814e-255 ('2106-02-07','2072-09-28 18:27:27','2073-07-10 12:19:58.146','6483f5c0-8733-364c-4fa0-9948d32e8903') A886 +[48,-120] 3848918261 1 Date: Sun, 5 Apr 2020 00:07:00 +0300 Subject: [PATCH 336/752] Add log_queries_min_type to filter which entries will be written to query_log Can be used to write into query_log only failed queries (e.g. on memory exceeded error), by using: set log_queries_min_type='EXCEPTION_WHILE_PROCESSING' --- docs/en/operations/settings/settings.md | 18 +++++++++++++++++ src/Core/Settings.h | 2 +- src/Core/SettingsCollection.cpp | 7 +++++++ src/Core/SettingsCollection.h | 10 ++++++++++ src/Interpreters/QueryLog.h | 9 ++------- src/Interpreters/executeQuery.cpp | 20 +++++++++---------- .../01231_log_queries_min_type.reference | 5 +++++ .../01231_log_queries_min_type.sql | 15 ++++++++++++++ 8 files changed, 68 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/01231_log_queries_min_type.reference create mode 100644 tests/queries/0_stateless/01231_log_queries_min_type.sql diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 69c444ebaef..37b4c713f91 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -507,6 +507,24 @@ Example: log_queries=1 ``` +## log\_queries\_min\_type {#settings-log-queries-min-type} + +`query_log` minimal type to log. + +Possible values: +- `QUERY_START` (`=1`) +- `QUERY_FINISH` (`=2`) +- `EXCEPTION_BEFORE_START` (`=3`) +- `EXCEPTION_WHILE_PROCESSING` (`=4`) + +Default value: `QUERY_START`. + +Can be used to limit which entries will go to `query_log`; say you are interested only in errors, then you can use `EXCEPTION_WHILE_PROCESSING`: + +``` text +log_queries_min_type='EXCEPTION_WHILE_PROCESSING' +``` + ## log\_query\_threads {#settings-log-query-threads} Setting up query threads logging. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8138af31d5f..725171d4a1b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -149,7 +149,7 @@ struct Settings : public SettingsCollection M(SettingInt64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \ \ M(SettingBool, log_queries, 0, "Log requests and write the log to the system table.", 0) \ - \ + M(SettingLogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "query_log minimal type to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \ M(SettingUInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. 
Also limit length of printed query in ordinary text log.", 0) \ \ M(SettingDistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \ diff --git a/src/Core/SettingsCollection.cpp b/src/Core/SettingsCollection.cpp index 6d879b27181..238ac1c3c62 100644 --- a/src/Core/SettingsCollection.cpp +++ b/src/Core/SettingsCollection.cpp @@ -542,6 +542,13 @@ IMPLEMENT_SETTING_ENUM(FormatSettings::DateTimeInputFormat, DATE_TIME_INPUT_FORM M(trace, "trace") IMPLEMENT_SETTING_ENUM(LogsLevel, LOGS_LEVEL_LIST_OF_NAMES, ErrorCodes::BAD_ARGUMENTS) +#define LOG_QUERIES_TYPE_LIST_OF_NAMES(M) \ + M(QUERY_START, "QUERY_START") \ + M(QUERY_FINISH, "QUERY_FINISH") \ + M(EXCEPTION_BEFORE_START, "EXCEPTION_BEFORE_START") \ + M(EXCEPTION_WHILE_PROCESSING, "EXCEPTION_WHILE_PROCESSING") +IMPLEMENT_SETTING_ENUM(QueryLogElementType, LOG_QUERIES_TYPE_LIST_OF_NAMES, ErrorCodes::BAD_ARGUMENTS) + namespace details { diff --git a/src/Core/SettingsCollection.h b/src/Core/SettingsCollection.h index da21412b7c1..d93772e86ed 100644 --- a/src/Core/SettingsCollection.h +++ b/src/Core/SettingsCollection.h @@ -298,6 +298,16 @@ enum class LogsLevel }; using SettingLogsLevel = SettingEnum; +// Make it signed for compatibility with DataTypeEnum8 +enum QueryLogElementType : int8_t +{ + QUERY_START = 1, + QUERY_FINISH = 2, + EXCEPTION_BEFORE_START = 3, + EXCEPTION_WHILE_PROCESSING = 4, +}; +using SettingLogQueriesType = SettingEnum; + enum class SettingsBinaryFormat { diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 836b37095e9..ec14f5e97fb 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace ProfileEvents @@ -22,13 +23,7 @@ namespace DB /// A struct which will be inserted as row into query_log table struct QueryLogElement { - enum Type : int8_t // Make it signed for compatibility with DataTypeEnum8 - { - QUERY_START = 1, - QUERY_FINISH = 2, - EXCEPTION_BEFORE_START = 3, - EXCEPTION_WHILE_PROCESSING = 4, - }; + using Type = QueryLogElementType; Type type = QUERY_START; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c9c66832f08..68bebb83619 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -157,7 +157,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c /// Log the start of query execution into the table if necessary. QueryLogElement elem; - elem.type = QueryLogElement::EXCEPTION_BEFORE_START; + elem.type = QueryLogElementType::EXCEPTION_BEFORE_START; elem.event_time = current_time; elem.query_start_time = current_time; @@ -175,7 +175,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c /// Update performance counters before logging to query_log CurrentThread::finalizePerformanceCounters(); - if (settings.log_queries) + if (settings.log_queries && elem.type >= settings.log_queries_min_type) if (auto query_log = context.getQueryLog()) query_log->add(elem); } @@ -400,7 +400,7 @@ static std::tuple executeQueryImpl( { QueryLogElement elem; - elem.type = QueryLogElement::QUERY_START; + elem.type = QueryLogElementType::QUERY_START; elem.event_time = current_time; elem.query_start_time = current_time; @@ -412,7 +412,7 @@ static std::tuple executeQueryImpl( bool log_queries = settings.log_queries && !internal; /// Log into system table start of query execution, if need. 
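/// Editor's note, a usage sketch rather than part of the patch: QueryLogElementType values are
/// ordered QUERY_START=1 < QUERY_FINISH=2 < EXCEPTION_BEFORE_START=3 < EXCEPTION_WHILE_PROCESSING=4,
/// so the `elem.type >= log_queries_min_type` comparisons introduced below act as a severity
/// threshold. For example, to record only queries that fail during execution:
///     SET log_queries = 1, log_queries_min_type = 'EXCEPTION_WHILE_PROCESSING';
///     -- from now on, successful queries are no longer written to system.query_log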
- if (log_queries) + if (log_queries && elem.type >= settings.log_queries_min_type) { if (settings.log_query_settings) elem.query_settings = std::make_shared(context.getSettingsRef()); @@ -422,7 +422,7 @@ static std::tuple executeQueryImpl( } /// Also make possible for caller to log successful query finish and exception during execution. - auto finish_callback = [elem, &context, log_queries] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable + auto finish_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type] (IBlockInputStream * stream_in, IBlockOutputStream * stream_out) mutable { QueryStatus * process_list_elem = context.getProcessListElement(); @@ -436,7 +436,7 @@ static std::tuple executeQueryImpl( double elapsed_seconds = info.elapsed_seconds; - elem.type = QueryLogElement::QUERY_FINISH; + elem.type = QueryLogElementType::QUERY_FINISH; elem.event_time = time(nullptr); elem.query_duration_ms = elapsed_seconds * 1000; @@ -484,19 +484,19 @@ static std::tuple executeQueryImpl( elem.thread_ids = std::move(info.thread_ids); elem.profile_counters = std::move(info.profile_counters); - if (log_queries) + if (log_queries && elem.type >= log_queries_min_type) { if (auto query_log = context.getQueryLog()) query_log->add(elem); } }; - auto exception_callback = [elem, &context, log_queries, quota(quota)] () mutable + auto exception_callback = [elem, &context, log_queries, log_queries_min_type = settings.log_queries_min_type, quota(quota)] () mutable { if (quota) quota->used(Quota::ERRORS, 1, /* check_exceeded = */ false); - elem.type = QueryLogElement::EXCEPTION_WHILE_PROCESSING; + elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; elem.event_time = time(nullptr); elem.query_duration_ms = 1000 * (elem.event_time - elem.query_start_time); @@ -529,7 +529,7 @@ static std::tuple executeQueryImpl( logException(context, elem); /// In case of exception we log internal queries also - if (log_queries) + if (log_queries && elem.type >= log_queries_min_type) { if (auto query_log = context.getQueryLog()) query_log->add(elem); diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.reference b/tests/queries/0_stateless/01231_log_queries_min_type.reference new file mode 100644 index 00000000000..a358d022033 --- /dev/null +++ b/tests/queries/0_stateless/01231_log_queries_min_type.reference @@ -0,0 +1,5 @@ +01231_log_queries_min_type/QUERY_START +2 +01231_log_queries_min_type/EXCEPTION_BEFORE_START +2 +3 diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql new file mode 100644 index 00000000000..f2229c94a8a --- /dev/null +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -0,0 +1,15 @@ +set log_queries=1; + +select '01231_log_queries_min_type/QUERY_START'; +system flush logs; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; + +set log_queries_min_type='EXCEPTION_BEFORE_START'; +select '01231_log_queries_min_type/EXCEPTION_BEFORE_START'; +system flush logs; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; + +set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'; +select '01231_log_queries_min_type/', max(number) from system.numbers limit 1e6 settings 
max_rows_to_read='100K'; -- { serverError 158; } +system flush logs; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; From c7eaaaf7fe1a340eeb20f5edfdd0f6d24aa40157 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 13 Apr 2020 04:33:05 +0300 Subject: [PATCH 337/752] Small refactoring of SystemLogs --- src/Interpreters/MetricLog.cpp | 7 +++ src/Interpreters/MetricLog.h | 2 + src/Interpreters/SystemLog.cpp | 30 +++++------ src/Interpreters/SystemLog.h | 96 +++++++++++++++++++++------------- 4 files changed, 84 insertions(+), 51 deletions(-) diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 5622e0c65b0..bd898170705 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -70,6 +70,13 @@ void MetricLog::stopCollectMetric() } +void MetricLog::shutdown() +{ + stopCollectMetric(); + stopFlushThread(); +} + + inline UInt64 time_in_milliseconds(std::chrono::time_point timepoint) { return std::chrono::duration_cast(timepoint.time_since_epoch()).count(); diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index c55bad2c12f..a90ce923494 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -34,6 +34,8 @@ class MetricLog : public SystemLog using SystemLog::SystemLog; public: + void shutdown() override; + /// Launches a background thread to collect metrics with interval void startCollectMetric(size_t collect_interval_milliseconds_); diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index a78342f8b17..fc0f2f98125 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -83,6 +83,19 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi size_t collect_interval_milliseconds = config.getUInt64("metric_log.collect_interval_milliseconds"); metric_log->startCollectMetric(collect_interval_milliseconds); } + + if (query_log) + logs.emplace_back(query_log.get()); + if (query_thread_log) + logs.emplace_back(query_thread_log.get()); + if (part_log) + logs.emplace_back(part_log.get()); + if (trace_log) + logs.emplace_back(trace_log.get()); + if (text_log) + logs.emplace_back(text_log.get()); + if (metric_log) + logs.emplace_back(metric_log.get()); } @@ -93,21 +106,8 @@ SystemLogs::~SystemLogs() void SystemLogs::shutdown() { - if (query_log) - query_log->shutdown(); - if (query_thread_log) - query_thread_log->shutdown(); - if (part_log) - part_log->shutdown(); - if (trace_log) - trace_log->shutdown(); - if (text_log) - text_log->shutdown(); - if (metric_log) - { - metric_log->stopCollectMetric(); - metric_log->shutdown(); - } + for (auto & log : logs) + log->shutdown(); } } diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 87da342ae1f..7c8dc1606f7 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -2,6 +2,9 @@ #include #include +#include +#include + #include #include #include @@ -59,13 +62,20 @@ namespace ErrorCodes #define DBMS_SYSTEM_LOG_QUEUE_SIZE 1048576 + class Context; -class QueryLog; -class QueryThreadLog; -class PartLog; -class TextLog; -class TraceLog; -class MetricLog; + + +class ISystemLog +{ +public: + virtual String getName() = 0; + virtual ASTPtr getCreateTableQuery() = 0; + virtual void flush() = 0; + virtual void shutdown() = 0; + virtual ~ISystemLog() = default; +}; + /// System logs should be destroyed 
in destructor of the last Context and before tables, /// because SystemLog destruction makes insert query while flushing data into underlying tables @@ -82,11 +92,13 @@ struct SystemLogs std::shared_ptr trace_log; /// Used to log traces from query profiler std::shared_ptr text_log; /// Used to log all text messages. std::shared_ptr metric_log; /// Used to log all metrics. + + std::vector logs; }; template -class SystemLog : private boost::noncopyable +class SystemLog : public ISystemLog, private boost::noncopyable { public: using Self = SystemLog; @@ -106,18 +118,28 @@ public: const String & storage_def_, size_t flush_interval_milliseconds_); - ~SystemLog(); - /** Append a record into log. * Writing to table will be done asynchronously and in case of failure, record could be lost. */ void add(const LogElement & element); + void stopFlushThread(); + /// Flush data in the buffer to disk - void flush(); + void flush() override; /// Stop the background flush thread before destructor. No more data will be written. - void shutdown(); + void shutdown() override + { + stopFlushThread(); + } + + String getName() override + { + return LogElement::name(); + } + + ASTPtr getCreateTableQuery() override; protected: Logger * log; @@ -250,7 +272,7 @@ void SystemLog::flush() template -void SystemLog::shutdown() +void SystemLog::stopFlushThread() { { std::unique_lock lock(mutex); @@ -270,13 +292,6 @@ void SystemLog::shutdown() } -template -SystemLog::~SystemLog() -{ - shutdown(); -} - - template void SystemLog::savingThreadFunction() { @@ -399,7 +414,7 @@ void SystemLog::prepareTable() rename->elements.emplace_back(elem); LOG_DEBUG(log, "Existing table " << description << " for system log has obsolete or different structure." - " Renaming it to " << backQuoteIfNeed(to.table)); + " Renaming it to " << backQuoteIfNeed(to.table)); InterpreterRenameQuery(rename, context).execute(); @@ -415,22 +430,7 @@ void SystemLog::prepareTable() /// Create the table. 
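/// Editor's note (not part of the patch): the refactoring above extracts a minimal ISystemLog
/// interface (getName, getCreateTableQuery, flush, shutdown) so that SystemLogs can keep every
/// log in one std::vector and shut them all down uniformly with
///     for (auto & log : logs) log->shutdown();
/// while MetricLog overrides shutdown() to stop its metric-collecting thread before the common
/// flush thread, instead of callers having to special-case it as before.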
LOG_DEBUG(log, "Creating new table " << description << " for " + LogElement::name()); - auto create = std::make_shared(); - - create->database = table_id.database_name; - create->table = table_id.table_name; - - Block sample = LogElement::createBlock(); - - auto new_columns_list = std::make_shared(); - new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList())); - create->set(create->columns_list, new_columns_list); - - ParserStorage storage_parser; - ASTPtr storage_ast = parseQuery( - storage_parser, storage_def.data(), storage_def.data() + storage_def.size(), - "Storage to create table for " + LogElement::name(), 0); - create->set(create->storage, storage_ast); + auto create = getCreateTableQuery(); InterpreterCreateQuery interpreter(create, context); interpreter.setInternal(true); @@ -442,4 +442,28 @@ void SystemLog::prepareTable() is_prepared = true; } + +template +ASTPtr SystemLog::getCreateTableQuery() +{ + auto create = std::make_shared(); + + create->database = table_id.database_name; + create->table = table_id.table_name; + + Block sample = LogElement::createBlock(); + + auto new_columns_list = std::make_shared(); + new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList())); + create->set(create->columns_list, new_columns_list); + + ParserStorage storage_parser; + ASTPtr storage_ast = parseQuery( + storage_parser, storage_def.data(), storage_def.data() + storage_def.size(), + "Storage to create table for " + LogElement::name(), 0); + create->set(create->storage, storage_ast); + + return create; +} + } From df28eca407d069c6730d13a6d5c3a620f68c5304 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev Date: Mon, 13 Apr 2020 01:03:44 +0300 Subject: [PATCH 338/752] Add test for communication between ClickHouse and Zookeeper over SSL --- tests/integration/helpers/cluster.py | 10 +- .../helpers/zookeeper-ssl-entrypoint.sh | 95 +++++++++++++++++++ .../configs/zookeeper_config_with_ssl.xml | 20 ++++ .../configs_secure/client.crt | 19 ++++ .../configs_secure/client.key | 28 ++++++ .../configs_secure/conf.d/remote_servers.xml | 17 ++++ .../configs_secure/conf.d/ssl_conf.xml | 16 ++++ .../integration/test_zookeeper_config/test.py | 54 ++++++++++- 8 files changed, 255 insertions(+), 4 deletions(-) create mode 100755 tests/integration/helpers/zookeeper-ssl-entrypoint.sh create mode 100644 tests/integration/test_zookeeper_config/configs/zookeeper_config_with_ssl.xml create mode 100644 tests/integration/test_zookeeper_config/configs_secure/client.crt create mode 100644 tests/integration/test_zookeeper_config/configs_secure/client.key create mode 100644 tests/integration/test_zookeeper_config/configs_secure/conf.d/remote_servers.xml create mode 100644 tests/integration/test_zookeeper_config/configs_secure/conf.d/ssl_conf.xml diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 717fab11449..b5cae86dc2d 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -144,7 +144,8 @@ class ClickHouseCluster: with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, with_minio=False, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", - stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None): + stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None, + 
zookeeper_docker_compose_path=None): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -179,10 +180,13 @@ class ClickHouseCluster: cmds = [] if with_zookeeper and not self.with_zookeeper: + if not zookeeper_docker_compose_path: + zookeeper_docker_compose_path = p.join(HELPERS_DIR, 'docker_compose_zookeeper.yml') + self.with_zookeeper = True - self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_zookeeper.yml')]) + self.base_cmd.extend(['--file', zookeeper_docker_compose_path]) self.base_zookeeper_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', - self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_zookeeper.yml')] + self.project_name, '--file', zookeeper_docker_compose_path] cmds.append(self.base_zookeeper_cmd) if with_mysql and not self.with_mysql: diff --git a/tests/integration/helpers/zookeeper-ssl-entrypoint.sh b/tests/integration/helpers/zookeeper-ssl-entrypoint.sh new file mode 100755 index 00000000000..3ddb21881d6 --- /dev/null +++ b/tests/integration/helpers/zookeeper-ssl-entrypoint.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +set -e + +export ZOO_SERVER_CNXN_FACTORY=org.apache.zookeeper.server.NettyServerCnxnFactory +export ZOO_SSL_KEYSTORE_LOCATION=/conf/certs/zookeeper.p12 +export ZOO_SSL_KEYSTORE_PASSWORD=password +export ZOO_SSL_TRUSTSTORE_LOCATION=/conf/certs/truststore.p12 +export ZOO_SSL_TRUSTSTORE_PASSWORD=password + + +# Allow the container to be started with `--user` +if [[ "$1" = 'zkServer.sh' && "$(id -u)" = '0' ]]; then + chown -R zookeeper "$ZOO_DATA_DIR" "$ZOO_DATA_LOG_DIR" "$ZOO_LOG_DIR" "$ZOO_CONF_DIR" + exec gosu zookeeper "$0" "$@" +fi + +# Generate the config only if it doesn't exist +if [[ ! 
-f "$ZOO_CONF_DIR/zoo.cfg" ]]; then + CONFIG="$ZOO_CONF_DIR/zoo.cfg" + { + echo "dataDir=$ZOO_DATA_DIR" + echo "dataLogDir=$ZOO_DATA_LOG_DIR" + + echo "tickTime=$ZOO_TICK_TIME" + echo "initLimit=$ZOO_INIT_LIMIT" + echo "syncLimit=$ZOO_SYNC_LIMIT" + + echo "autopurge.snapRetainCount=$ZOO_AUTOPURGE_SNAPRETAINCOUNT" + echo "autopurge.purgeInterval=$ZOO_AUTOPURGE_PURGEINTERVAL" + echo "maxClientCnxns=$ZOO_MAX_CLIENT_CNXNS" + echo "standaloneEnabled=$ZOO_STANDALONE_ENABLED" + echo "admin.enableServer=$ZOO_ADMINSERVER_ENABLED" + } >> "$CONFIG" + if [[ -z $ZOO_SERVERS ]]; then + ZOO_SERVERS="server.1=localhost:2888:3888;2181" + fi + + for server in $ZOO_SERVERS; do + echo "$server" >> "$CONFIG" + done + + if [[ -n $ZOO_4LW_COMMANDS_WHITELIST ]]; then + echo "4lw.commands.whitelist=$ZOO_4LW_COMMANDS_WHITELIST" >> "$CONFIG" + fi + + + if [[ -n $ZOO_SSL_QUORUM ]]; then + { + echo "sslQuorum=$ZOO_SSL_QUORUM" + echo "serverCnxnFactory=$ZOO_SERVER_CNXN_FACTORY" + echo "ssl.quorum.keyStore.location=$ZOO_SSL_QUORUM_KEYSTORE_LOCATION" + echo "ssl.quorum.keyStore.password=$ZOO_SSL_QUORUM_KEYSTORE_PASSWORD" + echo "ssl.quorum.trustStore.location=$ZOO_SSL_QUORUM_TRUSTSTORE_LOCATION" + echo "ssl.quorum.trustStore.password=$ZOO_SSL_QUORUM_TRUSTSTORE_PASSWORD" + } >> "$CONFIG" + fi + + if [[ -n $ZOO_PORT_UNIFICATION ]]; then + echo "portUnification=$ZOO_PORT_UNIFICATION" >> "$CONFIG" + fi + + if [[ -n $ZOO_SECURE_CLIENT_PORT ]]; then + { + echo "secureClientPort=$ZOO_SECURE_CLIENT_PORT" + echo "serverCnxnFactory=$ZOO_SERVER_CNXN_FACTORY" + echo "ssl.keyStore.location=$ZOO_SSL_KEYSTORE_LOCATION" + echo "ssl.keyStore.password=$ZOO_SSL_KEYSTORE_PASSWORD" + echo "ssl.trustStore.location=$ZOO_SSL_TRUSTSTORE_LOCATION" + echo "ssl.trustStore.password=$ZOO_SSL_TRUSTSTORE_PASSWORD" + } >> "$CONFIG" + fi + + if [[ -n $ZOO_CLIENT_PORT_UNIFICATION ]]; then + echo "client.portUnification=$ZOO_CLIENT_PORT_UNIFICATION" >> "$CONFIG" + fi +fi + +# Write myid only if it doesn't exist +if [[ ! -f "$ZOO_DATA_DIR/myid" ]]; then + echo "${ZOO_MY_ID:-1}" > "$ZOO_DATA_DIR/myid" +fi + +mkdir -p $(dirname $ZOO_SSL_KEYSTORE_LOCATION) +mkdir -p $(dirname $ZOO_SSL_TRUSTSTORE_LOCATION) + +if [[ ! -f "$ZOO_SSL_KEYSTORE_LOCATION" ]]; then + keytool -genkeypair -alias zookeeper -keyalg RSA -validity 365 -keysize 2048 -dname "cn=zookeeper" -keypass password -keystore $ZOO_SSL_KEYSTORE_LOCATION -storepass password -deststoretype pkcs12 +fi + +if [[ ! 
-f "$ZOO_SSL_TRUSTSTORE_LOCATION" ]]; then + keytool -importcert -alias zookeeper -file /clickhouse-config/client.crt -keystore $ZOO_SSL_TRUSTSTORE_LOCATION -storepass password -noprompt -deststoretype pkcs12 +fi + +exec "$@" diff --git a/tests/integration/test_zookeeper_config/configs/zookeeper_config_with_ssl.xml b/tests/integration/test_zookeeper_config/configs/zookeeper_config_with_ssl.xml new file mode 100644 index 00000000000..fc03b609146 --- /dev/null +++ b/tests/integration/test_zookeeper_config/configs/zookeeper_config_with_ssl.xml @@ -0,0 +1,20 @@ + + + + zoo1 + 2281 + 1 + + + zoo2 + 2281 + 1 + + + zoo3 + 2281 + 1 + + 3000 + + diff --git a/tests/integration/test_zookeeper_config/configs_secure/client.crt b/tests/integration/test_zookeeper_config/configs_secure/client.crt new file mode 100644 index 00000000000..7ade2d96273 --- /dev/null +++ b/tests/integration/test_zookeeper_config/configs_secure/client.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIC/TCCAeWgAwIBAgIJANjx1QSR77HBMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAgFw0xODA3MzAxODE2MDhaGA8yMjkyMDUxNDE4MTYwOFow +FDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +CgKCAQEAs9uSo6lJG8o8pw0fbVGVu0tPOljSWcVSXH9uiJBwlZLQnhN4SFSFohfI +4K8U1tBDTnxPLUo/V1K9yzoLiRDGMkwVj6+4+hE2udS2ePTQv5oaMeJ9wrs+5c9T +4pOtlq3pLAdm04ZMB1nbrEysceVudHRkQbGHzHp6VG29Fw7Ga6YpqyHQihRmEkTU +7UCYNA+Vk7aDPdMS/khweyTpXYZimaK9f0ECU3/VOeG3fH6Sp2X6FN4tUj/aFXEj +sRmU5G2TlYiSIUMF2JPdhSihfk1hJVALrHPTU38SOL+GyyBRWdNcrIwVwbpvsvPg +pryMSNxnpr0AK0dFhjwnupIv5hJIOQIDAQABo1AwTjAdBgNVHQ4EFgQUjPLb3uYC +kcamyZHK4/EV8jAP0wQwHwYDVR0jBBgwFoAUjPLb3uYCkcamyZHK4/EV8jAP0wQw +DAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAM/ocuDvfPus/KpMVD51j +4IdlU8R0vmnYLQ+ygzOAo7+hUWP5j0yvq4ILWNmQX6HNvUggCgFv9bjwDFhb/5Vr +85ieWfTd9+LTjrOzTw4avdGwpX9G+6jJJSSq15tw5ElOIFb/qNA9O4dBiu8vn03C +L/zRSXrARhSqTW5w/tZkUcSTT+M5h28+Lgn9ysx4Ff5vi44LJ1NnrbJbEAIYsAAD ++UA+4MBFKx1r6hHINULev8+lCfkpwIaeS8RL+op4fr6kQPxnULw8wT8gkuc8I4+L +P9gg/xDHB44T3ADGZ5Ib6O0DJaNiToO6rnoaaxs0KkotbvDWvRoxEytSbXKoYjYp +0g== +-----END CERTIFICATE----- diff --git a/tests/integration/test_zookeeper_config/configs_secure/client.key b/tests/integration/test_zookeeper_config/configs_secure/client.key new file mode 100644 index 00000000000..f0fb61ac443 --- /dev/null +++ b/tests/integration/test_zookeeper_config/configs_secure/client.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCz25KjqUkbyjyn +DR9tUZW7S086WNJZxVJcf26IkHCVktCeE3hIVIWiF8jgrxTW0ENOfE8tSj9XUr3L +OguJEMYyTBWPr7j6ETa51LZ49NC/mhox4n3Cuz7lz1Pik62WreksB2bThkwHWdus +TKxx5W50dGRBsYfMenpUbb0XDsZrpimrIdCKFGYSRNTtQJg0D5WTtoM90xL+SHB7 +JOldhmKZor1/QQJTf9U54bd8fpKnZfoU3i1SP9oVcSOxGZTkbZOViJIhQwXYk92F +KKF+TWElUAusc9NTfxI4v4bLIFFZ01ysjBXBum+y8+CmvIxI3GemvQArR0WGPCe6 +ki/mEkg5AgMBAAECggEATrbIBIxwDJOD2/BoUqWkDCY3dGevF8697vFuZKIiQ7PP +TX9j4vPq0DfsmDjHvAPFkTHiTQXzlroFik3LAp+uvhCCVzImmHq0IrwvZ9xtB43f +7Pkc5P6h1l3Ybo8HJ6zRIY3TuLtLxuPSuiOMTQSGRL0zq3SQ5DKuGwkz+kVjHXUN +MR2TECFwMHKQ5VLrC+7PMpsJYyOMlDAWhRfUalxC55xOXTpaN8TxNnwQ8K2ISVY5 +212Jz/a4hn4LdwxSz3Tiu95PN072K87HLWx3EdT6vW4Ge5P/A3y+smIuNAlanMnu +plHBRtpATLiTxZt/n6npyrfQVbYjSH7KWhB8hBHtaQKBgQDh9Cq1c/KtqDtE0Ccr +/r9tZNTUwBE6VP+3OJeKdEdtsfuxjOCkS1oAjgBJiSDOiWPh1DdoDeVZjPKq6pIu +Mq12OE3Doa8znfCXGbkSzEKOb2unKZMJxzrz99kXt40W5DtrqKPNb24CNqTiY8Aa +CjtcX+3weat82VRXvph6U8ltMwKBgQDLxjiQQzNoY7qvg7CwJCjf9qq8jmLK766g +1FHXopqS+dTxDLM8eJSRrpmxGWJvNeNc1uPhsKsKgotqAMdBUQTf7rSTbt4MyoH5 +bUcRLtr+0QTK9hDWMOOvleqNXha68vATkohWYfCueNsC60qD44o8RZAS6UNy3ENq 
+cM1cxqe84wKBgQDKkHutWnooJtajlTxY27O/nZKT/HA1bDgniMuKaz4R4Gr1PIez +on3YW3V0d0P7BP6PWRIm7bY79vkiMtLEKdiKUGWeyZdo3eHvhDb/3DCawtau8L2K +GZsHVp2//mS1Lfz7Qh8/L/NedqCQ+L4iWiPnZ3THjjwn3CoZ05ucpvrAMwKBgB54 +nay039MUVq44Owub3KDg+dcIU62U+cAC/9oG7qZbxYPmKkc4oL7IJSNecGHA5SbU +2268RFdl/gLz6tfRjbEOuOHzCjFPdvAdbysanpTMHLNc6FefJ+zxtgk9sJh0C4Jh +vxFrw9nTKKzfEl12gQ1SOaEaUIO0fEBGbe8ZpauRAoGAMAlGV+2/K4ebvAJKOVTa +dKAzQ+TD2SJmeR1HZmKDYddNqwtZlzg3v4ZhCk4eaUmGeC1Bdh8MDuB3QQvXz4Dr +vOIP4UVaOr+uM+7TgAgVnP4/K6IeJGzUDhX93pmpWhODfdu/oojEKVcpCojmEmS1 +KCBtmIrQLqzMpnBpLNuSY+Q= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_zookeeper_config/configs_secure/conf.d/remote_servers.xml b/tests/integration/test_zookeeper_config/configs_secure/conf.d/remote_servers.xml new file mode 100644 index 00000000000..01865e33a85 --- /dev/null +++ b/tests/integration/test_zookeeper_config/configs_secure/conf.d/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + + node1 + 9000 + + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_zookeeper_config/configs_secure/conf.d/ssl_conf.xml b/tests/integration/test_zookeeper_config/configs_secure/conf.d/ssl_conf.xml new file mode 100644 index 00000000000..5e6f5f37624 --- /dev/null +++ b/tests/integration/test_zookeeper_config/configs_secure/conf.d/ssl_conf.xml @@ -0,0 +1,16 @@ + + + + /etc/clickhouse-server/client.crt + /etc/clickhouse-server/client.key + true + true + sslv2,sslv3 + true + none + + RejectCertificateHandler + + + + diff --git a/tests/integration/test_zookeeper_config/test.py b/tests/integration/test_zookeeper_config/test.py index d9323ae16f3..4be99c8711d 100644 --- a/tests/integration/test_zookeeper_config/test.py +++ b/tests/integration/test_zookeeper_config/test.py @@ -1,7 +1,11 @@ from __future__ import print_function from helpers.cluster import ClickHouseCluster +import helpers import pytest import time +from tempfile import NamedTemporaryFile +from os import path as p, unlink + def test_chroot_with_same_root(): @@ -100,10 +104,58 @@ def test_identity(): with pytest.raises(Exception): cluster_2.start(destroy_dirs=False) node2.query(''' - CREATE TABLE simple (date Date, id UInt32) + CREATE TABLE simple (date Date, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '1', date, id, 8192); ''') finally: cluster_1.shutdown() cluster_2.shutdown() + + +def test_secure_connection(): + # We need absolute path in zookeeper volumes. Generate it dynamically. 
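+    # The template below is rendered once per ZooKeeper node; the resulting
+    # compose file is passed to add_instance() through the new
+    # zookeeper_docker_compose_path argument, replacing the stock
+    # docker_compose_zookeeper.yml from the helpers directory.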
+ TEMPLATE = ''' + zoo{zoo_id}: + image: zookeeper:3.5.6 + restart: always + environment: + ZOO_TICK_TIME: 500 + ZOO_MY_ID: {zoo_id} + ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181 + ZOO_SECURE_CLIENT_PORT: 2281 + volumes: + - {helpers_dir}/zookeeper-ssl-entrypoint.sh:/zookeeper-ssl-entrypoint.sh + - {configs_dir}:/clickhouse-config + command: ["zkServer.sh", "start-foreground"] + entrypoint: /zookeeper-ssl-entrypoint.sh + ''' + configs_dir = p.abspath(p.join(p.dirname(__file__), 'configs_secure')) + helpers_dir = p.abspath(p.dirname(helpers.__file__)) + + cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_with_ssl.xml') + + docker_compose = NamedTemporaryFile(delete=False) + + docker_compose.write( + "version: '2.2'\nservices:\n" + + TEMPLATE.format(zoo_id=1, configs_dir=configs_dir, helpers_dir=helpers_dir) + + TEMPLATE.format(zoo_id=2, configs_dir=configs_dir, helpers_dir=helpers_dir) + + TEMPLATE.format(zoo_id=3, configs_dir=configs_dir, helpers_dir=helpers_dir) + ) + docker_compose.close() + + node1 = cluster.add_instance('node1', config_dir='configs_secure', with_zookeeper=True, + zookeeper_docker_compose_path=docker_compose.name) + node2 = cluster.add_instance('node2', config_dir='configs_secure', with_zookeeper=True, + zookeeper_docker_compose_path=docker_compose.name) + + try: + cluster.start() + + assert node1.query("SELECT count() FROM system.zookeeper WHERE path = '/'") == '2\n' + assert node2.query("SELECT count() FROM system.zookeeper WHERE path = '/'") == '2\n' + + finally: + cluster.shutdown() + unlink(docker_compose.name) From f7f020263344e3c5055a630441e0673bcbff3d42 Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2020 08:04:57 +0000 Subject: [PATCH 339/752] Bump nltk from 3.4.5 to 3.5 in /docs/tools Bumps [nltk](https://github.com/nltk/nltk) from 3.4.5 to 3.5. 
- [Release notes](https://github.com/nltk/nltk/releases) - [Changelog](https://github.com/nltk/nltk/blob/develop/ChangeLog) - [Commits](https://github.com/nltk/nltk/compare/3.4.5...3.5) Signed-off-by: dependabot-preview[bot] --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 0e3e3c24b5f..228229ac30d 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -20,7 +20,7 @@ MarkupSafe==1.1.1 mkdocs==1.1 mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.6 -nltk==3.4.5 +nltk==3.5 nose==1.3.7 protobuf==3.11.3 numpy==1.18.2 From f1fbd60442d6b101338f4c94e9ae7578ab9e1e08 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 13 Apr 2020 11:16:20 +0300 Subject: [PATCH 340/752] Add instana --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index f7006ae15c8..a1494c23066 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -76,5 +76,6 @@ toc_title: Adopters | [ЦВТ](https://htc-cs.ru/) | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | | [МКБ](https://mkb.ru/) | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | | [金数据](https://jinshuju.net) | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | +| [Instana](https://www.instana.com) | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) From 29fc8f145b898e2f854935e039a70e0a3c0907a6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 11:50:14 +0300 Subject: [PATCH 341/752] Fix tests in debug. --- src/Processors/Sources/SourceFromInputStream.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Sources/SourceFromInputStream.cpp b/src/Processors/Sources/SourceFromInputStream.cpp index e7ca28f72b9..e34fbd359ae 100644 --- a/src/Processors/Sources/SourceFromInputStream.cpp +++ b/src/Processors/Sources/SourceFromInputStream.cpp @@ -176,6 +176,9 @@ Chunk SourceFromInputStream::generate() return {}; } + if (isCancelled()) + return {}; + #ifndef NDEBUG assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream"); #endif From 9a9bedc8ccd7f1ded6c1a237b7452481887651f8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 12:02:50 +0300 Subject: [PATCH 342/752] Fix test for streams. 
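
TreeExecutorBlockInputStream wraps a processor tree behind the
IBlockInputStream interface, so cancellation has to be forwarded by hand:
the new cancel() override cancels every wrapped processor, and the
execute()/readImpl() loops now check is_cancelled so they stop pulling
chunks. A minimal caller-side sketch (constructing the stream from a Pipe
is an assumption for illustration, not part of this patch):

    /// Hypothetical usage: cancelling the stream must reach the processors,
    /// otherwise readImpl() would keep producing blocks.
    auto stream = std::make_shared<TreeExecutorBlockInputStream>(std::move(pipe));
    stream->cancel(false);          /// also cancels every wrapped processor
    Block block = stream->read();   /// now returns an empty block promptly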
--- .../Executors/TreeExecutorBlockInputStream.cpp | 14 ++++++++++++-- .../Executors/TreeExecutorBlockInputStream.h | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/Processors/Executors/TreeExecutorBlockInputStream.cpp b/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index 84fd97f4781..f797fee3ab5 100644 --- a/src/Processors/Executors/TreeExecutorBlockInputStream.cpp +++ b/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -164,7 +164,7 @@ void TreeExecutorBlockInputStream::execute(bool on_totals, bool on_extremes) } }; - while (!stack.empty()) + while (!stack.empty() && !is_cancelled) { IProcessor * node = stack.top(); @@ -295,7 +295,7 @@ void TreeExecutorBlockInputStream::initRowsBeforeLimit() Block TreeExecutorBlockInputStream::readImpl() { - while (true) + while (!is_cancelled) { if (input_port->isFinished()) { @@ -338,6 +338,8 @@ Block TreeExecutorBlockInputStream::readImpl() execute(false, false); } + + return {}; } void TreeExecutorBlockInputStream::setProgressCallback(const ProgressCallback & callback) @@ -373,4 +375,12 @@ void TreeExecutorBlockInputStream::addTotalRowsApprox(size_t value) sources_with_progress.front()->addTotalRowsApprox(value); } +void TreeExecutorBlockInputStream::cancel(bool kill) +{ + IBlockInputStream::cancel(kill); + + for (auto & processor : processors) + processor->cancel(); +} + } diff --git a/src/Processors/Executors/TreeExecutorBlockInputStream.h b/src/Processors/Executors/TreeExecutorBlockInputStream.h index dfe8e66ed09..d96492b3fb8 100644 --- a/src/Processors/Executors/TreeExecutorBlockInputStream.h +++ b/src/Processors/Executors/TreeExecutorBlockInputStream.h @@ -39,6 +39,8 @@ public: String getName() const override { return "TreeExecutor"; } Block getHeader() const override { return root->getOutputs().front().getHeader(); } + void cancel(bool kill) override; + /// This methods does not affect TreeExecutor as IBlockInputStream itself. /// They just passed to all SourceWithProgress processors. void setProgressCallback(const ProgressCallback & callback) final; From 46c0b65c9542d41c80349c9c33b8bc6ddcad075e Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Mon, 13 Apr 2020 12:20:38 +0300 Subject: [PATCH 343/752] Minor fix in MergeTree over S3 tests. 
--- tests/integration/test_merge_tree_s3/test.py | 2 +- tests/integration/test_replicated_merge_tree_s3/test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 4beb33604be..50cf532e9a4 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -67,7 +67,7 @@ def create_table(cluster, table_name, additional_settings=None): PARTITION BY dt ORDER BY (dt, id) SETTINGS - storage_policy = 's3', + storage_policy='s3', old_parts_lifetime=0, index_granularity=512 """.format(table_name) diff --git a/tests/integration/test_replicated_merge_tree_s3/test.py b/tests/integration/test_replicated_merge_tree_s3/test.py index 8689e7ccf5d..d6b6015a388 100644 --- a/tests/integration/test_replicated_merge_tree_s3/test.py +++ b/tests/integration/test_replicated_merge_tree_s3/test.py @@ -67,7 +67,7 @@ def create_table(cluster): ) ENGINE=ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/s3', '{instance}') PARTITION BY dt ORDER BY (dt, id) - SETTINGS storage_policy = 's3' + SETTINGS storage_policy='s3' """ for node in cluster.instances.values(): From 91e9a543d49a3e82dc433fdd46d284689ffc23ad Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Mon, 13 Apr 2020 14:20:13 +0300 Subject: [PATCH 344/752] Support new vXX-backported labels (#10231) --- utils/github/__main__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/github/__main__.py b/utils/github/__main__.py index e05d27c03d6..401908298eb 100644 --- a/utils/github/__main__.py +++ b/utils/github/__main__.py @@ -129,6 +129,7 @@ if bad_commits and not args.login: # TODO: check backports. if need_backporting: re_vlabel = re.compile(r'^v\d+\.\d+$') + re_vlabel_backported = re.compile(r'^v\d+\.\d+-backported$') re_vlabel_conflicts = re.compile(r'^v\d+\.\d+-conflicts$') print('\nPull-requests need to be backported:') @@ -146,8 +147,8 @@ if need_backporting: # FIXME: compatibility logic - check for a manually set label, that indicates status 'backported'. # FIXME: O(n²) - no need to iterate all labels for every `stable` for label in github.get_labels(pull_request): - if re_vlabel.match(label['name']): - if f'v{stable[0]}' == label['name']: + if re_vlabel.match(label['name']) or re_vlabel_backported.match(label['name']): + if f'v{stable[0]}' == label['name'] or f'v{stable[0]}-backported' == label['name']: backport_labeled.add(stable[0]) if re_vlabel_conflicts.match(label['name']): if f'v{stable[0]}-conflicts' == label['name']: From 03dad5ccf5258b9e2fe5f0bb58da6ace17e97617 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 14:45:59 +0300 Subject: [PATCH 345/752] Added AggregatingSortedAlgorithm. 
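
The merging logic moves out of the transform into a standalone
IMergingAlgorithm implementation. The algorithm talks to its driver through
IMergingAlgorithm::Status: a status either carries a merged chunk, names an
input that must be fed another chunk, or marks the merge as finished. A
rough sketch of the driving loop, assuming the Status fields used in this
series (chunk, required_source, is_finished) and hypothetical
push()/readFrom() helpers standing in for the transform's port handling:

    /// Sketch only: how a driver can run any IMergingAlgorithm.
    while (true)
    {
        IMergingAlgorithm::Status status = algorithm.merge();
        if (status.chunk)
            push(std::move(status.chunk));     /// a merged block is ready
        if (status.is_finished)
            break;                             /// all inputs are exhausted
        if (status.required_source >= 0)       /// feed one more input chunk
            algorithm.consume(readFrom(status.required_source),
                              status.required_source);
    }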
--- ...orm.cpp => AggregatingSortedAlgorithm.cpp} | 112 +++-------- .../Merges/AggregatingSortedAlgorithm.h | 176 ++++++++++++++++++ .../Merges/AggregatingSortedTransform.h | 164 +--------------- src/Processors/Merges/IMergingAlgorithm.h | 3 +- .../IMergingAlgorithmWithDelayedChunk.cpp | 50 +++++ .../IMergingAlgorithmWithDelayedChunk.h | 41 ++++ 6 files changed, 307 insertions(+), 239 deletions(-) rename src/Processors/Merges/{AggregatingSortedTransform.cpp => AggregatingSortedAlgorithm.cpp} (68%) create mode 100644 src/Processors/Merges/AggregatingSortedAlgorithm.h create mode 100644 src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp create mode 100644 src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h diff --git a/src/Processors/Merges/AggregatingSortedTransform.cpp b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp similarity index 68% rename from src/Processors/Merges/AggregatingSortedTransform.cpp rename to src/Processors/Merges/AggregatingSortedAlgorithm.cpp index fe86d01b390..a185e7cee67 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.cpp +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -8,17 +8,12 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace { - AggregatingSortedTransform::ColumnsDefinition defineColumns( + AggregatingSortedAlgorithm::ColumnsDefinition defineColumns( const Block & header, const SortDescription & description) { - AggregatingSortedTransform::ColumnsDefinition def = {}; + AggregatingSortedAlgorithm::ColumnsDefinition def = {}; size_t num_columns = header.columns(); /// Fill in the column numbers that need to be aggregated. @@ -53,7 +48,7 @@ namespace type = nullptr; // simple aggregate function - AggregatingSortedTransform::SimpleAggregateDescription desc(simple_aggr->getFunction(), i, type); + AggregatingSortedAlgorithm::SimpleAggregateDescription desc(simple_aggr->getFunction(), i, type); if (desc.function->allocatesMemoryInArena()) def.allocates_memory_in_arena = true; @@ -69,7 +64,7 @@ namespace return def; } - MutableColumns getMergedColumns(const Block & header, const AggregatingSortedTransform::ColumnsDefinition & def) + MutableColumns getMergedColumns(const Block & header, const AggregatingSortedAlgorithm::ColumnsDefinition & def) { MutableColumns columns; columns.resize(header.columns()); @@ -88,34 +83,17 @@ namespace } } -AggregatingSortedTransform::AggregatingSortedTransform( - const Block & header, size_t num_inputs, +AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( + const Block & header_, size_t num_inputs, SortDescription description_, size_t max_block_size) - : IMergingTransform(num_inputs, header, header, true) + : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) + , header(header_) , columns_definition(defineColumns(header, description_)) - , merged_data(getMergedColumns(header, columns_definition), false, max_block_size) - , description(std::move(description_)) - , source_chunks(num_inputs) - , cursors(num_inputs) + , merged_data(getMergedColumns(header, columns_definition), max_block_size, columns_definition) { - merged_data.initAggregateDescription(columns_definition); } -void AggregatingSortedTransform::initializeInputs() -{ - queue = SortingHeap(cursors); - is_queue_initialized = true; -} - -void AggregatingSortedTransform::consume(Chunk chunk, size_t input_number) -{ - updateCursor(std::move(chunk), input_number); - - if (is_queue_initialized) - 
queue.push(cursors[input_number]); -} - -void AggregatingSortedTransform::updateCursor(Chunk chunk, size_t source_num) +void AggregatingSortedAlgorithm::prepareChunk(Chunk & chunk) const { auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); @@ -128,56 +106,24 @@ void AggregatingSortedTransform::updateCursor(Chunk chunk, size_t source_num) columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); chunk.setColumns(std::move(columns), num_rows); - - auto & source_chunk = source_chunks[source_num]; - - if (source_chunk) - { - /// Extend lifetime of last chunk. - last_chunk = std::move(source_chunk); - last_chunk_sort_columns = std::move(cursors[source_num].sort_columns); - - source_chunk = std::move(chunk); - cursors[source_num].reset(source_chunk.getColumns(), {}); - } - else - { - if (cursors[source_num].has_collation) - throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); - - source_chunk = std::move(chunk); - cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); - } } -void AggregatingSortedTransform::work() +void AggregatingSortedAlgorithm::initialize(Chunks chunks) { - merge(); - prepareOutputChunk(merged_data); + for (auto & chunk : chunks) + if (chunk) + prepareChunk(chunk); - if (has_output_chunk) - { - size_t num_rows = output_chunk.getNumRows(); - auto columns = output_chunk.detachColumns(); - auto & header = getOutputs().back().getHeader(); - - for (auto & desc : columns_definition.columns_to_simple_aggregate) - { - if (desc.inner_type) - { - auto & from_type = desc.inner_type; - auto & to_type = header.getByPosition(desc.column_number).type; - columns[desc.column_number] = recursiveTypeConversion(columns[desc.column_number], from_type, to_type); - } - } - - output_chunk.setColumns(std::move(columns), num_rows); - - merged_data.initAggregateDescription(columns_definition); - } + initializeQueue(std::move(chunks)); } -void AggregatingSortedTransform::merge() +void AggregatingSortedAlgorithm::consume(Chunk chunk, size_t source_num) +{ + prepareChunk(chunk); + updateCursor(std::move(chunk), source_num); +} + +IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() { /// We take the rows in the correct order and put them in `merged_block`, while the rows are no more than `max_block_size` while (queue.isValid()) @@ -213,7 +159,7 @@ void AggregatingSortedTransform::merge() if (merged_data.hasEnoughRows()) { last_key.reset(); - return; + Status(merged_data.pull(columns_definition, header)); } /// We will write the data for the group. We copy the values of ordinary columns. @@ -242,8 +188,7 @@ void AggregatingSortedTransform::merge() { /// We get the next block from the corresponding source, if there is one. 
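             /// (merging pauses here until the caller supplies the next chunk
             /// for this input, then merge() runs again)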
queue.removeTop(); - requestDataForInput(current.impl->order); - return; + return Status(current.impl->order); } } @@ -255,10 +200,10 @@ void AggregatingSortedTransform::merge() } last_chunk_sort_columns.clear(); - is_finished = true; + return Status(merged_data.pull(columns_definition, header), true); } -void AggregatingSortedTransform::addRow(SortCursor & cursor) +void AggregatingSortedAlgorithm::addRow(SortCursor & cursor) { for (auto & desc : columns_definition.columns_to_aggregate) desc.column->insertMergeFrom(*cursor->all_columns[desc.column_number], cursor->pos); @@ -270,7 +215,7 @@ void AggregatingSortedTransform::addRow(SortCursor & cursor) } } -void AggregatingSortedTransform::insertSimpleAggregationResult() +void AggregatingSortedAlgorithm::insertSimpleAggregationResult() { for (auto & desc : columns_definition.columns_to_simple_aggregate) { @@ -279,4 +224,5 @@ void AggregatingSortedTransform::insertSimpleAggregationResult() } } + } diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h new file mode 100644 index 00000000000..95a12c49618 --- /dev/null +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -0,0 +1,176 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class ColumnAggregateFunction; + +class AggregatingSortedAlgorithm final : public IMergingAlgorithmWithDelayedChunk +{ +public: + AggregatingSortedAlgorithm( + const Block & header, size_t num_inputs, + SortDescription description_, size_t max_block_size); + + void initialize(Chunks chunks) override; + void consume(Chunk chunk, size_t source_num) override; + Status merge() override; + + struct SimpleAggregateDescription; + + struct ColumnsDefinition + { + struct AggregateDescription + { + ColumnAggregateFunction * column = nullptr; + const size_t column_number = 0; + + AggregateDescription() = default; + explicit AggregateDescription(size_t col_number) : column_number(col_number) {} + }; + + /// Columns with which numbers should not be aggregated. + ColumnNumbers column_numbers_not_to_aggregate; + std::vector columns_to_aggregate; + std::vector columns_to_simple_aggregate; + + /// Does SimpleAggregateFunction allocates memory in arena? + bool allocates_memory_in_arena = false; + }; + +private: + /// Specialization for AggregatingSortedAlgorithm. 
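+    /// Tracks whether a row group is currently open (initializeRow() /
+    /// insertRow()) and, on pull(), converts SimpleAggregateFunction columns
+    /// over LowCardinality back to the types declared in the header.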
+ struct AggregatingMergedData : public MergedData + { + public: + AggregatingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def) + : MergedData(std::move(columns_), false, max_block_size_) + { + initAggregateDescription(def); + } + + void initializeRow(const ColumnRawPtrs & raw_columns, size_t row, const ColumnNumbers & column_numbers) + { + for (auto column_number : column_numbers) + columns[column_number]->insertFrom(*raw_columns[column_number], row); + + is_group_started = true; + } + + bool isGroupStarted() const { return is_group_started; } + + void insertRow() + { + is_group_started = false; + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; + } + + Chunk pull(ColumnsDefinition & def, const Block & header_) + { + auto chunk = pull(); + + size_t num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + for (auto & desc : def.columns_to_simple_aggregate) + { + if (desc.inner_type) + { + auto & from_type = desc.inner_type; + auto & to_type = header_.getByPosition(desc.column_number).type; + columns[desc.column_number] = recursiveTypeConversion(columns[desc.column_number], from_type, to_type); + } + } + + chunk.setColumns(std::move(columns), num_rows); + initAggregateDescription(def); + } + + private: + bool is_group_started = false; + + /// Initialize aggregate descriptions with columns. + void initAggregateDescription(ColumnsDefinition & def) + { + for (auto & desc : def.columns_to_simple_aggregate) + desc.column = columns[desc.column_number].get(); + + for (auto & desc : def.columns_to_aggregate) + desc.column = typeid_cast(columns[desc.column_number].get()); + } + + using MergedData::pull; + }; + + Block header; + + ColumnsDefinition columns_definition; + AggregatingMergedData merged_data; + + /// Memory pool for SimpleAggregateFunction + /// (only when allocates_memory_in_arena == true). + std::unique_ptr arena; + + void prepareChunk(Chunk & chunk) const; + void addRow(SortCursor & cursor); + void insertSimpleAggregationResult(); + +public: + /// Stores information for aggregation of SimpleAggregateFunction columns + struct SimpleAggregateDescription + { + /// An aggregate function 'anyLast', 'sum'... 
+ AggregateFunctionPtr function; + IAggregateFunction::AddFunc add_function = nullptr; + + size_t column_number = 0; + IColumn * column = nullptr; + const DataTypePtr inner_type; + + AlignedBuffer state; + bool created = false; + + SimpleAggregateDescription(AggregateFunctionPtr function_, const size_t column_number_, DataTypePtr type) + : function(std::move(function_)), column_number(column_number_), inner_type(std::move(type)) + { + add_function = function->getAddressOfAddFunction(); + state.reset(function->sizeOfData(), function->alignOfData()); + } + + void createState() + { + if (created) + return; + function->create(state.data()); + created = true; + } + + void destroyState() + { + if (!created) + return; + function->destroy(state.data()); + created = false; + } + + /// Explicitly destroy aggregation state if the stream is terminated + ~SimpleAggregateDescription() + { + destroyState(); + } + + SimpleAggregateDescription() = default; + SimpleAggregateDescription(SimpleAggregateDescription &&) = default; + SimpleAggregateDescription(const SimpleAggregateDescription &) = delete; + }; +}; + +} diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index 279ac9c7597..89e22173a13 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -1,15 +1,7 @@ #pragma once #include -#include -#include - -#include -#include -#include -#include -#include -#include +#include namespace DB { @@ -22,158 +14,22 @@ class ColumnAggregateFunction; * corresponding to a one value of the primary key. For columns that are not part of the primary key and which do not have the AggregateFunction type, * when merged, the first value is selected. */ -class AggregatingSortedTransform : public IMergingTransform +class AggregatingSortedTransform : public IMergingTransform2 { public: AggregatingSortedTransform( const Block & header, size_t num_inputs, - SortDescription description_, size_t max_block_size); - - struct SimpleAggregateDescription; - - struct ColumnsDefinition + SortDescription description_, size_t max_block_size) + : IMergingTransform2( + num_inputs, header, header, true, + header, + num_inputs, + std::move(description_), + max_block_size) { - struct AggregateDescription - { - ColumnAggregateFunction * column = nullptr; - const size_t column_number = 0; - - AggregateDescription() = default; - explicit AggregateDescription(size_t col_number) : column_number(col_number) {} - }; - - /// Columns with which numbers should not be aggregated. - ColumnNumbers column_numbers_not_to_aggregate; - std::vector columns_to_aggregate; - std::vector columns_to_simple_aggregate; - - /// Does SimpleAggregateFunction allocates memory in arena? - bool allocates_memory_in_arena = false; - }; + } String getName() const override { return "AggregatingSortedTransform"; } - void work() override; - -protected: - void initializeInputs() override; - void consume(Chunk chunk, size_t input_number) override; - -private: - - /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. 
- struct AggregatingMergedData : public MergedData - { - public: - using MergedData::MergedData; - - void initializeRow(const ColumnRawPtrs & raw_columns, size_t row, const ColumnNumbers & column_numbers) - { - for (auto column_number : column_numbers) - columns[column_number]->insertFrom(*raw_columns[column_number], row); - - is_group_started = true; - } - - bool isGroupStarted() const { return is_group_started; } - - void insertRow() - { - is_group_started = false; - ++total_merged_rows; - ++merged_rows; - /// TODO: sum_blocks_granularity += block_size; - } - - /// Initialize aggregate descriptions with columns. - void initAggregateDescription(ColumnsDefinition & def) - { - for (auto & desc : def.columns_to_simple_aggregate) - desc.column = columns[desc.column_number].get(); - - for (auto & desc : def.columns_to_aggregate) - desc.column = typeid_cast(columns[desc.column_number].get()); - } - private: - bool is_group_started = false; - }; - - ColumnsDefinition columns_definition; - AggregatingMergedData merged_data; - - SortDescription description; - - /// Chunks currently being merged. - std::vector source_chunks; - SortCursorImpls cursors; - - /// In merging algorithm, we need to compare current sort key with the last one. - /// So, sorting columns for last row needed to be stored. - /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). - Chunk last_chunk; - ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. - - detail::RowRef last_key; - - SortingHeap queue; - bool is_queue_initialized = false; - - /// Memory pool for SimpleAggregateFunction - /// (only when allocates_memory_in_arena == true). - std::unique_ptr arena; - - void merge(); - void updateCursor(Chunk chunk, size_t source_num); - void addRow(SortCursor & cursor); - void insertSimpleAggregationResult(); - -public: - /// Stores information for aggregation of SimpleAggregateFunction columns - struct SimpleAggregateDescription - { - /// An aggregate function 'anyLast', 'sum'... 
- AggregateFunctionPtr function; - IAggregateFunction::AddFunc add_function = nullptr; - - size_t column_number = 0; - IColumn * column = nullptr; - const DataTypePtr inner_type; - - AlignedBuffer state; - bool created = false; - - SimpleAggregateDescription(AggregateFunctionPtr function_, const size_t column_number_, DataTypePtr type) - : function(std::move(function_)), column_number(column_number_), inner_type(std::move(type)) - { - add_function = function->getAddressOfAddFunction(); - state.reset(function->sizeOfData(), function->alignOfData()); - } - - void createState() - { - if (created) - return; - function->create(state.data()); - created = true; - } - - void destroyState() - { - if (!created) - return; - function->destroy(state.data()); - created = false; - } - - /// Explicitly destroy aggregation state if the stream is terminated - ~SimpleAggregateDescription() - { - destroyState(); - } - - SimpleAggregateDescription() = default; - SimpleAggregateDescription(SimpleAggregateDescription &&) = default; - SimpleAggregateDescription(const SimpleAggregateDescription &) = delete; - }; }; } diff --git a/src/Processors/Merges/IMergingAlgorithm.h b/src/Processors/Merges/IMergingAlgorithm.h index edd5b7d883c..263acee4c2d 100644 --- a/src/Processors/Merges/IMergingAlgorithm.h +++ b/src/Processors/Merges/IMergingAlgorithm.h @@ -25,12 +25,11 @@ public: virtual Status merge() = 0; IMergingAlgorithm() = default; - IMergingAlgorithm(IMergingAlgorithm &&) = default; virtual ~IMergingAlgorithm() = default; }; // TODO: use when compile with clang which could support it // template -// concept MergingAlgorithm = std::is_base_of::value && std::is_move_constructible::value; +// concept MergingAlgorithm = std::is_base_of::value; } diff --git a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp new file mode 100644 index 00000000000..5b6c00750ba --- /dev/null +++ b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp @@ -0,0 +1,50 @@ +// +// Created by nik-kochetov on 4/13/20. +// + +#include + + +namespace DB +{ + +IMergingAlgorithmWithDelayedChunk::IMergingAlgorithmWithDelayedChunk( + size_t num_inputs, + SortDescription description_) + : description(std::move(description_)) + , source_chunks(num_inputs) +{ +} + + +void IMergingAlgorithmWithDelayedChunk::initializeQueue(Chunks chunks) +{ + source_chunks.resize(chunks.size()); + + for (size_t source_num = 0; source_num < source_chunks.size(); ++source_num) + { + if (!chunks[source_num]) + continue; + + source_chunks[source_num] = std::move(chunks[source_num]); + cursors[source_num] = SortCursorImpl(source_chunks[source_num].getColumns(), description, source_num); + } + + queue = SortingHeap(cursors); +} + +void IMergingAlgorithmWithDelayedChunk::updateCursor(Chunk chunk, size_t source_num) +{ + auto & source_chunk = source_chunks[source_num]; + + /// Extend lifetime of last chunk. 
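+    /// (last_key may still reference the sort columns of the chunk being
+    /// replaced, so the old chunk must stay alive until the next update)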
+ last_chunk = std::move(source_chunk); + last_chunk_sort_columns = std::move(cursors[source_num].sort_columns); + + source_chunk = std::move(chunk); + cursors[source_num].reset(source_chunk.getColumns(), {}); + + queue.push(cursors[source_num]); +} + +} diff --git a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h new file mode 100644 index 00000000000..2a53f22f5aa --- /dev/null +++ b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class IMergingAlgorithmWithDelayedChunk : public IMergingAlgorithm +{ +public: + IMergingAlgorithmWithDelayedChunk( + size_t num_inputs, + SortDescription description_); + +protected: + SortingHeap queue; + + /// Previous row. May refer to last_chunk_sort_columns or row from source_chunks. + detail::RowRef last_key; + + ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. + + void initializeQueue(Chunks chunks); + void updateCursor(Chunk chunk, size_t source_num); + +private: + SortDescription description; + + /// Chunks currently being merged. + std::vector source_chunks; + SortCursorImpls cursors; + + /// In merging algorithm, we need to compare current sort key with the last one. + /// So, sorting columns for last row needed to be stored. + /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). + Chunk last_chunk; +}; + +} From 6bd80a357b221bf9888ae69a75e4b420a150cbda Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 13 Apr 2020 15:00:36 +0300 Subject: [PATCH 346/752] simplified backport script --- utils/simple-backport/backport.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index a0143108383..5cd23b9b541 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -7,8 +7,8 @@ merge_base=$(git merge-base origin/master "origin/$branch") # Make lists of PRs that were merged into each branch. Use first parent here, or else # we'll get weird things like seeing older master that was merged into a PR branch # that was then merged into master. -git log "$merge_base..origin/master" --first-parent --oneline > master-log.txt -git log "$merge_base..origin/$branch" --first-parent --oneline > "$branch-log.txt" +git log "$merge_base..origin/master" --first-parent > master-log.txt +git log "$merge_base..origin/$branch" --first-parent > "$branch-log.txt" # Search for PR numbers in commit messages. First variant is normal merge, and second # variant is squashed. @@ -89,11 +89,14 @@ do continue fi + # Find merge commit SHA for convenience + merge_sha="$(jq -r .merge_commit_sha "$file")" + url="https://github.com/ClickHouse/ClickHouse/pull/$pr" - printf "%s\t%s\t%s\t%s\n" "$action" "$pr" "$url" "$file" >> "$branch-report.tsv" + printf "%s\t%s\t%s\t%s\t%s\n" "$action" "$pr" "$url" "$file" "$merge_sha" >> "$branch-report.tsv" if [ "$action" == "backport" ] then - printf "%s\t%s\n" "$action" "$url" + printf "%s\t%s\t%s\n" "$action" "$url" "$merge_sha" fi done From a6bf7a14c8eb3bc04a40ddb711cf400a90914091 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 15:02:59 +0300 Subject: [PATCH 347/752] Fix build. 
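
Two leftovers from the previous commit: AggregatingMergedData::pull() fell
off the end without returning the chunk it had built, and its local variable
'columns' shadowed the member inherited from MergedData.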
--- src/Processors/Merges/AggregatingSortedAlgorithm.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h index 95a12c49618..3327776b2f4 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -5,12 +5,11 @@ #include #include #include +#include namespace DB { -class ColumnAggregateFunction; - class AggregatingSortedAlgorithm final : public IMergingAlgorithmWithDelayedChunk { public: @@ -78,7 +77,7 @@ private: auto chunk = pull(); size_t num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); + auto columns_ = chunk.detachColumns(); for (auto & desc : def.columns_to_simple_aggregate) { @@ -86,12 +85,14 @@ private: { auto & from_type = desc.inner_type; auto & to_type = header_.getByPosition(desc.column_number).type; - columns[desc.column_number] = recursiveTypeConversion(columns[desc.column_number], from_type, to_type); + columns_[desc.column_number] = recursiveTypeConversion(columns_[desc.column_number], from_type, to_type); } } - chunk.setColumns(std::move(columns), num_rows); + chunk.setColumns(std::move(columns_), num_rows); initAggregateDescription(def); + + return chunk; } private: From d5c055ab03d9c608201f0736d4a46bf0edb1fcdb Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 13 Apr 2020 15:13:13 +0300 Subject: [PATCH 348/752] remove logs --- src/Storages/StorageReplicatedMergeTree.cpp | 11 ----------- tests/integration/test_polymorphic_parts/test.py | 2 -- 2 files changed, 13 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ff4d5df03b9..7548edad48a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2052,8 +2052,6 @@ void StorageReplicatedMergeTree::mutationsUpdatingTask() BackgroundProcessingPoolTaskResult StorageReplicatedMergeTree::queueTask() { - LOG_FATAL(&Poco::Logger::get("queueTask()"), "begin"); - /// If replication queue is stopped exit immediately as we successfully executed the task if (queue.actions_blocker.isCancelled()) { @@ -2583,7 +2581,6 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart( */ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) { - LOG_FATAL(&Poco::Logger::get("updateQuorum"), "BEGIN!"); auto zookeeper = getZooKeeper(); /// Information on which replicas a part has been added, if the quorum has not yet been reached. @@ -3014,7 +3011,6 @@ void StorageReplicatedMergeTree::startup() void StorageReplicatedMergeTree::shutdown() { - LOG_FATAL(&Poco::Logger::get("shutdown"), "SHUTDOWN!"); clearOldPartsFromFilesystem(true); /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. fetcher.blocker.cancelForever(); @@ -5324,17 +5320,10 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti return merger_mutator.ttl_merges_blocker.cancel(); if (action_type == ActionLocks::PartsFetch) - { return fetcher.blocker.cancel(); - } - if (action_type == ActionLocks::PartsSend) - { - LOG_FATAL(&Poco::Logger::get("ActionLock"), "Cancel PartsSend"); return data_parts_exchange_endpoint ? 
data_parts_exchange_endpoint->blocker.cancel() : ActionLock(); - } - if (action_type == ActionLocks::ReplicationQueue) return queue.actions_blocker.cancel(); diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index fdf9ac2834b..f7256de9d9a 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -90,8 +90,6 @@ def test_polymorphic_parts_basics(start_cluster, first_node, second_node): first_node.query("SYSTEM STOP MERGES") second_node.query("SYSTEM STOP MERGES") - print(first_node.query("SELECT * FROM system.settings where name='insert_quorum' format Vertical")) - for size in [300, 300, 600]: insert_random_data('polymorphic_table', first_node, size) second_node.query("SYSTEM SYNC REPLICA polymorphic_table", timeout=20) From 12ca9f5912cb5f8cd95036d96675cd3c2635afa9 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Mon, 13 Apr 2020 15:32:45 +0300 Subject: [PATCH 349/752] disable alwaysReturnsEmptySet() for HashJoin over dictionary --- src/Interpreters/HashJoin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 48e7e9e9c9a..2a9314f1c80 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -188,7 +188,7 @@ public: /// Sum size in bytes of all buffers, used for JOIN maps and for all memory pools. size_t getTotalByteCount() const final; - bool alwaysReturnsEmptySet() const final { return isInnerOrRight(getKind()) && data->empty; } + bool alwaysReturnsEmptySet() const final { return isInnerOrRight(getKind()) && data->empty && !overDictionary(); } ASTTableJoin::Kind getKind() const { return kind; } ASTTableJoin::Strictness getStrictness() const { return strictness; } From 204a6b2b8c8203d17cf5203e0fa79c068b4a8d30 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 13 Apr 2020 15:42:15 +0300 Subject: [PATCH 350/752] simple backport script --- utils/simple-backport/backport.sh | 38 +++++-------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index 5cd23b9b541..06ec63d25ec 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -56,38 +56,12 @@ do action="backport" fi - # Next, check the tag. They might override the decision. - matched_labels=() - for label in $(jq -r .labels[].name "$file") - do - label_action="" - case "$label" in - pr-must-backport | "v$branch-must-backport") - label_action="backport" - ;; - pr-no-backport | "v$branch-no-backport") - label_action="no-backport" - ;; - "v$branch-conflicts") - label_action="conflict" - ;; - "v$branch" | "v$branch-backported") - label_action="done" - ;; - esac - if [ "$label_action" != "" ] - then - action="$label_action" - matched_labels+=("$label") - fi - done - - # Show an error if there are conflicting labels. - if [ ${#matched_labels[@]} -gt 1 ] - then - >&2 echo "PR #$pr has conflicting labels: ${matched_labels[*]}" - continue - fi + # Next, check the tag. They might override the decision. Checks are ordered by priority. 
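+    # (jq prints one label per line; each check below greps this list)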
+ labels="$(jq -r .labels[].name "$file")" + if echo "$labels" | grep "pr-must-backport\|v$branch-must-backport" > /dev/null; then action="backport"; fi + if echo "$labels" | grep "v$branch-conflicts" > /dev/null; then action="conflict"; fi + if echo "$labels" | grep "pr-no-backport\|v$branch-no-backport" > /dev/null; then action="no-backport"; fi + if echo "$labels" | grep "v$branch\|v$branch-backported" > /dev/null; then action="done"; fi # Find merge commit SHA for convenience merge_sha="$(jq -r .merge_commit_sha "$file")" From 8be5a1f0a5ac176ff7c1c83d9979627b6bc335b9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 13 Apr 2020 15:54:09 +0300 Subject: [PATCH 351/752] simple backport script --- utils/simple-backport/backport.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index 06ec63d25ec..7fbd34f0a08 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -58,10 +58,10 @@ do # Next, check the tag. They might override the decision. Checks are ordered by priority. labels="$(jq -r .labels[].name "$file")" - if echo "$labels" | grep "pr-must-backport\|v$branch-must-backport" > /dev/null; then action="backport"; fi - if echo "$labels" | grep "v$branch-conflicts" > /dev/null; then action="conflict"; fi - if echo "$labels" | grep "pr-no-backport\|v$branch-no-backport" > /dev/null; then action="no-backport"; fi - if echo "$labels" | grep "v$branch\|v$branch-backported" > /dev/null; then action="done"; fi + if echo "$labels" | grep -x "pr-must-backport\|v$branch-must-backport" > /dev/null; then action="backport"; fi + if echo "$labels" | grep -x "v$branch-conflicts" > /dev/null; then action="conflict"; fi + if echo "$labels" | grep -x "pr-no-backport\|v$branch-no-backport" > /dev/null; then action="no-backport"; fi + if echo "$labels" | grep -x "v$branch\|v$branch-backported" > /dev/null; then action="done"; fi # Find merge commit SHA for convenience merge_sha="$(jq -r .merge_commit_sha "$file")" From a2b5fd4b3d3b6cd229367b7e38c30740a3cc3b4f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 16:18:46 +0300 Subject: [PATCH 352/752] Fix build. --- src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp index 5b6c00750ba..4364dd97f76 100644 --- a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp +++ b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp @@ -19,14 +19,14 @@ IMergingAlgorithmWithDelayedChunk::IMergingAlgorithmWithDelayedChunk( void IMergingAlgorithmWithDelayedChunk::initializeQueue(Chunks chunks) { - source_chunks.resize(chunks.size()); + source_chunks = std::move(chunks); for (size_t source_num = 0; source_num < source_chunks.size(); ++source_num) { - if (!chunks[source_num]) + if (!source_chunks[source_num]) continue; - source_chunks[source_num] = std::move(chunks[source_num]); + source_chunks[source_num] = std::move(source_chunks[source_num]); cursors[source_num] = SortCursorImpl(source_chunks[source_num].getColumns(), description, source_num); } From 5efdf1203ee1d7acda6218337cacd5567cf4b77e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 16:22:32 +0300 Subject: [PATCH 353/752] Fix build. 
--- src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp index 4364dd97f76..54d957014ea 100644 --- a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp +++ b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp @@ -13,10 +13,10 @@ IMergingAlgorithmWithDelayedChunk::IMergingAlgorithmWithDelayedChunk( SortDescription description_) : description(std::move(description_)) , source_chunks(num_inputs) + , cursors(num_inputs) { } - void IMergingAlgorithmWithDelayedChunk::initializeQueue(Chunks chunks) { source_chunks = std::move(chunks); From 0544c1f25be1056c59516578c5aef2cecc08d828 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 16:26:50 +0300 Subject: [PATCH 354/752] Fix build. --- src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp index 54d957014ea..16eac826f16 100644 --- a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp +++ b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.cpp @@ -26,7 +26,6 @@ void IMergingAlgorithmWithDelayedChunk::initializeQueue(Chunks chunks) if (!source_chunks[source_num]) continue; - source_chunks[source_num] = std::move(source_chunks[source_num]); cursors[source_num] = SortCursorImpl(source_chunks[source_num].getColumns(), description, source_num); } From 9ce0607de7ad07cdf4dda93ac14b4d2561628223 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 16:40:53 +0300 Subject: [PATCH 355/752] Remove header from AggregatingSortedAlgorithm. 
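
SimpleAggregateDescription now stores both the nested (LowCardinality-
stripped) type and the real type from the header, so the conversion done in
AggregatingMergedData::pull() no longer needs to keep the whole Block around.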
--- .../Merges/AggregatingSortedAlgorithm.cpp | 11 +++++----- .../Merges/AggregatingSortedAlgorithm.h | 22 +++++++++++-------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp index a185e7cee67..d27ef315155 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp @@ -48,7 +48,7 @@ namespace type = nullptr; // simple aggregate function - AggregatingSortedAlgorithm::SimpleAggregateDescription desc(simple_aggr->getFunction(), i, type); + AggregatingSortedAlgorithm::SimpleAggregateDescription desc(simple_aggr->getFunction(), i, type, column.type); if (desc.function->allocatesMemoryInArena()) def.allocates_memory_in_arena = true; @@ -84,10 +84,9 @@ namespace } AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( - const Block & header_, size_t num_inputs, + const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) - , header(header_) , columns_definition(defineColumns(header, description_)) , merged_data(getMergedColumns(header, columns_definition), max_block_size, columns_definition) { @@ -102,7 +101,7 @@ void AggregatingSortedAlgorithm::prepareChunk(Chunk & chunk) const column = column->convertToFullColumnIfConst(); for (auto & desc : columns_definition.columns_to_simple_aggregate) - if (desc.inner_type) + if (desc.nested_type) columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); chunk.setColumns(std::move(columns), num_rows); @@ -159,7 +158,7 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() if (merged_data.hasEnoughRows()) { last_key.reset(); - Status(merged_data.pull(columns_definition, header)); + Status(merged_data.pull(columns_definition)); } /// We will write the data for the group. We copy the values of ordinary columns. @@ -200,7 +199,7 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() } last_chunk_sort_columns.clear(); - return Status(merged_data.pull(columns_definition, header), true); + return Status(merged_data.pull(columns_definition), true); } void AggregatingSortedAlgorithm::addRow(SortCursor & cursor) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h index 3327776b2f4..993973c1bc7 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -72,7 +72,7 @@ private: /// TODO: sum_blocks_granularity += block_size; } - Chunk pull(ColumnsDefinition & def, const Block & header_) + Chunk pull(ColumnsDefinition & def) { auto chunk = pull(); @@ -81,10 +81,10 @@ private: for (auto & desc : def.columns_to_simple_aggregate) { - if (desc.inner_type) + if (desc.nested_type) { - auto & from_type = desc.inner_type; - auto & to_type = header_.getByPosition(desc.column_number).type; + auto & from_type = desc.nested_type; + auto & to_type = desc.real_type; columns_[desc.column_number] = recursiveTypeConversion(columns_[desc.column_number], from_type, to_type); } } @@ -111,8 +111,6 @@ private: using MergedData::pull; }; - Block header; - ColumnsDefinition columns_definition; AggregatingMergedData merged_data; @@ -134,13 +132,19 @@ public: size_t column_number = 0; IColumn * column = nullptr; - const DataTypePtr inner_type; + + /// For LowCardinality, convert is converted to nested type. 
nested_type is nullptr if no conversion needed. + const DataTypePtr nested_type; /// Nested type for LowCardinality, if it is. + const DataTypePtr real_type; /// Type in header. AlignedBuffer state; bool created = false; - SimpleAggregateDescription(AggregateFunctionPtr function_, const size_t column_number_, DataTypePtr type) - : function(std::move(function_)), column_number(column_number_), inner_type(std::move(type)) + SimpleAggregateDescription( + AggregateFunctionPtr function_, const size_t column_number_, + DataTypePtr nested_type_, DataTypePtr real_type_) + : function(std::move(function_)), column_number(column_number_) + , nested_type(std::move(nested_type_)), real_type(std::move(real_type_)) { add_function = function->getAddressOfAddFunction(); state.reset(function->sizeOfData(), function->alignOfData()); From 9cc7d0f06aa917fc87219adfb3d6f9311e72f095 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 13 Apr 2020 17:34:01 +0300 Subject: [PATCH 356/752] Fix 'Cannot add column' error while creating range_hashed dictionary using DDL queries --- src/Storages/StorageDictionary.cpp | 16 ++++++++- ...01125_dict_ddl_cannot_add_column.reference | 3 ++ .../01125_dict_ddl_cannot_add_column.sql | 34 +++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference create mode 100644 tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 396e83cc293..86831593d54 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -75,16 +75,30 @@ NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure if (dictionary_structure.id) dictionary_names_and_types.emplace_back(dictionary_structure.id->name, std::make_shared()); + + /// In old-style (XML) configuration we don't have this attributes in the + /// main attribute list, so we have to add them to columns list explicitly. + /// In the new configuration (DDL) we have them both in range_* nodes and + /// main attribute list, but for compatibility we add them before main + /// attributes list. 
if (dictionary_structure.range_min) dictionary_names_and_types.emplace_back(dictionary_structure.range_min->name, dictionary_structure.range_min->type); + if (dictionary_structure.range_max) dictionary_names_and_types.emplace_back(dictionary_structure.range_max->name, dictionary_structure.range_max->type); + if (dictionary_structure.key) + { for (const auto & attribute : *dictionary_structure.key) dictionary_names_and_types.emplace_back(attribute.name, attribute.type); + } for (const auto & attribute : dictionary_structure.attributes) - dictionary_names_and_types.emplace_back(attribute.name, attribute.type); + { + /// Some attributes can be already added (range_min and range_max) + if (!dictionary_names_and_types.contains(attribute.name)) + dictionary_names_and_types.emplace_back(attribute.name, attribute.type); + } return dictionary_names_and_types; } diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference new file mode 100644 index 00000000000..1a9e5685a6a --- /dev/null +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference @@ -0,0 +1,3 @@ +1 2019-01-05 2020-01-10 1 +date_table +somedict diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql new file mode 100644 index 00000000000..3f87235bdf4 --- /dev/null +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql @@ -0,0 +1,34 @@ +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict; + +use database_for_dict; + +CREATE TABLE date_table +( + id UInt32, + val String, + start Date, + end Date +) Engine = Memory(); + +INSERT INTO date_table VALUES(1, '1', toDate('2019-01-05'), toDate('2020-01-10')); + +CREATE DICTIONARY somedict +( + id UInt32, + val String, + start Date, + end Date +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'date_table' DB 'database_for_dict')) +LAYOUT(RANGE_HASHED()) +RANGE (MIN start MAX end) +LIFETIME(MIN 300 MAX 360); + +SELECT * from somedict; + +SHOW TABLES; + +DROP DATABASE IF EXISTS database_for_dict; From e05e2c76283bbc0d34abfcb0e7bf8764ab79a065 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Mon, 13 Apr 2020 17:34:11 +0300 Subject: [PATCH 357/752] Fixed check for nondeterministic functions to handle lambdas correctly --- src/Interpreters/MutationsInterpreter.cpp | 42 ++++++++++++------- ...eterministic_functions_zookeeper.reference | 2 + ...th_nondeterministic_functions_zookeeper.sh | 6 +++ 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index df0267b9450..985fda3aac7 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -36,34 +36,44 @@ namespace ErrorCodes namespace { -struct FirstNonDeterministicFuncData +/// Helps to detect situations, where non-deterministic functions may be used in mutations of Replicated*MergeTree. 
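+/// Lambda bodies are still visited, but the 'lambda' pseudo-function itself
+/// is skipped: it is not registered in FunctionFactory, so looking it up
+/// would throw, and it is deterministic in any case.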
+class FirstNonDeterministicFuncMatcher { - using TypeToVisit = ASTFunction; +public: + struct Data { + const Context & context; + std::optional nondeterministic_function_name; + }; - explicit FirstNonDeterministicFuncData(const Context & context_) - : context{context_} - {} - - const Context & context; - std::optional nondeterministic_function_name; - - void visit(ASTFunction & function, ASTPtr &) +public: + static bool needChildVisit(const ASTPtr & /*node*/, const ASTPtr & child) { - if (nondeterministic_function_name) + return child != nullptr; + } + + static void visit(const ASTPtr & node, Data & data) + { + if (data.nondeterministic_function_name) return; - const auto func = FunctionFactory::instance().get(function.name, context); - if (!func->isDeterministic()) - nondeterministic_function_name = func->getName(); + if (const auto * function = typeid_cast(node.get())) + { + if (function->name != "lambda") + { + const auto func = FunctionFactory::instance().get(function->name, data.context); + if (!func->isDeterministic()) + data.nondeterministic_function_name = func->getName(); + } + } } }; using FirstNonDeterministicFuncFinder = - InDepthNodeVisitor, true>; + InDepthNodeVisitor; std::optional findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context) { - FirstNonDeterministicFuncData finder_data(context); + FirstNonDeterministicFuncMatcher::Data finder_data{context}; switch (command.type) { diff --git a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference index f799e8ed8f0..6bf25043399 100644 --- a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference +++ b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.reference @@ -5,3 +5,5 @@ OK OK OK OK +OK +OK diff --git a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh index 9b190855adf..68cb5e0e760 100755 --- a/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh +++ b/tests/queries/0_stateless/01017_mutations_with_nondeterministic_functions_zookeeper.sh @@ -43,6 +43,12 @@ ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 DELETE WHERE ignore(rand())" 2>&1 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = y + rand() % 1 WHERE not ignore()" 2>&1 \ | fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = x + arrayCount(x -> (x + y) % 2, range(y)) WHERE not ignore()" 2>&1 > /dev/null \ +&& echo 'OK' || echo 'FAIL' + +${CLICKHOUSE_CLIENT} --query "ALTER TABLE $R1 UPDATE y = x + arrayCount(x -> (rand() + x) % 2, range(y)) WHERE not ignore()" 2>&1 \ +| fgrep -q "must use only deterministic functions" && echo 'OK' || echo 'FAIL' + # For regular tables we do not enforce deterministic functions ${CLICKHOUSE_CLIENT} --query "ALTER TABLE $T1 DELETE WHERE rand() = 0" 2>&1 > /dev/null \ From 377e16c00c2d5ce2ebc8173ac25a18ed4c7401e2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 17:42:58 +0300 Subject: [PATCH 358/752] Refactor AggregatingSortedAlgorithm. 
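In brief: the patch replaces the algorithm's scattered initializeRow() /
insertRow() / insertSimpleAggregationResult() calls with an explicit group
lifecycle on AggregatingMergedData. A sketch of the resulting control flow;
the loop scaffolding is simplified and partly hypothetical, the merged_data
calls are taken from the diff, and the sketch keeps the `return` that the
hunk's inner `Status(merged_data.pull())` statement appears to drop:

    while (queue.isValid())
    {
        SortCursor current = queue.current();

        if (sortKeyDiffers(current))                /// hypothetical helper
        {
            if (merged_data.isGroupStarted())
                merged_data.finishGroup();          /// finalize simple-aggregate states

            if (merged_data.hasEnoughRows())
                return Status(merged_data.pull());

            merged_data.startGroup(current->all_columns, current->pos);
        }

        merged_data.addRow(current);                /// valid only inside a started group
        queue.next();                               /// simplified cursor advance
    }

    if (merged_data.isGroupStarted())
        merged_data.finishGroup();

    return Status(merged_data.pull(), true);        /// pull() also converts LowCardinality back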
--- .../Merges/AggregatingSortedAlgorithm.cpp | 375 +++++++++++------- .../Merges/AggregatingSortedAlgorithm.h | 147 ++----- 2 files changed, 264 insertions(+), 258 deletions(-) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp index d27ef315155..5e2fe037d3d 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp @@ -1,6 +1,5 @@ #include -#include #include #include #include @@ -8,81 +7,251 @@ namespace DB { -namespace +/// Stores information for aggregation of AggregateFunction columns +struct AggregatingSortedAlgorithm::AggregateDescription { - AggregatingSortedAlgorithm::ColumnsDefinition defineColumns( - const Block & header, const SortDescription & description) + ColumnAggregateFunction * column = nullptr; + const size_t column_number = 0; + + AggregateDescription() = default; + explicit AggregateDescription(size_t col_number) : column_number(col_number) {} +}; + +/// Stores information for aggregation of SimpleAggregateFunction columns +struct AggregatingSortedAlgorithm::SimpleAggregateDescription +{ + /// An aggregate function 'anyLast', 'sum'... + AggregateFunctionPtr function; + IAggregateFunction::AddFunc add_function = nullptr; + + size_t column_number = 0; + IColumn * column = nullptr; + + /// For LowCardinality, convert is converted to nested type. nested_type is nullptr if no conversion needed. + const DataTypePtr nested_type; /// Nested type for LowCardinality, if it is. + const DataTypePtr real_type; /// Type in header. + + AlignedBuffer state; + bool created = false; + + SimpleAggregateDescription( + AggregateFunctionPtr function_, const size_t column_number_, + DataTypePtr nested_type_, DataTypePtr real_type_) + : function(std::move(function_)), column_number(column_number_) + , nested_type(std::move(nested_type_)), real_type(std::move(real_type_)) { - AggregatingSortedAlgorithm::ColumnsDefinition def = {}; - size_t num_columns = header.columns(); - - /// Fill in the column numbers that need to be aggregated. - for (size_t i = 0; i < num_columns; ++i) - { - const ColumnWithTypeAndName & column = header.safeGetByPosition(i); - - /// We leave only states of aggregate functions. - if (!dynamic_cast(column.type.get()) - && !dynamic_cast(column.type->getCustomName())) - { - def.column_numbers_not_to_aggregate.push_back(i); - continue; - } - - /// Included into PK? 
- auto it = description.begin(); - for (; it != description.end(); ++it) - if (it->column_name == column.name || (it->column_name.empty() && it->column_number == i)) - break; - - if (it != description.end()) - { - def.column_numbers_not_to_aggregate.push_back(i); - continue; - } - - if (auto simple_aggr = dynamic_cast(column.type->getCustomName())) - { - auto type = recursiveRemoveLowCardinality(column.type); - if (type.get() == column.type.get()) - type = nullptr; - - // simple aggregate function - AggregatingSortedAlgorithm::SimpleAggregateDescription desc(simple_aggr->getFunction(), i, type, column.type); - if (desc.function->allocatesMemoryInArena()) - def.allocates_memory_in_arena = true; - - def.columns_to_simple_aggregate.emplace_back(std::move(desc)); - } - else - { - // standard aggregate function - def.columns_to_aggregate.emplace_back(i); - } - } - - return def; + add_function = function->getAddressOfAddFunction(); + state.reset(function->sizeOfData(), function->alignOfData()); } - MutableColumns getMergedColumns(const Block & header, const AggregatingSortedAlgorithm::ColumnsDefinition & def) + void createState() { - MutableColumns columns; - columns.resize(header.columns()); + if (created) + return; + function->create(state.data()); + created = true; + } - for (auto & desc : def.columns_to_simple_aggregate) + void destroyState() + { + if (!created) + return; + function->destroy(state.data()); + created = false; + } + + /// Explicitly destroy aggregation state if the stream is terminated + ~SimpleAggregateDescription() + { + destroyState(); + } + + SimpleAggregateDescription() = default; + SimpleAggregateDescription(SimpleAggregateDescription &&) = default; + SimpleAggregateDescription(const SimpleAggregateDescription &) = delete; +}; + +static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns( + const Block & header, const SortDescription & description) +{ + AggregatingSortedAlgorithm::ColumnsDefinition def = {}; + size_t num_columns = header.columns(); + + /// Fill in the column numbers that need to be aggregated. + for (size_t i = 0; i < num_columns; ++i) + { + const ColumnWithTypeAndName & column = header.safeGetByPosition(i); + + /// We leave only states of aggregate functions. + if (!dynamic_cast(column.type.get()) + && !dynamic_cast(column.type->getCustomName())) { - auto & type = header.getByPosition(desc.column_number).type; - columns[desc.column_number] = recursiveRemoveLowCardinality(type)->createColumn(); + def.column_numbers_not_to_aggregate.push_back(i); + continue; } - for (size_t i = 0; i < columns.size(); ++i) - if (!columns[i]) - columns[i] = header.getByPosition(i).type->createColumn(); + /// Included into PK? 
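/// ("PK" here means the merge sort description: columns belonging to the
/// sort key are equal within a group by construction, so they are copied
/// through unmodified instead of being aggregated.)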
+ auto it = description.begin(); + for (; it != description.end(); ++it) + if (it->column_name == column.name || (it->column_name.empty() && it->column_number == i)) + break; - return columns; + if (it != description.end()) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + if (auto simple = dynamic_cast(column.type->getCustomName())) + { + auto type = recursiveRemoveLowCardinality(column.type); + if (type.get() == column.type.get()) + type = nullptr; + + // simple aggregate function + AggregatingSortedAlgorithm::SimpleAggregateDescription desc(simple->getFunction(), i, type, column.type); + if (desc.function->allocatesMemoryInArena()) + def.allocates_memory_in_arena = true; + + def.columns_to_simple_aggregate.emplace_back(std::move(desc)); + } + else + { + // standard aggregate function + def.columns_to_aggregate.emplace_back(i); + } + } + + return def; +} + +static MutableColumns getMergedColumns(const Block & header, const AggregatingSortedAlgorithm::ColumnsDefinition & def) +{ + MutableColumns columns; + columns.resize(header.columns()); + + for (auto & desc : def.columns_to_simple_aggregate) + { + auto & type = header.getByPosition(desc.column_number).type; + columns[desc.column_number] = recursiveRemoveLowCardinality(type)->createColumn(); + } + + for (size_t i = 0; i < columns.size(); ++i) + if (!columns[i]) + columns[i] = header.getByPosition(i).type->createColumn(); + + return columns; +} + +static void prepareChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + for (auto & desc : def.columns_to_simple_aggregate) + if (desc.nested_type) + columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); + + chunk.setColumns(std::move(columns), num_rows); +} + + +AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData( + MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_) + : MergedData(std::move(columns_), false, max_block_size_), def(def_) +{ + initAggregateDescription(); +} + +void AggregatingSortedAlgorithm::AggregatingMergedData::startGroup(const ColumnRawPtrs & raw_columns, size_t row) +{ + /// We will write the data for the group. We copy the values of ordinary columns. + for (auto column_number : def.column_numbers_not_to_aggregate) + columns[column_number]->insertFrom(*raw_columns[column_number], row); + + /// Add the empty aggregation state to the aggregate columns. The state will be updated in the `addRow` function. + for (auto & column_to_aggregate : def.columns_to_aggregate) + column_to_aggregate.column->insertDefault(); + + /// Reset simple aggregation states for next row + for (auto & desc : def.columns_to_simple_aggregate) + desc.createState(); + + if (def.allocates_memory_in_arena) + arena = std::make_unique(); + + is_group_started = true; +} + +void AggregatingSortedAlgorithm::AggregatingMergedData::finishGroup() +{ + /// Write the simple aggregation result for the current group. 
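/// (Only SimpleAggregateFunction columns need this explicit finish step:
/// insertResultInto() materializes the accumulated state into an ordinary
/// column, and the state is destroyed so that the next startGroup() can
/// recreate it. Plain AggregateFunction columns keep their states inside
/// ColumnAggregateFunction and need no finalization here.)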
+ for (auto & desc : def.columns_to_simple_aggregate) + { + desc.function->insertResultInto(desc.state.data(), *desc.column); + desc.destroyState(); + } + + is_group_started = false; + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; +} + +void AggregatingSortedAlgorithm::AggregatingMergedData::addRow(SortCursor & cursor) +{ + if (!is_group_started) + throw Exception("Can't add a row to the group because it was not started.", ErrorCodes::LOGICAL_ERROR); + + for (auto & desc : def.columns_to_aggregate) + desc.column->insertMergeFrom(*cursor->all_columns[desc.column_number], cursor->pos); + + for (auto & desc : def.columns_to_simple_aggregate) + { + auto & col = cursor->all_columns[desc.column_number]; + desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, arena.get()); } } +Chunk AggregatingSortedAlgorithm::AggregatingMergedData::pull() +{ + if (is_group_started) + throw Exception("Can't pull chunk because group was not finished.", ErrorCodes::LOGICAL_ERROR); + + auto chunk = MergedData::pull(); + + size_t num_rows = chunk.getNumRows(); + auto columns_ = chunk.detachColumns(); + + for (auto & desc : def.columns_to_simple_aggregate) + { + if (desc.nested_type) + { + auto & from_type = desc.nested_type; + auto & to_type = desc.real_type; + columns_[desc.column_number] = recursiveTypeConversion(columns_[desc.column_number], from_type, to_type); + } + } + + chunk.setColumns(std::move(columns_), num_rows); + initAggregateDescription(); + + return chunk; +} + +void AggregatingSortedAlgorithm::AggregatingMergedData::initAggregateDescription() +{ + for (auto & desc : def.columns_to_simple_aggregate) + desc.column = columns[desc.column_number].get(); + + for (auto & desc : def.columns_to_aggregate) + desc.column = typeid_cast(columns[desc.column_number].get()); +} + + AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) @@ -92,33 +261,18 @@ AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( { } -void AggregatingSortedAlgorithm::prepareChunk(Chunk & chunk) const -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - for (auto & desc : columns_definition.columns_to_simple_aggregate) - if (desc.nested_type) - columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); - - chunk.setColumns(std::move(columns), num_rows); -} - void AggregatingSortedAlgorithm::initialize(Chunks chunks) { for (auto & chunk : chunks) if (chunk) - prepareChunk(chunk); + prepareChunk(chunk, columns_definition); initializeQueue(std::move(chunks)); } void AggregatingSortedAlgorithm::consume(Chunk chunk, size_t source_num) { - prepareChunk(chunk); + prepareChunk(chunk, columns_definition); updateCursor(std::move(chunk), source_num); } @@ -128,18 +282,13 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() while (queue.isValid()) { bool key_differs; - bool has_previous_group = !last_key.empty(); - SortCursor current = queue.current(); { detail::RowRef current_key; current_key.set(current); - if (!has_previous_group) /// The first key encountered. 
- key_differs = true; - else - key_differs = !last_key.hasEqualSortColumnsWith(current_key); + key_differs = last_key.empty() || !last_key.hasEqualSortColumnsWith(current_key); last_key = current_key; last_chunk_sort_columns.clear(); @@ -147,37 +296,20 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() if (key_differs) { - /// Write the simple aggregation result for the previous group. if (merged_data.isGroupStarted()) - { - insertSimpleAggregationResult(); - merged_data.insertRow(); - } + merged_data.finishGroup(); /// if there are enough rows accumulated and the last one is calculated completely if (merged_data.hasEnoughRows()) { last_key.reset(); - Status(merged_data.pull(columns_definition)); + Status(merged_data.pull()); } - /// We will write the data for the group. We copy the values of ordinary columns. - merged_data.initializeRow(current->all_columns, current->pos, - columns_definition.column_numbers_not_to_aggregate); - - /// Add the empty aggregation state to the aggregate columns. The state will be updated in the `addRow` function. - for (auto & column_to_aggregate : columns_definition.columns_to_aggregate) - column_to_aggregate.column->insertDefault(); - - /// Reset simple aggregation states for next row - for (auto & desc : columns_definition.columns_to_simple_aggregate) - desc.createState(); - - if (columns_definition.allocates_memory_in_arena) - arena = std::make_unique(); + merged_data.startGroup(current->all_columns, current->pos); } - addRow(current); + merged_data.addRow(current); if (!current->isLast()) { @@ -193,35 +325,10 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() /// Write the simple aggregation result for the previous group. if (merged_data.isGroupStarted()) - { - insertSimpleAggregationResult(); - merged_data.insertRow(); - } + merged_data.finishGroup(); last_chunk_sort_columns.clear(); - return Status(merged_data.pull(columns_definition), true); + return Status(merged_data.pull(), true); } -void AggregatingSortedAlgorithm::addRow(SortCursor & cursor) -{ - for (auto & desc : columns_definition.columns_to_aggregate) - desc.column->insertMergeFrom(*cursor->all_columns[desc.column_number], cursor->pos); - - for (auto & desc : columns_definition.columns_to_simple_aggregate) - { - auto & col = cursor->all_columns[desc.column_number]; - desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, arena.get()); - } -} - -void AggregatingSortedAlgorithm::insertSimpleAggregationResult() -{ - for (auto & desc : columns_definition.columns_to_simple_aggregate) - { - desc.function->insertResultInto(desc.state.data(), *desc.column); - desc.destroyState(); - } -} - - } diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h index 993973c1bc7..f4c6c258297 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB { @@ -22,18 +23,14 @@ public: Status merge() override; struct SimpleAggregateDescription; + struct AggregateDescription; + /// This structure define columns into one of three types: + /// * columns which are not aggregate functions and not needed to be aggregated + /// * usual aggregate functions, which stores states into ColumnAggregateFunction + /// * simple aggregate functions, which store states into ordinary columns struct ColumnsDefinition { - struct AggregateDescription - { - ColumnAggregateFunction * column = nullptr; - const 
size_t column_number = 0; - - AggregateDescription() = default; - explicit AggregateDescription(size_t col_number) : column_number(col_number) {} - }; - /// Columns with which numbers should not be aggregated. ColumnNumbers column_numbers_not_to_aggregate; std::vector columns_to_aggregate; @@ -47,135 +44,37 @@ private: /// Specialization for AggregatingSortedAlgorithm. struct AggregatingMergedData : public MergedData { + private: + using MergedData::pull; + using MergedData::insertRow; + public: - AggregatingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def) - : MergedData(std::move(columns_), false, max_block_size_) - { - initAggregateDescription(def); - } + AggregatingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_); - void initializeRow(const ColumnRawPtrs & raw_columns, size_t row, const ColumnNumbers & column_numbers) - { - for (auto column_number : column_numbers) - columns[column_number]->insertFrom(*raw_columns[column_number], row); - - is_group_started = true; - } + void startGroup(const ColumnRawPtrs & raw_columns, size_t row); + void finishGroup(); bool isGroupStarted() const { return is_group_started; } + void addRow(SortCursor & cursor); - void insertRow() - { - is_group_started = false; - ++total_merged_rows; - ++merged_rows; - /// TODO: sum_blocks_granularity += block_size; - } - - Chunk pull(ColumnsDefinition & def) - { - auto chunk = pull(); - - size_t num_rows = chunk.getNumRows(); - auto columns_ = chunk.detachColumns(); - - for (auto & desc : def.columns_to_simple_aggregate) - { - if (desc.nested_type) - { - auto & from_type = desc.nested_type; - auto & to_type = desc.real_type; - columns_[desc.column_number] = recursiveTypeConversion(columns_[desc.column_number], from_type, to_type); - } - } - - chunk.setColumns(std::move(columns_), num_rows); - initAggregateDescription(def); - - return chunk; - } + Chunk pull(); private: + ColumnsDefinition & def; + + /// Memory pool for SimpleAggregateFunction + /// (only when allocates_memory_in_arena == true). + std::unique_ptr arena; + bool is_group_started = false; /// Initialize aggregate descriptions with columns. - void initAggregateDescription(ColumnsDefinition & def) - { - for (auto & desc : def.columns_to_simple_aggregate) - desc.column = columns[desc.column_number].get(); - - for (auto & desc : def.columns_to_aggregate) - desc.column = typeid_cast(columns[desc.column_number].get()); - } - - using MergedData::pull; + void initAggregateDescription(); }; + /// Order between members is important because merged_data has reference to columns_definition. ColumnsDefinition columns_definition; AggregatingMergedData merged_data; - - /// Memory pool for SimpleAggregateFunction - /// (only when allocates_memory_in_arena == true). - std::unique_ptr arena; - - void prepareChunk(Chunk & chunk) const; - void addRow(SortCursor & cursor); - void insertSimpleAggregationResult(); - -public: - /// Stores information for aggregation of SimpleAggregateFunction columns - struct SimpleAggregateDescription - { - /// An aggregate function 'anyLast', 'sum'... - AggregateFunctionPtr function; - IAggregateFunction::AddFunc add_function = nullptr; - - size_t column_number = 0; - IColumn * column = nullptr; - - /// For LowCardinality, convert is converted to nested type. nested_type is nullptr if no conversion needed. - const DataTypePtr nested_type; /// Nested type for LowCardinality, if it is. - const DataTypePtr real_type; /// Type in header. 
- - AlignedBuffer state; - bool created = false; - - SimpleAggregateDescription( - AggregateFunctionPtr function_, const size_t column_number_, - DataTypePtr nested_type_, DataTypePtr real_type_) - : function(std::move(function_)), column_number(column_number_) - , nested_type(std::move(nested_type_)), real_type(std::move(real_type_)) - { - add_function = function->getAddressOfAddFunction(); - state.reset(function->sizeOfData(), function->alignOfData()); - } - - void createState() - { - if (created) - return; - function->create(state.data()); - created = true; - } - - void destroyState() - { - if (!created) - return; - function->destroy(state.data()); - created = false; - } - - /// Explicitly destroy aggregation state if the stream is terminated - ~SimpleAggregateDescription() - { - destroyState(); - } - - SimpleAggregateDescription() = default; - SimpleAggregateDescription(SimpleAggregateDescription &&) = default; - SimpleAggregateDescription(const SimpleAggregateDescription &) = delete; - }; }; } From d3adcc1cb0fa589f4e9166c907dfcdd845c23dc1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 17:53:04 +0300 Subject: [PATCH 359/752] Fix build. --- src/Processors/Merges/AggregatingSortedAlgorithm.cpp | 4 +++- src/Processors/Merges/AggregatingSortedAlgorithm.h | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp index 5e2fe037d3d..a9ea347a66f 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp @@ -7,11 +7,13 @@ namespace DB { +AggregatingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default; + /// Stores information for aggregation of AggregateFunction columns struct AggregatingSortedAlgorithm::AggregateDescription { ColumnAggregateFunction * column = nullptr; - const size_t column_number = 0; + const size_t column_number = 0; /// Position in header. AggregateDescription() = default; explicit AggregateDescription(size_t col_number) : column_number(col_number) {} diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h index f4c6c258297..16953d11e3c 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -31,6 +31,8 @@ public: /// * simple aggregate functions, which store states into ordinary columns struct ColumnsDefinition { + ~ColumnsDefinition(); + /// Columns with which numbers should not be aggregated. ColumnNumbers column_numbers_not_to_aggregate; std::vector columns_to_aggregate; @@ -51,13 +53,16 @@ private: public: AggregatingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_); + /// Group is a group of rows with the same sorting key. It represents single row in result. + /// Algorithm is: start group, add several rows, finish group. + /// Then pull chunk when enough groups were added. void startGroup(const ColumnRawPtrs & raw_columns, size_t row); void finishGroup(); bool isGroupStarted() const { return is_group_started; } - void addRow(SortCursor & cursor); + void addRow(SortCursor & cursor); /// Possible only when group was started. - Chunk pull(); + Chunk pull(); /// Possible only if group was finished. 
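/// (Design note: in the refactoring commit above, MergedData::pull() and
/// MergedData::insertRow() were re-declared private via using-declarations,
/// so outside code can only reach these checked wrappers; pull() throws
/// LOGICAL_ERROR while a group is still open.)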
private: ColumnsDefinition & def; From 9acea16b7f628bc8c601a5db7875d32d91965b37 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 17:57:21 +0300 Subject: [PATCH 360/752] Fix build. --- src/Processors/Merges/AggregatingSortedAlgorithm.cpp | 2 ++ src/Processors/Merges/AggregatingSortedAlgorithm.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp index a9ea347a66f..3d7075efc97 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp @@ -7,6 +7,8 @@ namespace DB { +AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default; +AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) = default; AggregatingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default; /// Stores information for aggregation of AggregateFunction columns diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h index 16953d11e3c..6840b485ce5 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -31,6 +31,8 @@ public: /// * simple aggregate functions, which store states into ordinary columns struct ColumnsDefinition { + ColumnsDefinition(); + ColumnsDefinition(ColumnsDefinition &&) noexcept; ~ColumnsDefinition(); /// Columns with which numbers should not be aggregated. From 2b81d68b56fcfe22804312f23f8ae1c3a02d5fe9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 17:58:23 +0300 Subject: [PATCH 361/752] Fix build. --- src/Processors/Merges/AggregatingSortedAlgorithm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp index 3d7075efc97..e6138ecc6ab 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp @@ -8,7 +8,7 @@ namespace DB { AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default; -AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) = default; +AggregatingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default; AggregatingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default; /// Stores information for aggregation of AggregateFunction columns From 89aef7aaf932f706a9598b67c3c2d04d3f6a91cb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 18:04:54 +0300 Subject: [PATCH 362/752] Add comments. --- src/Processors/Merges/AggregatingSortedAlgorithm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h index 6840b485ce5..ad5644f7189 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -31,9 +31,9 @@ public: /// * simple aggregate functions, which store states into ordinary columns struct ColumnsDefinition { - ColumnsDefinition(); - ColumnsDefinition(ColumnsDefinition &&) noexcept; - ~ColumnsDefinition(); + ColumnsDefinition(); /// Is needed because destructor is defined. + ColumnsDefinition(ColumnsDefinition &&) noexcept; /// Is needed because destructor is defined. 
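/// These out-of-line special members are the standard idiom for holding a
/// std::vector of a type that is incomplete in the header. A self-contained
/// sketch of the same idiom with hypothetical names (valid since C++17, when
/// std::vector gained support for incomplete element types):

    // holder.h
    #include <vector>

    struct Holder
    {
        struct Elem;                    // incomplete here; defined in holder.cpp
        Holder();                       // only declared: defaulted bodies here would
        Holder(Holder &&) noexcept;     // instantiate std::vector<Elem>'s destructor,
        ~Holder();                      // which requires a complete Elem
        std::vector<Elem> elems;
    };

    // holder.cpp
    struct Holder::Elem { int x = 0; };
    Holder::Holder() = default;
    Holder::Holder(Holder &&) noexcept = default;
    Holder::~Holder() = default;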
+ ~ColumnsDefinition(); /// Is needed because otherwise std::vector's destructor uses incomplete types. /// Columns with which numbers should not be aggregated. ColumnNumbers column_numbers_not_to_aggregate; From 441bb13d4aa10f15056c9a14d109bad795158916 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 13 Apr 2020 18:21:05 +0300 Subject: [PATCH 363/752] better --- src/Interpreters/InterserverIOHandler.h | 2 - .../MergeTree/MergeTreeDataMergerMutator.cpp | 13 ++- .../MergeTree/MergeTreeDataMergerMutator.h | 3 - src/Storages/StorageReplicatedMergeTree.cpp | 107 +++--------------- src/Storages/StorageReplicatedMergeTree.h | 8 +- tests/integration/test_quorum_inserts/test.py | 75 ++++++++++-- 6 files changed, 99 insertions(+), 109 deletions(-) diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index 0ffccb6c33c..952c99ae46d 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -13,8 +13,6 @@ #include #include #include -#include -#include namespace Poco { namespace Net { class HTTPServerResponse; } } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index f892302086d..5450f6f66de 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -222,19 +222,28 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( IMergeSelector::Partitions partitions; const String * prev_partition_id = nullptr; + /// Previous part only in boundaries of partition frame const MergeTreeData::DataPartPtr * prev_part = nullptr; bool has_part_with_expired_ttl = false; for (const MergeTreeData::DataPartPtr & part : data_parts) { - if (!single_merge(part, nullptr)) - continue; + /// Check predicate only for first part in each partition. + if (!prev_part) + /* Parts can be merged with themselves for TTL needs for example. + * So we have to check if this part is currently being inserted with quorum and so on and so forth. + * Obviously we have to check it manually only for the first part + * of each partition because it will be automatically checked for a pair of parts. */ + if (!single_merge(part, nullptr)) + continue; const String & partition_id = part->info.partition_id; if (!prev_partition_id || partition_id != *prev_partition_id || (prev_part && !can_merge_callback(*prev_part, part, nullptr))) { if (partitions.empty() || !partitions.back().empty()) partitions.emplace_back(); + /// New partition frame. prev_partition_id = &partition_id; + prev_part = nullptr; } IMergeSelector::Part part_info; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 0efdb403bf0..0be1909889b 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -51,10 +51,7 @@ class MergeTreeDataMergerMutator public: using AllowedMergingPredicate = std::function; using AllowedSingleMergePredicate = std::function; -// template -// using AllowedMergingPredicate = std::function; -public: MergeTreeDataMergerMutator(MergeTreeData & data_, size_t background_pool_size); /** Get maximum total size of parts to do merge, at current moment of time. 
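A note on the selectPartsToMerge() hunk above: single_merge (bound at the
call site to ReplicatedMergeTreeMergePredicate::canMergeSinglePart(), which
rejects parts still involved in a quorum insert or already assigned to a
merge) is now evaluated only for the first part of each partition frame,
because every later part is already vetted as the right-hand side of
can_merge_callback(*prev_part, part). Schematically (simplified, not the
real loop):

    for (const auto & part : data_parts)
    {
        if (!prev_part && !single_merge(part, nullptr))
            continue;       /// only the frame's first part needs the solo check

        /// ... pairwise can_merge_callback(*prev_part, part) covers the rest;
        /// prev_part is reset to nullptr whenever a new partition frame starts.
    }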
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7548edad48a..fec0d5fc287 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2188,15 +2188,14 @@ void StorageReplicatedMergeTree::mergeSelectingTask() UInt64 max_source_part_size_for_mutation = merger_mutator.getMaxSourcePartSizeForMutation(); FutureMergedMutatedPart future_merged_part; + auto merge_pred_for_single_part = + [&merge_pred] (const MergeTreeData::DataPartPtr & part, String * explain) -> bool + { + return merge_pred.canMergeSinglePart(part, explain); + }; if (max_source_parts_size_for_merge > 0 && - merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, nullptr, - [&merge_pred](const MergeTreeData::DataPartPtr & part, String * explain) -> bool { return merge_pred.canMergeSinglePart(part, explain); })) + merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, nullptr, merge_pred_for_single_part)) { - /// We have to exclude parts, that are currently being writted with quorum. (check .../quorum/status node) - /// Also we have to exclude last parts written with quorum (check .../quorum/last_part node) -// if (global_context.getSettingsRef().insert_quorum) -// excludeSomePartsFromMerge(future_merged_part); - success = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, future_merged_part.name, future_merged_part.type, deduplicate, force_ttl); } @@ -2673,53 +2672,6 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) } -void StorageReplicatedMergeTree::deletePartFromPendingQuorum(const String & part_name) -{ - auto zookeeper = getZooKeeper(); - /// Information on which replicas a part has been added, if the quorum has not yet been reached. - const String quorum_status_path = zookeeper_path + "/quorum/status"; - - /// Delete "status" node if required. - - String value; - Coordination::Stat stat; - - /// If there is no node, then all quorum INSERTs have already reached the quorum, and nothing is needed. - while (zookeeper->tryGet(quorum_status_path, value, &stat)) - { - ReplicatedMergeTreeQuorumEntry quorum_entry; - quorum_entry.fromString(value); - - if (quorum_entry.part_name != part_name) - { - /// There is no information about interested part in this node. - break; - } - - /// Since that we are sure that interested part is being involved in insert with quorum. - /// Our goal is to delete "status" node and information from "last_part" node. - - auto code = zookeeper->tryRemove(quorum_status_path, stat.version); - - if (code == Coordination::ZOK) - { - break; - } - else if (code == Coordination::ZNONODE) - { - /// The quorum has already been achieved. - break; - } - else if (code == Coordination::ZBADVERSION) - { - /// Node was updated meanwhile. We must re-read it and repeat all the actions. 
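/// The same read-modify-write loop survives in cleanLastPartNode() below. A
/// hedged sketch of the pattern in isolation, using zkutil-style calls as in
/// this file; transform() stands for a hypothetical edit step:

    while (true)
    {
        Coordination::Stat stat;
        String value;
        if (!zookeeper->tryGet(path, value, &stat))
            break;                                  /// node is gone: nothing to do

        auto code = zookeeper->trySet(path, transform(value), stat.version);
        if (code == Coordination::ZOK || code == Coordination::ZNONODE)
            break;                                  /// written, or removed concurrently
        if (code == Coordination::ZBADVERSION)
            continue;                               /// lost the race: re-read and retry
        throw Coordination::Exception(code, path);
    }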
- continue; - } - else - throw Coordination::Exception(code, quorum_status_path); - } -} - void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id, const String & part_name) { auto zookeeper = getZooKeeper(); @@ -2752,7 +2704,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id, } /// De Morgan's law - if (part_name == "" || parts_with_quorum.added_parts[partition_id] == part_name) + if (part_name.empty() || parts_with_quorum.added_parts[partition_id] == part_name) parts_with_quorum.added_parts.erase(partition_id); else break; @@ -2781,32 +2733,6 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id, } -void StorageReplicatedMergeTree::excludeSomePartsFromMerge(FutureMergedMutatedPart & future_part) -{ - LOG_FATAL(&Poco::Logger::get("excludeSomePartsFromMerge"), "BEGIN!"); - - auto zookeeper = getZooKeeper(); - /// Information on which replicas a part has been added, if the quorum has not yet been reached. - const String quorum_status_path = zookeeper_path + "/quorum/status"; - - String value; - Coordination::Stat stat; - - if (zookeeper->tryGet(quorum_status_path, value, &stat)) { - ReplicatedMergeTreeQuorumEntry quorum_entry; - quorum_entry.fromString(value); - - MergeTreeData::DataPartsVector & parts_to_merge = future_part.parts; - - parts_to_merge.erase( - std::remove_if( - parts_to_merge.begin(), parts_to_merge.end(), - [&quorum_entry](const MergeTreeData::DataPartPtr & part_to_merge) { return part_to_merge->name == quorum_entry.part_name; }), - parts_to_merge.end()); - } -} - - bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const String & source_replica_path, bool to_detached, size_t quorum) { const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); @@ -5057,10 +4983,10 @@ void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_ } } -void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & context) +void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & query_context) { - auto lock1 = lockStructureForShare(false, context.getCurrentQueryId()); - auto lock2 = dest_table->lockStructureForShare(false, context.getCurrentQueryId()); + auto lock1 = lockStructureForShare(false, query_context.getCurrentQueryId()); + auto lock2 = dest_table->lockStructureForShare(false, query_context.getCurrentQueryId()); auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) @@ -5075,7 +5001,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta Stopwatch watch; MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this); auto src_data_id = src_data.getStorageID(); - String partition_id = getPartitionIDFromQuery(partition, context); + String partition_id = getPartitionIDFromQuery(partition, query_context); DataPartsVector src_all_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); DataPartsVector src_parts; @@ -5231,7 +5157,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta parts_to_remove.clear(); cleanup_thread.wakeup(); - if (context.getSettingsRef().replication_alter_partitions_sync > 1) + if (query_context.getSettingsRef().replication_alter_partitions_sync > 1) { lock2.release(); dest_table_storage->waitForAllReplicasToProcessLogEntry(entry); 
@@ -5246,7 +5172,10 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta log_znode_path = dynamic_cast(*op_results.back()).path_created; entry_delete.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); - if (context.getSettingsRef().replication_alter_partitions_sync > 1) + if (query_context.getSettingsRef().insert_quorum) + cleanLastPartNode(partition_id); + + if (query_context.getSettingsRef().replication_alter_partitions_sync > 1) { lock1.release(); waitForAllReplicasToProcessLogEntry(entry_delete); @@ -5320,10 +5249,10 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti return merger_mutator.ttl_merges_blocker.cancel(); if (action_type == ActionLocks::PartsFetch) - return fetcher.blocker.cancel(); + return fetcher.blocker.cancel(); if (action_type == ActionLocks::PartsSend) - return data_parts_exchange_endpoint ? data_parts_exchange_endpoint->blocker.cancel() : ActionLock(); + return data_parts_exchange_endpoint ? data_parts_exchange_endpoint->blocker.cancel() : ActionLock(); if (action_type == ActionLocks::ReplicationQueue) return queue.actions_blocker.cancel(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index bf07b592e2f..2846e731926 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -464,14 +464,10 @@ private: /// With the quorum being tracked, add a replica to the quorum for the part. void updateQuorum(const String & part_name); - /// Delete particular part name not to track it in future. - void deletePartFromPendingQuorum(const String & part_name); - - + /// Deletes info from quorum/last_part node for particular partition_id. + /// If part_name if specified, deletes only if last_part == part_name. 
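/// (Motivation, inferred from the new integration test below: quorum/last_part
/// records the last quorum-inserted part of each partition; the part named
/// there is excluded from merges and consulted by select_sequential_consistency
/// reads. After MOVE PARTITION that record is stale, hence the
/// cleanLastPartNode(partition_id) call added to movePartitionToTable() above.)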
void cleanLastPartNode(const String & partition_id, const String & part_name = ""); - void excludeSomePartsFromMerge(FutureMergedMutatedPart & future_part); - /// Creates new block number if block with such block_id does not exist std::optional allocateBlockNumber( const String & partition_id, zkutil::ZooKeeperPtr & zookeeper, diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 64190c6d8aa..c85a1825b94 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -143,7 +143,7 @@ def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): "Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') " \ "PARTITION BY d ORDER BY a " - print("Create Replicated table with two replicas") + print("Create Replicated table with three replicas") zero.query(create_query) first.query(create_query) second.query(create_query) @@ -182,6 +182,73 @@ def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): execute_on_all_cluster("DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition") +@pytest.mark.parametrize( + ('add_new_data'), + [ + False, + True + ] +) + +def test_insert_quorum_with_move_partition(started_cluster, add_new_data): + execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source") + execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination") + + create_source = "CREATE TABLE test_insert_quorum_with_move_partition_source " \ + "(a Int8, d Date) " \ + "Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') " \ + "PARTITION BY d ORDER BY a " + + create_destination = "CREATE TABLE test_insert_quorum_with_move_partition_destination " \ + "(a Int8, d Date) " \ + "Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') " \ + "PARTITION BY d ORDER BY a " + + print("Create source Replicated table with three replicas") + zero.query(create_source) + first.query(create_source) + second.query(create_source) + + print("Create destination Replicated table with three replicas") + zero.query(create_destination) + first.query(create_destination) + second.query(create_destination) + + print("Stop fetches for test_insert_quorum_with_move_partition_source at first replica.") + first.query("SYSTEM STOP FETCHES test_insert_quorum_with_move_partition_source") + + print("Insert with quorum. 
(zero and second)") + zero.query("INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(1, '2011-01-01')") + + print("Drop partition.") + zero.query("ALTER TABLE test_insert_quorum_with_move_partition_source MOVE PARTITION '2011-01-01' TO TABLE test_insert_quorum_with_move_partition_destination") + + if (add_new_data): + print("Insert to deleted partition") + zero.query("INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(2, '2011-01-01')") + + print("Resume fetches for test_insert_quorum_with_move_partition_source at first replica.") + first.query("SYSTEM START FETCHES test_insert_quorum_with_move_partition_source") + + print("Sync first replica with others.") + first.query("SYSTEM SYNC REPLICA test_insert_quorum_with_move_partition_source") + + assert "20110101" not in first.query("SELECT * FROM system.zookeeper " \ + "where path='/clickhouse/tables/0/test_insert_quorum_with_move_partition_source/quorum/last_part' " \ + "format Vertical") + + print("Select from updated partition.") + if (add_new_data): + assert TSV("2\t2011-01-01\n") == TSV(zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source")) + assert TSV("2\t2011-01-01\n") == TSV(second.query("SELECT * FROM test_insert_quorum_with_move_partition_source")) + else: + assert TSV("") == TSV(zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source")) + assert TSV("") == TSV(second.query("SELECT * FROM test_insert_quorum_with_move_partition_source")) + + execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source") + execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination") + + def test_insert_quorum_with_ttl(started_cluster): execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_ttl") @@ -204,8 +271,6 @@ def test_insert_quorum_with_ttl(started_cluster): settings={'insert_quorum_timeout' : 5000}) assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail." - print(zero.query("SELECT * FROM system.parts format Vertical")) - print("Wait 10 seconds and TTL merge have to be executed. 
But it won't delete data.") time.sleep(10) assert TSV("1\t2011-01-01\n") == TSV(zero.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 0})) @@ -216,10 +281,6 @@ def test_insert_quorum_with_ttl(started_cluster): print("Sync first replica.") first.query("SYSTEM SYNC REPLICA test_insert_quorum_with_ttl") - - print(first.query("SELECT * from system.replicas format Vertical")) - print(first.query("SELECT * from system.zookeeper where path='/clickhouse/tables/test_insert_quorum_with_ttl/quorum' format Vertical")) - zero.query("INSERT INTO test_insert_quorum_with_ttl(a,d) VALUES(1, '2011-01-01')", settings={'insert_quorum_timeout' : 5000}) From 910ab8a26567e0e440d73549eeb10808bc038142 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 13 Apr 2020 18:42:30 +0300 Subject: [PATCH 364/752] =?UTF-8?q?Move=20contrib/libpcg-random=20?= =?UTF-8?q?=E2=86=92=20base/pcg-random?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/CMakeLists.txt | 3 +- base/pcg-random/CMakeLists.txt | 2 + .../pcg-random/LICENSE | 0 .../libpcg-random => base/pcg-random}/README | 0 .../pcg-random}/pcg_extras.hpp | 8 +-- .../pcg-random}/pcg_random.hpp | 0 .../pcg-random}/pcg_uint128.hpp | 0 cmake/lib_name.cmake | 1 - contrib/libpcg-random/README.md | 52 ------------------- programs/benchmark/CMakeLists.txt | 1 - programs/copier/CMakeLists.txt | 2 +- src/CMakeLists.txt | 4 +- 12 files changed, 10 insertions(+), 63 deletions(-) create mode 100644 base/pcg-random/CMakeLists.txt rename contrib/libpcg-random/LICENSE-APACHE.txt => base/pcg-random/LICENSE (100%) rename {contrib/libpcg-random => base/pcg-random}/README (100%) rename {contrib/libpcg-random/include => base/pcg-random}/pcg_extras.hpp (98%) rename {contrib/libpcg-random/include => base/pcg-random}/pcg_random.hpp (100%) rename {contrib/libpcg-random/include => base/pcg-random}/pcg_uint128.hpp (100%) delete mode 100644 contrib/libpcg-random/README.md diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index e202021fe48..1ead921f7e4 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -3,8 +3,9 @@ if (USE_CLANG_TIDY) endif () add_subdirectory (common) -add_subdirectory (loggers) add_subdirectory (daemon) +add_subdirectory (loggers) +add_subdirectory (pcg-random) if (USE_MYSQL) add_subdirectory (mysqlxx) diff --git a/base/pcg-random/CMakeLists.txt b/base/pcg-random/CMakeLists.txt new file mode 100644 index 00000000000..88acabba6a7 --- /dev/null +++ b/base/pcg-random/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(pcg_random INTERFACE) +target_include_directories(pcg_random INTERFACE .) 
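For reference, since the markdown README is deleted in this move: a minimal,
self-contained usage sketch of what the relocated header provides (standard
pcg-random API; the seeding choice is only illustrative):

    #include <pcg_random.hpp>

    #include <iostream>
    #include <random>

    int main()
    {
        pcg64 rng(std::random_device{}());              /// any integer seed works
        std::uniform_int_distribution<int> dist(1, 6);  /// pcg engines model UniformRandomBitGenerator
        std::cout << dist(rng) << '\n';
    }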
diff --git a/contrib/libpcg-random/LICENSE-APACHE.txt b/base/pcg-random/LICENSE similarity index 100% rename from contrib/libpcg-random/LICENSE-APACHE.txt rename to base/pcg-random/LICENSE diff --git a/contrib/libpcg-random/README b/base/pcg-random/README similarity index 100% rename from contrib/libpcg-random/README rename to base/pcg-random/README diff --git a/contrib/libpcg-random/include/pcg_extras.hpp b/base/pcg-random/pcg_extras.hpp similarity index 98% rename from contrib/libpcg-random/include/pcg_extras.hpp rename to base/pcg-random/pcg_extras.hpp index 929c756b151..118b726dd57 100644 --- a/contrib/libpcg-random/include/pcg_extras.hpp +++ b/base/pcg-random/pcg_extras.hpp @@ -292,7 +292,7 @@ inline itype rotl(itype value, bitcount_t rot) { constexpr bitcount_t bits = sizeof(itype) * 8; constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM +#if defined(PCG_USE_ZEROCHECK_ROTATE_IDIOM) return rot ? (value << rot) | (value >> (bits - rot)) : value; #else return (value << rot) | (value >> ((- rot) & mask)); @@ -304,7 +304,7 @@ inline itype rotr(itype value, bitcount_t rot) { constexpr bitcount_t bits = sizeof(itype) * 8; constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM +#if defined(PCG_USE_ZEROCHECK_ROTATE_IDIOM) return rot ? (value >> rot) | (value << (bits - rot)) : value; #else return (value >> rot) | (value << ((- rot) & mask)); @@ -318,7 +318,7 @@ inline itype rotr(itype value, bitcount_t rot) * * These overloads will be preferred over the general template code above. */ -#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) +#if defined(PCG_USE_INLINE_ASM) && __GNUC__ && (__x86_64__ || __i386__) inline uint8_t rotr(uint8_t value, bitcount_t rot) { @@ -600,7 +600,7 @@ std::ostream& operator<<(std::ostream& out, printable_typename) { #ifdef __GNUC__ int status; char* pretty_name = - abi::__cxa_demangle(implementation_typename, NULL, NULL, &status); + abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status); if (status == 0) out << pretty_name; free(static_cast(pretty_name)); diff --git a/contrib/libpcg-random/include/pcg_random.hpp b/base/pcg-random/pcg_random.hpp similarity index 100% rename from contrib/libpcg-random/include/pcg_random.hpp rename to base/pcg-random/pcg_random.hpp diff --git a/contrib/libpcg-random/include/pcg_uint128.hpp b/base/pcg-random/pcg_uint128.hpp similarity index 100% rename from contrib/libpcg-random/include/pcg_uint128.hpp rename to base/pcg-random/pcg_uint128.hpp diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake index 8f5bebf4abe..f18b2e52576 100644 --- a/cmake/lib_name.cmake +++ b/cmake/lib_name.cmake @@ -2,4 +2,3 @@ set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide) set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src) set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion) set(METROHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) -set(PCG_RANDOM_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libpcg-random/include) diff --git a/contrib/libpcg-random/README.md b/contrib/libpcg-random/README.md deleted file mode 100644 index c6e579cc2cc..00000000000 --- a/contrib/libpcg-random/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# PCG Random Number Generation, C++ Edition - -[PCG-Random website]: http://www.pcg-random.org - -This code provides an implementation of the PCG family of random number -generators, which are fast, statistically excellent, and offer a number of -useful features. 
- -Full details can be found at the [PCG-Random website]. This version -of the code provides many family members -- if you just want one -simple generator, you may prefer the minimal C version of the library. - -There are two kinds of generator, normal generators and extended generators. -Extended generators provide *k* dimensional equidistribution and can perform -party tricks, but generally speaking most people only need the normal -generators. - -There are two ways to access the generators, using a convenience typedef -or by using the underlying templates directly (similar to C++11's `std::mt19937` typedef vs its `std::mersenne_twister_engine` template). For most users, the convenience typedef is what you want, and probably you're fine with `pcg32` for 32-bit numbers. If you want 64-bit numbers, either use `pcg64` (or, if you're on a 32-bit system, making 64 bits from two calls to `pcg32_k2` may be faster). - -## Documentation and Examples - -Visit [PCG-Random website] for information on how to use this library, or look -at the sample code in the `sample` directory -- hopefully it should be fairly -self explanatory. - -## Building - -The code is written in C++11, as an include-only library (i.e., there is -nothing you need to build). There are some provided demo programs and tests -however. On a Unix-style system (e.g., Linux, Mac OS X) you should be able -to just type - - make - -To build the demo programs. - -## Testing - -Run - - make test - -## Directory Structure - -The directories are arranged as follows: - -* `include` -- contains `pcg_random.hpp` and supporting include files -* `test-high` -- test code for the high-level API where the functions have - shorter, less scary-looking names. -* `sample` -- sample code, some similar to the code in `test-high` but more - human readable, some other examples too diff --git a/programs/benchmark/CMakeLists.txt b/programs/benchmark/CMakeLists.txt index 58096985037..be999aafe80 100644 --- a/programs/benchmark/CMakeLists.txt +++ b/programs/benchmark/CMakeLists.txt @@ -1,6 +1,5 @@ set(CLICKHOUSE_BENCHMARK_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Benchmark.cpp) set(CLICKHOUSE_BENCHMARK_LINK PRIVATE dbms clickhouse_aggregate_functions clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) -set(CLICKHOUSE_BENCHMARK_INCLUDE SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) clickhouse_program_add(benchmark) diff --git a/programs/copier/CMakeLists.txt b/programs/copier/CMakeLists.txt index 5573fbc5e5d..76db6ce1ffc 100644 --- a/programs/copier/CMakeLists.txt +++ b/programs/copier/CMakeLists.txt @@ -12,6 +12,6 @@ set(CLICKHOUSE_COPIER_LINK PRIVATE clickhouse_dictionaries string_utils ${Poco_XML_LIBRARY} PUBLIC daemon) -set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) +set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) clickhouse_program_add(copier) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2393e0be18d..9949f13def0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -398,6 +398,7 @@ endif() target_link_libraries(clickhouse_common_io PUBLIC ${CITYHASH_LIBRARIES} + pcg_random PRIVATE ${Poco_XML_LIBRARY} ${ZLIB_LIBRARIES} @@ -453,9 +454,6 @@ dbms_target_link_libraries ( target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/Core/include) # uses some includes from core dbms_target_include_directories(PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/Core/include) -target_include_directories(clickhouse_common_io SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR}) 
-dbms_target_include_directories(SYSTEM PUBLIC ${PCG_RANDOM_INCLUDE_DIR}) - dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) if (NOT USE_INTERNAL_LZ4_LIBRARY AND LZ4_INCLUDE_DIR) From 871d3f2aa0c77455cf31c47f40a1ad4904304403 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 13 Apr 2020 18:55:48 +0300 Subject: [PATCH 365/752] =?UTF-8?q?Move=20contrib/widecharwidth=20?= =?UTF-8?q?=E2=86=92=20base/widechar=5Fwidth?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/CMakeLists.txt | 1 + {contrib/widecharwidth => base/widechar_width}/CMakeLists.txt | 0 {contrib/widecharwidth => base/widechar_width}/LICENSE | 0 {contrib/widecharwidth => base/widechar_width}/README.md | 0 .../widecharwidth => base/widechar_width}/widechar_width.cpp | 0 {contrib/widecharwidth => base/widechar_width}/widechar_width.h | 0 contrib/CMakeLists.txt | 1 - 7 files changed, 1 insertion(+), 1 deletion(-) rename {contrib/widecharwidth => base/widechar_width}/CMakeLists.txt (100%) rename {contrib/widecharwidth => base/widechar_width}/LICENSE (100%) rename {contrib/widecharwidth => base/widechar_width}/README.md (100%) rename {contrib/widecharwidth => base/widechar_width}/widechar_width.cpp (100%) rename {contrib/widecharwidth => base/widechar_width}/widechar_width.h (100%) diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index 1ead921f7e4..cfa54fe2ca4 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -6,6 +6,7 @@ add_subdirectory (common) add_subdirectory (daemon) add_subdirectory (loggers) add_subdirectory (pcg-random) +add_subdirectory (widechar_width) if (USE_MYSQL) add_subdirectory (mysqlxx) diff --git a/contrib/widecharwidth/CMakeLists.txt b/base/widechar_width/CMakeLists.txt similarity index 100% rename from contrib/widecharwidth/CMakeLists.txt rename to base/widechar_width/CMakeLists.txt diff --git a/contrib/widecharwidth/LICENSE b/base/widechar_width/LICENSE similarity index 100% rename from contrib/widecharwidth/LICENSE rename to base/widechar_width/LICENSE diff --git a/contrib/widecharwidth/README.md b/base/widechar_width/README.md similarity index 100% rename from contrib/widecharwidth/README.md rename to base/widechar_width/README.md diff --git a/contrib/widecharwidth/widechar_width.cpp b/base/widechar_width/widechar_width.cpp similarity index 100% rename from contrib/widecharwidth/widechar_width.cpp rename to base/widechar_width/widechar_width.cpp diff --git a/contrib/widecharwidth/widechar_width.h b/base/widechar_width/widechar_width.h similarity index 100% rename from contrib/widecharwidth/widechar_width.h rename to base/widechar_width/widechar_width.h diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9c0a0aac0dd..1d438e29dae 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -333,6 +333,5 @@ add_subdirectory(grpc-cmake) add_subdirectory(replxx-cmake) add_subdirectory(FastMemcpy) -add_subdirectory(widecharwidth) add_subdirectory(consistent-hashing) add_subdirectory(consistent-hashing-sumbur) From 68e82c11e07e9f6d95d68aa8b8c2a288be78bac6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 13 Apr 2020 18:58:29 +0300 Subject: [PATCH 366/752] fix build --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 3e9f476e5ec..2eee24be477 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ 
b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp
@@ -1808,7 +1808,7 @@ bool ReplicatedMergeTreeMergePredicate::canMergeSinglePart(const MergeTreeData::
     if (containing_part != part->name)
     {
         if (out_reason)
-            *out_reason = "Part " + part->name >+ " has already been assigned a merge into " + containing_part;
+            *out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part;
 
         return false;
     }

From eacc04fc5cfbdf5cdb8bce2c9d5e9d1040e5bb07 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 13 Apr 2020 19:33:15 +0300
Subject: [PATCH 367/752] Fix race after cancel of RemoteBlockInputStream.

---
 src/DataStreams/RemoteBlockInputStream.cpp | 13 +++++++++----
 src/DataStreams/RemoteBlockInputStream.h   |  3 ++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/DataStreams/RemoteBlockInputStream.cpp b/src/DataStreams/RemoteBlockInputStream.cpp
index 9d9f629d463..6be189503e9 100644
--- a/src/DataStreams/RemoteBlockInputStream.cpp
+++ b/src/DataStreams/RemoteBlockInputStream.cpp
@@ -359,12 +359,17 @@ void RemoteBlockInputStream::sendQuery()
 
 void RemoteBlockInputStream::tryCancel(const char * reason)
 {
-    bool old_val = false;
-    if (!was_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
-        return;
+    {
+        std::lock_guard guard(was_cancelled_mutex);
+
+        if (was_cancelled)
+            return;
+
+        was_cancelled = true;
+        multiplexed_connections->sendCancel();
+    }
 
     LOG_TRACE(log, "(" << multiplexed_connections->dumpAddresses() << ") " << reason);
-    multiplexed_connections->sendCancel();
 }
 
 bool RemoteBlockInputStream::isQueryPending() const

diff --git a/src/DataStreams/RemoteBlockInputStream.h b/src/DataStreams/RemoteBlockInputStream.h
index 783811f2521..66b1ebbb6c3 100644
--- a/src/DataStreams/RemoteBlockInputStream.h
+++ b/src/DataStreams/RemoteBlockInputStream.h
@@ -135,7 +135,8 @@ private:
      * - data size is already satisfactory (when using LIMIT, for example)
      * - an exception was thrown from client side
      */
-    std::atomic<bool> was_cancelled { false };
+    bool was_cancelled { false };
+    std::mutex was_cancelled_mutex;
 
     /** An exception from replica was received.
No need in receiving more packets or * requesting to cancel query execution From 0b7ae2c2bf79b7da1a7977f8ca761e108d224fd5 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Mon, 13 Apr 2020 20:03:11 +0300 Subject: [PATCH 368/752] working version --- src/Interpreters/HashJoin.cpp | 19 ++-- src/Interpreters/TableJoin.cpp | 32 +++--- src/Interpreters/TableJoin.h | 2 + .../0_stateless/00561_storage_join.sql | 6 +- .../01115_join_with_dictionary.reference | 103 ++++++++++++++++++ .../01115_join_with_dictionary.sql | 90 +++++++++++++++ .../00065_loyalty_with_storage_join.sql | 11 +- 7 files changed, 231 insertions(+), 32 deletions(-) create mode 100644 tests/queries/0_stateless/01115_join_with_dictionary.reference create mode 100644 tests/queries/0_stateless/01115_join_with_dictionary.sql diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 1c0adb96f66..f58efa1920f 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -295,17 +295,22 @@ public: : key_columns(key_columns_) {} - FindResult findKey(const DictionaryReader & reader, size_t i, const Arena &) + FindResult findKey(const TableJoin & table_join, size_t row, const Arena &) { + const DictionaryReader & reader = *table_join.dictionary_reader; if (!read_result) { reader.readKeys(*key_columns[0], key_columns[0]->size(), read_result, found, positions); result.block = &read_result; - /// TODO: check types and correct nullability + + if (table_join.forceNullableRight()) + for (auto & column : read_result) + if (table_join.rightBecomeNullable(column.type)) + JoinCommon::convertColumnToNullable(column); } - result.row_num = positions[i]; - return FindResult(&result, found[i]); + result.row_num = positions[row]; + return FindResult(&result, found[row]); } private: @@ -985,14 +990,14 @@ IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_ } template -IColumn::Filter dictionaryJoinRightColumns(const DictionaryReader & reader, AddedColumns & added_columns, const ConstNullMapPtr & null_map) +IColumn::Filter dictionaryJoinRightColumns(const TableJoin & table_join, AddedColumns & added_columns, const ConstNullMapPtr & null_map) { if constexpr (KIND == ASTTableJoin::Kind::Left && (STRICTNESS == ASTTableJoin::Strictness::Any || STRICTNESS == ASTTableJoin::Strictness::Semi || STRICTNESS == ASTTableJoin::Strictness::Anti)) { - return joinRightColumnsSwitchNullability(reader, added_columns, null_map); + return joinRightColumnsSwitchNullability(table_join, added_columns, null_map); } throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR); @@ -1059,7 +1064,7 @@ void HashJoin::joinBlockImpl( added_columns.need_filter = need_filter || has_required_right_keys; IColumn::Filter row_filter = overDictionary() ? 
- dictionaryJoinRightColumns(*table_join->dictionary_reader, added_columns, null_map) : + dictionaryJoinRightColumns(*table_join, added_columns, null_map) : switchJoinRightColumns(maps_, added_columns, data->type, null_map); for (size_t i = 0; i < added_columns.size(); ++i) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 5e57c740bf1..4c75b23a5eb 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -155,22 +155,26 @@ NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names return getNamesWithAliases(required_columns); } +bool TableJoin::leftBecomeNullable(const DataTypePtr & column_type) const +{ + return forceNullableLeft() && column_type->canBeInsideNullable(); +} + +bool TableJoin::rightBecomeNullable(const DataTypePtr & column_type) const +{ + return forceNullableRight() && column_type->canBeInsideNullable(); +} + void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column) { - if (join_use_nulls && isLeftOrFull(table_join.kind)) - { - auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type; - columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type))); - } + if (rightBecomeNullable(joined_column.type)) + columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, makeNullable(joined_column.type))); else columns_added_by_join.push_back(joined_column); } void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const { - bool right_or_full_join = isRightOrFull(table_join.kind); - bool left_or_full_join = isLeftOrFull(table_join.kind); - for (auto & col : sample_block) { /// Materialize column. @@ -179,9 +183,7 @@ void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) cons if (col.column) col.column = nullptr; - bool make_nullable = join_use_nulls && right_or_full_join; - - if (make_nullable && col.type->canBeInsideNullable()) + if (leftBecomeNullable(col.type)) col.type = makeNullable(col.type); } @@ -189,9 +191,7 @@ void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) cons { auto res_type = col.type; - bool make_nullable = join_use_nulls && left_or_full_join; - - if (make_nullable && res_type->canBeInsideNullable()) + if (rightBecomeNullable(res_type)) res_type = makeNullable(res_type); sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name)); @@ -240,6 +240,10 @@ bool TableJoin::allowMergeJoin() const bool TableJoin::allowDictJoin(const String & dict_key, const Block & sample_block, Names & names, NamesAndTypesList & result_columns) const { + /// Support ALL INNER, [ANY | ALL | SEMI | ANTI] LEFT + if (!isLeft(kind()) && !(isInner(kind()) && strictness() == ASTTableJoin::Strictness::All)) + return false; + const Names & right_keys = keyNamesRight(); if (right_keys.size() != 1) return false; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index c8c51918e27..2047f935966 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -122,6 +122,8 @@ public: size_t rightKeyInclusion(const String & name) const; NameSet requiredRightKeys() const; + bool leftBecomeNullable(const DataTypePtr & column_type) const; + bool rightBecomeNullable(const DataTypePtr & column_type) const; void addJoinedColumn(const NameAndTypePair & joined_column); void addJoinedColumnsAndCorrectNullability(Block & sample_block) const; diff --git a/tests/queries/0_stateless/00561_storage_join.sql 
b/tests/queries/0_stateless/00561_storage_join.sql index 08f76815702..62ca80d31fe 100644 --- a/tests/queries/0_stateless/00561_storage_join.sql +++ b/tests/queries/0_stateless/00561_storage_join.sql @@ -1,5 +1,3 @@ -SET any_join_distinct_right_table_keys = 1; - drop table IF EXISTS joinbug; CREATE TABLE joinbug ( @@ -21,7 +19,7 @@ CREATE TABLE joinbug_join ( val UInt64, val2 Int32, created UInt64 -) ENGINE = Join(ANY, INNER, id2); +) ENGINE = Join(SEMI, LEFT, id2); insert into joinbug_join (id, id2, val, val2, created) select id, id2, val, val2, created @@ -36,7 +34,7 @@ select id, id2, val, val2, created from ( SELECT toUInt64(arrayJoin(range(50))) AS id2 ) js1 -ANY INNER JOIN joinbug_join using id2; +SEMI LEFT JOIN joinbug_join using id2; DROP TABLE joinbug; DROP TABLE joinbug_join; diff --git a/tests/queries/0_stateless/01115_join_with_dictionary.reference b/tests/queries/0_stateless/01115_join_with_dictionary.reference new file mode 100644 index 00000000000..f909a3d61f5 --- /dev/null +++ b/tests/queries/0_stateless/01115_join_with_dictionary.reference @@ -0,0 +1,103 @@ +flat: left on +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +3 3 3 3 3 +4 0 0 0 +flat: left +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +4 0 0 +flat: any left +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +4 0 0 +flat: semi left +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +flat: anti left +4 0 0 +flat: inner +0 0 0 0 +1 1 1 1 +flat: inner on +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +3 3 3 3 3 +hashed: left on +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +3 3 3 3 3 +4 \N \N \N \N +hashed: left +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +4 \N \N \N +hashed: any left +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +4 \N \N \N +hashed: semi left +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +hashed: anti left +4 \N \N \N +hashed: inner +0 0 0 0 +1 1 1 1 +hashed: inner on +0 0 0 0 0 +1 1 1 1 1 +2 2 2 2 2 +3 3 3 3 3 +complex_cache (smoke) +0 \N \N \N \N +1 \N \N \N \N +2 \N \N \N \N +3 \N \N \N \N +4 \N \N \N \N +not optimized (smoke) +0 0 0 0 +1 1 1 1 +2 2 2 2 +3 3 3 3 +- +0 0 0 0 0 +1 1 1 1 1 +\N 2 2 2 2 +\N 3 3 3 3 +- +2 2 2 2 +3 3 3 3 +4 \N \N \N +5 \N \N \N +\N 0 0 0 +\N 1 1 1 +- +0 0 0 0 +1 1 1 1 +- +0 0 0 0 +1 1 1 1 +3 3 3 3 +2 2 2 2 +- +0 0 0 0 +1 1 1 1 +- +3 3 3 3 +2 2 2 2 diff --git a/tests/queries/0_stateless/01115_join_with_dictionary.sql b/tests/queries/0_stateless/01115_join_with_dictionary.sql new file mode 100644 index 00000000000..65704f2b3eb --- /dev/null +++ b/tests/queries/0_stateless/01115_join_with_dictionary.sql @@ -0,0 +1,90 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS db_01115; +CREATE DATABASE db_01115 Engine = Ordinary; + +USE db_01115; + +DROP DICTIONARY IF EXISTS dict_flat; +DROP DICTIONARY IF EXISTS dict_hashed; +DROP DICTIONARY IF EXISTS dict_complex_cache; + +CREATE TABLE t1 (key UInt64, a UInt8, b String, c Float64) ENGINE = MergeTree() ORDER BY key; +INSERT INTO t1 SELECT number, number, toString(number), number from numbers(4); + +CREATE DICTIONARY dict_flat (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' PASSWORD '' DB 'db_01115')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(FLAT()); + +CREATE DICTIONARY db_01115.dict_hashed (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' DB 'db_01115')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED()); + +CREATE DICTIONARY dict_complex_cache (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b 
String DEFAULT 'x', c Float64 DEFAULT 42.0) +PRIMARY KEY key, b +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' DB 'db_01115')) +LIFETIME(MIN 1 MAX 10) +LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 1)); + +SET join_use_nulls = 0; + +SELECT 'flat: left on'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_flat d ON s1.key = d.key ORDER BY s1.key; +SELECT 'flat: left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_flat d USING(key) ORDER BY key; +SELECT 'flat: any left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANY LEFT JOIN dict_flat d USING(key) ORDER BY key; +SELECT 'flat: semi left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 SEMI JOIN dict_flat d USING(key) ORDER BY key; +SELECT 'flat: anti left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANTI JOIN dict_flat d USING(key) ORDER BY key; +SELECT 'flat: inner'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 JOIN dict_flat d USING(key); +SELECT 'flat: inner on'; +SELECT * FROM (SELECT number AS k FROM numbers(100)) s1 JOIN dict_flat d ON k = key ORDER BY k; + +SET join_use_nulls = 1; + +SELECT 'hashed: left on'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_hashed d ON s1.key = d.key ORDER BY s1.key; +SELECT 'hashed: left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_hashed d USING(key) ORDER BY key; +SELECT 'hashed: any left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANY LEFT JOIN dict_hashed d USING(key) ORDER BY key; +SELECT 'hashed: semi left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 SEMI JOIN dict_hashed d USING(key) ORDER BY key; +SELECT 'hashed: anti left'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANTI JOIN dict_hashed d USING(key) ORDER BY key; +SELECT 'hashed: inner'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 JOIN dict_hashed d USING(key); +SELECT 'hashed: inner on'; +SELECT * FROM (SELECT number AS k FROM numbers(100)) s1 JOIN dict_hashed d ON k = key ORDER BY k; + +SELECT 'complex_cache (smoke)'; +SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_complex_cache d ON s1.key = d.key ORDER BY s1.key; + +SELECT 'not optimized (smoke)'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 RIGHT JOIN dict_flat d USING(key) ORDER BY key; +SELECT '-'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 RIGHT JOIN dict_flat d ON s1.key = d.key ORDER BY d.key; +SELECT '-'; +SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s1 FULL JOIN dict_flat d USING(key) ORDER BY s1.key, d.key; +SELECT '-'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY INNER JOIN dict_flat d USING(key); +SELECT '-'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY RIGHT JOIN dict_flat d USING(key); +SELECT '-'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 SEMI RIGHT JOIN dict_flat d USING(key); +SELECT '-'; +SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANTI RIGHT JOIN dict_flat d USING(key); + +DROP DICTIONARY dict_flat; +DROP DICTIONARY dict_hashed; +DROP DICTIONARY dict_complex_cache; + +DROP TABLE t1; +DROP DATABASE IF EXISTS db_01115; diff --git a/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql b/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql index 2fa337b6ebc..15a2a75cf58 100644 --- a/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql +++ b/tests/queries/1_stateful/00065_loyalty_with_storage_join.sql @@ -1,10 
+1,7 @@ -SET any_join_distinct_right_table_keys = 1; - USE test; DROP TABLE IF EXISTS join; -CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID) -SETTINGS any_join_distinct_right_table_keys = 1; +CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(SEMI, LEFT, UserID); INSERT INTO join SELECT @@ -20,17 +17,17 @@ HAVING (yandex + google) > 10; SELECT loyalty, count() -FROM hits ANY INNER JOIN join USING UserID +FROM hits SEMI LEFT JOIN join USING UserID GROUP BY loyalty ORDER BY loyalty ASC; DETACH TABLE join; -ATTACH TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID); +ATTACH TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(SEMI, LEFT, UserID); SELECT loyalty, count() -FROM hits ANY INNER JOIN join USING UserID +FROM hits SEMI LEFT JOIN join USING UserID GROUP BY loyalty ORDER BY loyalty ASC; From 0aa4c85602df716a3fa4cbda23b9866e26a22dcd Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Mon, 13 Apr 2020 20:04:17 +0300 Subject: [PATCH 369/752] Fixed style checker complaint --- src/Interpreters/MutationsInterpreter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 985fda3aac7..2d3c01292b8 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -40,7 +40,8 @@ namespace class FirstNonDeterministicFuncMatcher { public: - struct Data { + struct Data + { const Context & context; std::optional nondeterministic_function_name; }; @@ -68,8 +69,7 @@ public: } }; -using FirstNonDeterministicFuncFinder = - InDepthNodeVisitor; +using FirstNonDeterministicFuncFinder = InDepthNodeVisitor; std::optional findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context) { From df01227bd24ead0d7568c47ef122ae3299323c76 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Mon, 13 Apr 2020 20:36:07 +0300 Subject: [PATCH 370/752] move DictionaryReader code out of header --- src/Interpreters/DictionaryReader.cpp | 167 ++++++++++++++++++++++++++ src/Interpreters/DictionaryReader.h | 158 ++---------------------- src/Interpreters/HashJoin.cpp | 2 +- 3 files changed, 176 insertions(+), 151 deletions(-) create mode 100644 src/Interpreters/DictionaryReader.cpp diff --git a/src/Interpreters/DictionaryReader.cpp b/src/Interpreters/DictionaryReader.cpp new file mode 100644 index 00000000000..301fe9d57c6 --- /dev/null +++ b/src/Interpreters/DictionaryReader.cpp @@ -0,0 +1,167 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int TYPE_MISMATCH; +} + + +DictionaryReader::FunctionWrapper::FunctionWrapper(FunctionOverloadResolverPtr resolver, const ColumnsWithTypeAndName & arguments, + Block & block, const ColumnNumbers & arg_positions_, const String & column_name, + TypeIndex expected_type) + : arg_positions(arg_positions_) + , result_pos(block.columns()) +{ + FunctionBasePtr prepared_function = resolver->build(arguments); + + ColumnWithTypeAndName result; + result.name = "get_" + column_name; + result.type = prepared_function->getReturnType(); + if (result.type->getTypeId() != expected_type) + throw Exception("Type mismatch in dictionary reader for: " + column_name, ErrorCodes::TYPE_MISMATCH); + block.insert(result); + + function = prepared_function->prepare(block, arg_positions, result_pos); +} + +static constexpr 
const size_t key_size = 1; + +DictionaryReader::DictionaryReader(const String & dictionary_name, const Names & src_column_names, const NamesAndTypesList & result_columns, + const Context & context) + : result_header(makeResultBlock(result_columns)) + , key_position(key_size + result_header.columns()) +{ + if (src_column_names.size() != result_columns.size()) + throw Exception("Columns number mismatch in dictionary reader", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); + + ColumnWithTypeAndName dict_name; + ColumnWithTypeAndName key; + ColumnWithTypeAndName column_name; + + { + dict_name.name = "dict"; + dict_name.type = std::make_shared(); + dict_name.column = dict_name.type->createColumnConst(1, dictionary_name); + + /// TODO: composite key (key_size > 1) + key.name = "key"; + key.type = std::make_shared(); + + column_name.name = "column"; + column_name.type = std::make_shared(); + } + + /// dictHas('dict_name', id) + ColumnsWithTypeAndName arguments_has; + arguments_has.push_back(dict_name); + arguments_has.push_back(key); + + /// dictGet('dict_name', 'attr_name', id) + ColumnsWithTypeAndName arguments_get; + arguments_get.push_back(dict_name); + arguments_get.push_back(column_name); + arguments_get.push_back(key); + + sample_block.insert(dict_name); + + for (auto & columns_name : src_column_names) + { + ColumnWithTypeAndName name; + name.name = "col_" + columns_name; + name.type = std::make_shared(); + name.column = name.type->createColumnConst(1, columns_name); + + sample_block.insert(name); + } + + sample_block.insert(key); + + ColumnNumbers positions_has{0, key_position}; + function_has = std::make_unique(FunctionFactory::instance().get("dictHas", context), + arguments_has, sample_block, positions_has, "has", DataTypeUInt8().getTypeId()); + functions_get.reserve(result_header.columns()); + + for (size_t i = 0; i < result_header.columns(); ++i) + { + size_t column_name_pos = key_size + i; + auto & column = result_header.getByPosition(i); + arguments_get[1].column = DataTypeString().createColumnConst(1, src_column_names[i]); + ColumnNumbers positions_get{0, column_name_pos, key_position}; + functions_get.emplace_back( + FunctionWrapper(FunctionFactory::instance().get("dictGet", context), + arguments_get, sample_block, positions_get, column.name, column.type->getTypeId())); + } +} + +void DictionaryReader::readKeys(const IColumn & keys, Block & out_block, ColumnVector::Container & found, + std::vector & positions) const +{ + Block working_block = sample_block; + size_t has_position = key_position + 1; + size_t size = keys.size(); + + /// set keys for dictHas() + ColumnWithTypeAndName & key_column = working_block.getByPosition(key_position); + key_column.column = keys.cloneResized(size); /// just a copy we cannot avoid + + /// calculate and extract dictHas() + function_has->execute(working_block, size); + ColumnWithTypeAndName & has_column = working_block.getByPosition(has_position); + auto mutable_has = (*std::move(has_column.column)).mutate(); + found.swap(typeid_cast &>(*mutable_has).getData()); + has_column.column = nullptr; + + /// set mapping form source keys to resulting rows in output block + positions.clear(); + positions.resize(size, 0); + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + if (found[i]) + positions[i] = pos++; + + /// set keys for dictGet(): remove not found keys + key_column.column = key_column.column->filter(found, -1); + size_t rows = key_column.column->size(); + + /// calculate dictGet() + for (auto & func : functions_get) + func.execute(working_block, 
rows); + + /// make result: copy header block with correct names and move data columns + out_block = result_header.cloneEmpty(); + size_t first_get_position = has_position + 1; + for (size_t i = 0; i < out_block.columns(); ++i) + { + auto & src_column = working_block.getByPosition(first_get_position + i); + auto & dst_column = out_block.getByPosition(i); + dst_column.column = src_column.column; + src_column.column = nullptr; + } +} + +Block DictionaryReader::makeResultBlock(const NamesAndTypesList & names) +{ + Block block; + for (auto & nm : names) + { + ColumnWithTypeAndName column{nullptr, nm.type, nm.name}; + if (column.type->isNullable()) + column.type = typeid_cast(*column.type).getNestedType(); + block.insert(std::move(column)); + } + return block; +} + +} diff --git a/src/Interpreters/DictionaryReader.h b/src/Interpreters/DictionaryReader.h index 823a3690669..92e4924ae80 100644 --- a/src/Interpreters/DictionaryReader.h +++ b/src/Interpreters/DictionaryReader.h @@ -1,25 +1,16 @@ #pragma once -#include #include -#include -#include -#include #include -#include -#include -#include -#include +#include namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; - extern const int TYPE_MISMATCH; -} +class Context; +/// Read block of required columns from Dictionary by UInt64 key column. Rename columns if needed. +/// Current implementation uses dictHas() + N * dictGet() functions. class DictionaryReader { public: @@ -30,21 +21,7 @@ public: size_t result_pos = 0; FunctionWrapper(FunctionOverloadResolverPtr resolver, const ColumnsWithTypeAndName & arguments, Block & block, - const ColumnNumbers & arg_positions_, const String & column_name, TypeIndex expected_type) - : arg_positions(arg_positions_) - , result_pos(block.columns()) - { - FunctionBasePtr prepared_function = resolver->build(arguments); - - ColumnWithTypeAndName result; - result.name = "get_" + column_name; - result.type = prepared_function->getReturnType(); - if (result.type->getTypeId() != expected_type) - throw Exception("Type mismatch in dictionary reader for: " + column_name, ErrorCodes::TYPE_MISMATCH); - block.insert(result); - - function = prepared_function->prepare(block, arg_positions, result_pos); - } + const ColumnNumbers & arg_positions_, const String & column_name, TypeIndex expected_type); void execute(Block & block, size_t rows) const { @@ -53,116 +30,8 @@ public: }; DictionaryReader(const String & dictionary_name, const Names & src_column_names, const NamesAndTypesList & result_columns, - const Context & context, size_t key_size = 1) - : result_header(makeResultBlock(result_columns)) - , key_position(key_size + result_header.columns()) - { - if (src_column_names.size() != result_columns.size()) - throw Exception("Columns number mismatch in dictionary reader", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - - ColumnWithTypeAndName dict_name; - ColumnWithTypeAndName key; - ColumnWithTypeAndName column_name; - - { - dict_name.name = "dict"; - dict_name.type = std::make_shared(); - dict_name.column = dict_name.type->createColumnConst(1, dictionary_name); - - /// TODO: composite key (key_size > 1) - key.name = "key"; - key.type = std::make_shared(); - - column_name.name = "column"; - column_name.type = std::make_shared(); - } - - /// dictHas('dict_name', id) - ColumnsWithTypeAndName arguments_has; - arguments_has.push_back(dict_name); - arguments_has.push_back(key); - - /// dictGet('dict_name', 'attr_name', id) - ColumnsWithTypeAndName arguments_get; - arguments_get.push_back(dict_name); - 
arguments_get.push_back(column_name); - arguments_get.push_back(key); - - sample_block.insert(dict_name); - - for (auto & columns_name : src_column_names) - { - ColumnWithTypeAndName name; - name.name = "col_" + columns_name; - name.type = std::make_shared(); - name.column = name.type->createColumnConst(1, columns_name); - - sample_block.insert(name); - } - - sample_block.insert(key); - - ColumnNumbers positions_has{0, key_position}; - function_has = std::make_unique(FunctionFactory::instance().get("dictHas", context), - arguments_has, sample_block, positions_has, "has", DataTypeUInt8().getTypeId()); - functions_get.reserve(result_header.columns()); - - for (size_t i = 0; i < result_header.columns(); ++i) - { - size_t column_name_pos = key_size + i; - auto & column = result_header.getByPosition(i); - arguments_get[1].column = DataTypeString().createColumnConst(1, src_column_names[i]); - ColumnNumbers positions_get{0, column_name_pos, key_position}; - functions_get.emplace_back( - FunctionWrapper(FunctionFactory::instance().get("dictGet", context), - arguments_get, sample_block, positions_get, column.name, column.type->getTypeId())); - } - } - - void readKeys(const IColumn & keys, size_t size, Block & out_block, ColumnVector::Container & found, - std::vector & positions) const - { - Block working_block = sample_block; - size_t has_position = key_position + 1; - - /// set keys for dictHas() - ColumnWithTypeAndName & key_column = working_block.getByPosition(key_position); - key_column.column = keys.cloneResized(size); /// just a copy we cannot avoid - - /// calculate and extract dictHas() - function_has->execute(working_block, size); - ColumnWithTypeAndName & has_column = working_block.getByPosition(has_position); - auto mutable_has = (*std::move(has_column.column)).mutate(); - found.swap(typeid_cast &>(*mutable_has).getData()); - has_column.column = nullptr; - - /// set mapping form source keys to resulting rows in output block - positions.clear(); - positions.resize(size, 0); - size_t pos = 0; - for (size_t i = 0; i < size; ++i) - if (found[i]) - positions[i] = pos++; - - /// set keys for dictGet(): remove not found keys - key_column.column = key_column.column->filter(found, -1); - size_t rows = key_column.column->size(); - - /// calculate dictGet() - for (auto & func : functions_get) - func.execute(working_block, rows); - - /// make result: copy header block with correct names and move data columns - out_block = result_header.cloneEmpty(); - size_t first_get_position = has_position + 1; - for (size_t i = 0; i < out_block.columns(); ++i) - { - auto & src_column = working_block.getByPosition(first_get_position + i); - auto & dst_column = out_block.getByPosition(i); - dst_column.column = src_column.column; - src_column.column = nullptr; - } - } + const Context & context); + void readKeys(const IColumn & keys, Block & out_block, ColumnVector::Container & found, std::vector & positions) const; private: Block result_header; @@ -171,18 +40,7 @@ private: std::unique_ptr function_has; std::vector functions_get; - static Block makeResultBlock(const NamesAndTypesList & names) - { - Block block; - for (auto & nm : names) - { - ColumnWithTypeAndName column{nullptr, nm.type, nm.name}; - if (column.type->isNullable()) - column.type = typeid_cast(*column.type).getNestedType(); - block.insert(std::move(column)); - } - return block; - } + static Block makeResultBlock(const NamesAndTypesList & names); }; } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 
f58efa1920f..22a8a87cbe0 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -300,7 +300,7 @@ public: const DictionaryReader & reader = *table_join.dictionary_reader; if (!read_result) { - reader.readKeys(*key_columns[0], key_columns[0]->size(), read_result, found, positions); + reader.readKeys(*key_columns[0], read_result, found, positions); result.block = &read_result; if (table_join.forceNullableRight()) From 45e85724a61b6ea80f065245b84f0ecdfbc83f43 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 13 Apr 2020 21:33:25 +0300 Subject: [PATCH 371/752] Update MutationsInterpreter.cpp --- src/Interpreters/MutationsInterpreter.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 2d3c01292b8..1682bc11f80 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -59,6 +59,9 @@ public: if (const auto * function = typeid_cast(node.get())) { + /// Lambda functions also may be non-deterministic. But we skip them for simplicity. + /// Replication will work correctly even if non-deterministic function is used, + /// it will select any of the results and discard other. if (function->name != "lambda") { const auto func = FunctionFactory::instance().get(function->name, data.context); From 586c295b944ed2c78c46f0ab64d78826093f5134 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 21:51:17 +0300 Subject: [PATCH 372/752] Added SummingSortedAlgorithm --- .../Merges/AggregatingSortedAlgorithm.cpp | 48 +- .../Merges/AggregatingSortedAlgorithm.h | 5 - .../Merges/SummingSortedAlgorithm.cpp | 594 +++++++++++++++++ .../Merges/SummingSortedAlgorithm.h | 152 +++++ .../Merges/SummingSortedTransform.cpp | 626 ------------------ .../Merges/SummingSortedTransform.h | 158 +---- 6 files changed, 786 insertions(+), 797 deletions(-) create mode 100644 src/Processors/Merges/SummingSortedAlgorithm.cpp create mode 100644 src/Processors/Merges/SummingSortedAlgorithm.h delete mode 100644 src/Processors/Merges/SummingSortedTransform.cpp diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp index e6138ecc6ab..85d45afd168 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -136,8 +138,9 @@ static MutableColumns getMergedColumns(const Block & header, const AggregatingSo for (auto & desc : def.columns_to_simple_aggregate) { - auto & type = header.getByPosition(desc.column_number).type; - columns[desc.column_number] = recursiveRemoveLowCardinality(type)->createColumn(); + auto & type = desc.nested_type ? 
desc.nested_type + : desc.real_type; + columns[desc.column_number] = type->createColumn(); } for (size_t i = 0; i < columns.size(); ++i) @@ -147,7 +150,8 @@ static MutableColumns getMergedColumns(const Block & header, const AggregatingSo return columns; } -static void prepareChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def) +/// Remove constants and LowCardinality for SimpleAggregateFunction +static void preprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def) { auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); @@ -162,6 +166,25 @@ static void prepareChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Column chunk.setColumns(std::move(columns), num_rows); } +/// Return back LowCardinality for SimpleAggregateFunction +static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def) +{ + size_t num_rows = chunk.getNumRows(); + auto columns_ = chunk.detachColumns(); + + for (auto & desc : def.columns_to_simple_aggregate) + { + if (desc.nested_type) + { + auto & from_type = desc.nested_type; + auto & to_type = desc.real_type; + columns_[desc.column_number] = recursiveTypeConversion(columns_[desc.column_number], from_type, to_type); + } + } + + chunk.setColumns(std::move(columns_), num_rows); +} + AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData( MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_) @@ -226,21 +249,8 @@ Chunk AggregatingSortedAlgorithm::AggregatingMergedData::pull() throw Exception("Can't pull chunk because group was not finished.", ErrorCodes::LOGICAL_ERROR); auto chunk = MergedData::pull(); + postprocessChunk(chunk, def); - size_t num_rows = chunk.getNumRows(); - auto columns_ = chunk.detachColumns(); - - for (auto & desc : def.columns_to_simple_aggregate) - { - if (desc.nested_type) - { - auto & from_type = desc.nested_type; - auto & to_type = desc.real_type; - columns_[desc.column_number] = recursiveTypeConversion(columns_[desc.column_number], from_type, to_type); - } - } - - chunk.setColumns(std::move(columns_), num_rows); initAggregateDescription(); return chunk; @@ -269,14 +279,14 @@ void AggregatingSortedAlgorithm::initialize(Chunks chunks) { for (auto & chunk : chunks) if (chunk) - prepareChunk(chunk, columns_definition); + preprocessChunk(chunk, columns_definition); initializeQueue(std::move(chunks)); } void AggregatingSortedAlgorithm::consume(Chunk chunk, size_t source_num) { - prepareChunk(chunk, columns_definition); + preprocessChunk(chunk, columns_definition); updateCursor(std::move(chunk), source_num); } diff --git a/src/Processors/Merges/AggregatingSortedAlgorithm.h b/src/Processors/Merges/AggregatingSortedAlgorithm.h index ad5644f7189..2457bb54e5e 100644 --- a/src/Processors/Merges/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/AggregatingSortedAlgorithm.h @@ -2,11 +2,6 @@ #include #include -#include -#include -#include -#include -#include namespace DB { diff --git a/src/Processors/Merges/SummingSortedAlgorithm.cpp b/src/Processors/Merges/SummingSortedAlgorithm.cpp new file mode 100644 index 00000000000..357ecc7114e --- /dev/null +++ b/src/Processors/Merges/SummingSortedAlgorithm.cpp @@ -0,0 +1,594 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int CORRUPTED_DATA; +} + +static bool isInPrimaryKey(const SortDescription & description, const std::string & 
name, const size_t number) +{ + for (auto & desc : description) + if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number)) + return true; + + return false; +} + +/// Returns true if merge result is not empty +static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, Row & row, SortCursor & cursor) +{ + /// Strongly non-optimal. + + Row & left = row; + Row right(left.size()); + + for (size_t col_num : desc.key_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + for (size_t col_num : desc.val_col_nums) + right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + + auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & + { + return matrix[i].get()[j]; + }; + + auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array + { + size_t size = col_nums.size(); + Array res(size); + for (size_t col_num_index = 0; col_num_index < size; ++col_num_index) + res[col_num_index] = at_ith_column_jth_row(matrix, col_nums[col_num_index], j); + return res; + }; + + std::map merged; + + auto accumulate = [](Array & dst, const Array & src) + { + bool has_non_zero = false; + size_t size = dst.size(); + for (size_t i = 0; i < size; ++i) + if (applyVisitor(FieldVisitorSum(src[i]), dst[i])) + has_non_zero = true; + return has_non_zero; + }; + + auto merge = [&](const Row & matrix) + { + size_t rows = matrix[desc.key_col_nums[0]].get().size(); + + for (size_t j = 0; j < rows; ++j) + { + Array key = tuple_of_nth_columns_at_jth_row(matrix, desc.key_col_nums, j); + Array value = tuple_of_nth_columns_at_jth_row(matrix, desc.val_col_nums, j); + + auto it = merged.find(key); + if (merged.end() == it) + merged.emplace(std::move(key), std::move(value)); + else + { + if (!accumulate(it->second, value)) + merged.erase(it); + } + } + }; + + merge(left); + merge(right); + + for (size_t col_num : desc.key_col_nums) + row[col_num] = Array(merged.size()); + for (size_t col_num : desc.val_col_nums) + row[col_num] = Array(merged.size()); + + size_t row_num = 0; + for (const auto & key_value : merged) + { + for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) + row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + + for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) + row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + + ++row_num; + } + + return row_num != 0; +} + +static SummingSortedAlgorithm::ColumnsDefinition defineColumns( + const Block & header, + const SortDescription & description, + const Names & column_names_to_sum) +{ + size_t num_columns = header.columns(); + SummingSortedAlgorithm::ColumnsDefinition def; + + /// name of nested structure -> the column numbers that refer to it. + std::unordered_map> discovered_maps; + + /** Fill in the column numbers, which must be summed. + * This can only be numeric columns that are not part of the sort key. + * If a non-empty column_names_to_sum is specified, then we only take these columns. + * Some columns from column_names_to_sum may not be found. This is ignored. 
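+      *
+      * A hypothetical illustration (table and names are not from this patch):
+      *     CREATE TABLE t (k UInt64, v UInt64, s String) ENGINE = SummingMergeTree ORDER BY k
+      *   k -> in the sort key, so not aggregated;
+      *   v -> summable numeric, aggregated with 'sumWithOverflow';
+      *   s -> not summable, kept unaggregated (a single value per group survives).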
+ */ + for (size_t i = 0; i < num_columns; ++i) + { + const ColumnWithTypeAndName & column = header.safeGetByPosition(i); + + /// Discover nested Maps and find columns for summation + if (typeid_cast(column.type.get())) + { + const auto map_name = Nested::extractTableName(column.name); + /// if nested table name ends with `Map` it is a possible candidate for special handling + if (map_name == column.name || !endsWith(map_name, "Map")) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + discovered_maps[map_name].emplace_back(i); + } + else + { + bool is_agg_func = WhichDataType(column.type).isAggregateFunction(); + + /// There are special const columns for example after prewhere sections. + if ((!column.type->isSummable() && !is_agg_func) || isColumnConst(*column.column)) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + /// Are they inside the PK? + if (isInPrimaryKey(description, column.name, i)) + { + def.column_numbers_not_to_aggregate.push_back(i); + continue; + } + + if (column_names_to_sum.empty() + || column_names_to_sum.end() != + std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) + { + // Create aggregator to sum this column + SummingSortedAlgorithm::AggregateDescription desc; + desc.is_agg_func_type = is_agg_func; + desc.column_numbers = {i}; + + if (!is_agg_func) + { + desc.init("sumWithOverflow", {column.type}); + } + + def.columns_to_aggregate.emplace_back(std::move(desc)); + } + else + { + // Column is not going to be summed, use last value + def.column_numbers_not_to_aggregate.push_back(i); + } + } + } + + /// select actual nested Maps from list of candidates + for (const auto & map : discovered_maps) + { + /// map should contain at least two elements (key -> value) + if (map.second.size() < 2) + { + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + continue; + } + + /// no elements of map could be in primary key + auto column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it)) + break; + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + continue; + } + + DataTypes argument_types; + SummingSortedAlgorithm::AggregateDescription desc; + SummingSortedAlgorithm::MapDescription map_desc; + + column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + { + const ColumnWithTypeAndName & key_col = header.safeGetByPosition(*column_num_it); + const String & name = key_col.name; + const IDataType & nested_type = *assert_cast(*key_col.type).getNestedType(); + + if (column_num_it == map.second.begin() + || endsWith(name, "ID") + || endsWith(name, "Key") + || endsWith(name, "Type")) + { + if (!nested_type.isValueRepresentedByInteger() && !isStringOrFixedString(nested_type)) + break; + + map_desc.key_col_nums.push_back(*column_num_it); + } + else + { + if (!nested_type.isSummable()) + break; + + map_desc.val_col_nums.push_back(*column_num_it); + } + + // Add column to function arguments + desc.column_numbers.push_back(*column_num_it); + argument_types.push_back(key_col.type); + } + + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + continue; + } + + if (map_desc.key_col_nums.size() == 1) + { + // Create summation for all value columns in the map + 
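// E.g. hypothetical nested columns GoalsMap.ID and GoalsMap.Visits (both Array)
+                // would be summed as one sumMapWithOverflow state over the (ID, Visits) pair.
+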
desc.init("sumMapWithOverflow", argument_types); + def.columns_to_aggregate.emplace_back(std::move(desc)); + } + else + { + // Fall back to legacy mergeMaps for composite keys + for (auto col : map.second) + def.column_numbers_not_to_aggregate.push_back(col); + def.maps_to_sum.emplace_back(std::move(map_desc)); + } + } + + return def; +} + +static MutableColumns getMergedDataColumns( + const Block & header, + const SummingSortedAlgorithm::ColumnsDefinition & columns_definition) +{ + MutableColumns columns; + columns.reserve(columns_definition.getNumColumns()); + + for (auto & desc : columns_definition.columns_to_aggregate) + { + // Wrap aggregated columns in a tuple to match function signature + if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType())) + { + size_t tuple_size = desc.column_numbers.size(); + MutableColumns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = header.safeGetByPosition(desc.column_numbers[i]).column->cloneEmpty(); + + columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); + } + else + columns.emplace_back(header.safeGetByPosition(desc.column_numbers[0]).column->cloneEmpty()); + } + + for (auto & column_number : columns_definition.column_numbers_not_to_aggregate) + columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn()); + + return columns; +} + +static void preprocessChunk(Chunk & chunk) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + for (auto & column : columns) + column = column->convertToFullColumnIfConst(); + + chunk.setColumns(std::move(columns), num_rows); +} + +static void postprocessChunk( + Chunk & chunk, size_t num_result_columns, + const SummingSortedAlgorithm::ColumnsDefinition & def) +{ + size_t num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + Columns res_columns(num_result_columns); + size_t next_column = 0; + + for (auto & desc : def.columns_to_aggregate) + { + auto column = std::move(columns[next_column]); + ++next_column; + + if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType())) + { + /// Unpack tuple into block. + size_t tuple_size = desc.column_numbers.size(); + for (size_t i = 0; i < tuple_size; ++i) + res_columns[desc.column_numbers[i]] = assert_cast(*column).getColumnPtr(i); + } + else + res_columns[desc.column_numbers[0]] = std::move(column); + } + + for (auto column_number : def.column_numbers_not_to_aggregate) + { + auto column = std::move(columns[next_column]); + ++next_column; + + res_columns[column_number] = std::move(column); + } + + chunk.setColumns(std::move(res_columns), num_rows); +} + +static void setRow(Row & row, SortCursor & cursor, const Names & column_names) +{ + size_t num_columns = row.size(); + for (size_t i = 0; i < num_columns; ++i) + { + try + { + cursor->all_columns[i]->get(cursor->pos, row[i]); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + + /// Find out the name of the column and throw more informative exception. + + String column_name; + if (i < column_names.size()) + column_name = column_names[i]; + + throw Exception("MergingSortedBlockInputStream failed to read row " + toString(cursor->pos) + + " of column " + toString(i) + (column_name.empty() ? 
"" : " (" + column_name + ")"), + ErrorCodes::CORRUPTED_DATA); + } + } +} + + +Chunk SummingSortedAlgorithm::SummingMergedData::pull(size_t num_result_columns, const ColumnsDefinition & def) +{ + auto chunk = MergedData::pull(); + postprocessChunk(chunk, num_result_columns, def); + return chunk; +} + +SummingSortedAlgorithm::SummingSortedAlgorithm( + const Block & header, size_t num_inputs, + SortDescription description_, + const Names & column_names_to_sum, + size_t max_block_size) + : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) + , columns_definition(defineColumns(header, description_, column_names_to_sum)) + , merged_data(getMergedDataColumns(header, columns_definition), false, max_block_size) + , column_names(header.getNames()) +{ + current_row.resize(header.columns()); + merged_data.initAggregateDescription(columns_definition.columns_to_aggregate); +} + +void SummingSortedAlgorithm::initialize(Chunks chunks) +{ + for (auto & chunk : chunks) + if (chunk) + preprocessChunk(chunk); + + initializeQueue(std::move(chunks)); +} + +void SummingSortedAlgorithm::consume(Chunk chunk, size_t source_num) +{ + preprocessChunk(chunk); + updateCursor(std::move(chunk), source_num); +} + + +void SummingSortedAlgorithm::insertCurrentRowIfNeeded() +{ + /// We have nothing to aggregate. It means that it could be non-zero, because we have columns_not_to_aggregate. + if (columns_definition.columns_to_aggregate.empty()) + current_row_is_zero = false; + + for (auto & desc : columns_definition.columns_to_aggregate) + { + // Do not insert if the aggregation state hasn't been created + if (desc.created) + { + if (desc.is_agg_func_type) + { + current_row_is_zero = false; + } + else + { + try + { + desc.function->insertResultInto(desc.state.data(), *desc.merged_column); + + /// Update zero status of current row + if (desc.column_numbers.size() == 1) + { + // Flag row as non-empty if at least one column number if non-zero + current_row_is_zero = current_row_is_zero && desc.merged_column->isDefaultAt(desc.merged_column->size() - 1); + } + else + { + /// It is sumMapWithOverflow aggregate function. + /// Assume that the row isn't empty in this case (just because it is compatible with previous version) + current_row_is_zero = false; + } + } + catch (...) 
+ { + desc.destroyState(); + throw; + } + } + desc.destroyState(); + } + else + desc.merged_column->insertDefault(); + } + + /// If it is "zero" row, then rollback the insertion + /// (at this moment we need rollback only cols from columns_to_aggregate) + if (current_row_is_zero) + { + for (auto & desc : columns_definition.columns_to_aggregate) + desc.merged_column->popBack(1); + + return; + } + + merged_data.insertRow(current_row, columns_definition.column_numbers_not_to_aggregate); +} + +void SummingSortedAlgorithm::addRow(SortCursor & cursor) +{ + for (auto & desc : columns_definition.columns_to_aggregate) + { + if (!desc.created) + throw Exception("Logical error in SummingSortedBlockInputStream, there are no description", ErrorCodes::LOGICAL_ERROR); + + if (desc.is_agg_func_type) + { + // desc.state is not used for AggregateFunction types + auto & col = cursor->all_columns[desc.column_numbers[0]]; + assert_cast(*desc.merged_column).insertMergeFrom(*col, cursor->pos); + } + else + { + // Specialized case for unary functions + if (desc.column_numbers.size() == 1) + { + auto & col = cursor->all_columns[desc.column_numbers[0]]; + desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, nullptr); + } + else + { + // Gather all source columns into a vector + ColumnRawPtrs columns(desc.column_numbers.size()); + for (size_t i = 0; i < desc.column_numbers.size(); ++i) + columns[i] = cursor->all_columns[desc.column_numbers[i]]; + + desc.add_function(desc.function.get(), desc.state.data(), columns.data(), cursor->pos, nullptr); + } + } + } +} + +IMergingAlgorithm::Status SummingSortedAlgorithm::merge() +{ + /// Take the rows in needed order and put them in `merged_columns` until rows no more than `max_block_size` + while (queue.isValid()) + { + bool key_differs; + bool has_previous_group = !last_key.empty(); + + SortCursor current = queue.current(); + + { + detail::RowRef current_key; + current_key.set(current); + + if (!has_previous_group) /// The first key encountered. + { + key_differs = true; + current_row_is_zero = true; + } + else + key_differs = !last_key.hasEqualSortColumnsWith(current_key); + + last_key = current_key; + last_chunk_sort_columns.clear(); + } + + if (key_differs) + { + if (has_previous_group) + /// Write the data for the previous group. + insertCurrentRowIfNeeded(); + + if (merged_data.hasEnoughRows()) + { + /// The block is now full and the last row is calculated completely. + last_key.reset(); + return Status(merged_data.pull(column_names.size(), columns_definition)); + } + + setRow(current_row, current, column_names); + + /// Reset aggregation states for next row + for (auto & desc : columns_definition.columns_to_aggregate) + desc.createState(); + + // Start aggregations with current row + addRow(current); + + if (columns_definition.maps_to_sum.empty()) + { + /// We have only columns_to_aggregate. The status of current row will be determined + /// in 'insertCurrentRowIfNeeded' method on the values of aggregate functions. + current_row_is_zero = true; // NOLINT + } + else + { + /// We have complex maps that will be summed with 'mergeMap' method. + /// The single row is considered non zero, and the status after merging with other rows + /// will be determined in the branch below (when key_differs == false). 
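+                /// (mergeMap() returns true only when the merged result is non-empty, see above.)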
+ current_row_is_zero = false; // NOLINT + } + } + else + { + addRow(current); + + // Merge maps only for same rows + for (const auto & desc : columns_definition.maps_to_sum) + if (mergeMap(desc, current_row, current)) + current_row_is_zero = false; + } + + if (!current->isLast()) + { + queue.next(); + } + else + { + /// We get the next block from the corresponding source, if there is one. + queue.removeTop(); + return Status(current.impl->order); + } + } + + /// We will write the data for the last group, if it is non-zero. + /// If it is zero, and without it the output stream will be empty, we will write it anyway. + insertCurrentRowIfNeeded(); + last_chunk_sort_columns.clear(); + return Status(merged_data.pull(column_names.size(), columns_definition), true); +} + + +} diff --git a/src/Processors/Merges/SummingSortedAlgorithm.h b/src/Processors/Merges/SummingSortedAlgorithm.h new file mode 100644 index 00000000000..178e6c13f3b --- /dev/null +++ b/src/Processors/Merges/SummingSortedAlgorithm.h @@ -0,0 +1,152 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class SummingSortedAlgorithm : public IMergingAlgorithmWithDelayedChunk +{ +public: + SummingSortedAlgorithm( + const Block & header, size_t num_inputs, + SortDescription description_, + /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. + const Names & column_names_to_sum, + size_t max_block_size); + + void initialize(Chunks chunks) override; + void consume(Chunk chunk, size_t source_num) override; + Status merge() override; + + struct AggregateDescription; + + /// Stores numbers of key-columns and value-columns. + struct MapDescription + { + std::vector key_col_nums; + std::vector val_col_nums; + }; + + /// This structure define columns into one of three types: + /// * columns which values not needed to be aggregated + /// * aggregate functions and columns which needed to be summed + /// * mapping for nested columns + struct ColumnsDefinition + { + /// Columns with which values should not be aggregated. + ColumnNumbers column_numbers_not_to_aggregate; + /// Columns which should be aggregated. + std::vector columns_to_aggregate; + /// Mapping for nested columns. + std::vector maps_to_sum; + + size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } + }; + + /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. + class SummingMergedData : public MergedData + { + private: + using MergedData::pull; + + public: + using MergedData::MergedData; + + void insertRow(const Row & row, const ColumnNumbers & column_numbers) + { + size_t next_column = columns.size() - column_numbers.size(); + for (auto column_number : column_numbers) + { + columns[next_column]->insert(row[column_number]); + ++next_column; + } + + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; + } + + /// Initialize aggregate descriptions with columns. + void initAggregateDescription(std::vector & columns_to_aggregate) + { + size_t num_columns = columns_to_aggregate.size(); + for (size_t column_number = 0; column_number < num_columns; ++column_number) + columns_to_aggregate[column_number].merged_column = columns[column_number].get(); + } + + Chunk pull(size_t num_result_columns, const ColumnsDefinition & def); + }; + +private: + Row current_row; + bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? 
It is updated incrementally. + + ColumnsDefinition columns_definition; + SummingMergedData merged_data; + + Names column_names; + + void addRow(SortCursor & cursor); + void insertCurrentRowIfNeeded(); + +public: + /// Stores aggregation function, state, and columns to be used as function arguments. + struct AggregateDescription + { + /// An aggregate function 'sumWithOverflow' or 'sumMapWithOverflow' for summing. + AggregateFunctionPtr function; + IAggregateFunction::AddFunc add_function = nullptr; + std::vector column_numbers; + IColumn * merged_column = nullptr; + AlignedBuffer state; + bool created = false; + + /// In case when column has type AggregateFunction: use the aggregate function from itself instead of 'function' above. + bool is_agg_func_type = false; + + void init(const char * function_name, const DataTypes & argument_types) + { + function = AggregateFunctionFactory::instance().get(function_name, argument_types); + add_function = function->getAddressOfAddFunction(); + state.reset(function->sizeOfData(), function->alignOfData()); + } + + void createState() + { + if (created) + return; + if (is_agg_func_type) + merged_column->insertDefault(); + else + function->create(state.data()); + created = true; + } + + void destroyState() + { + if (!created) + return; + if (!is_agg_func_type) + function->destroy(state.data()); + created = false; + } + + /// Explicitly destroy aggregation state if the stream is terminated + ~AggregateDescription() + { + destroyState(); + } + + AggregateDescription() = default; + AggregateDescription(AggregateDescription &&) = default; + AggregateDescription(const AggregateDescription &) = delete; + }; +}; + +} diff --git a/src/Processors/Merges/SummingSortedTransform.cpp b/src/Processors/Merges/SummingSortedTransform.cpp deleted file mode 100644 index f34a3b479d7..00000000000 --- a/src/Processors/Merges/SummingSortedTransform.cpp +++ /dev/null @@ -1,626 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int CORRUPTED_DATA; -} - -namespace -{ - bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) - { - for (auto & desc : description) - if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number)) - return true; - - return false; - } - - /// Returns true if merge result is not empty - bool mergeMap(const SummingSortedTransform::MapDescription & desc, Row & row, SortCursor & cursor) - { - /// Strongly non-optimal. 
- - Row & left = row; - Row right(left.size()); - - for (size_t col_num : desc.key_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); - - for (size_t col_num : desc.val_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); - - auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & - { - return matrix[i].get()[j]; - }; - - auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array - { - size_t size = col_nums.size(); - Array res(size); - for (size_t col_num_index = 0; col_num_index < size; ++col_num_index) - res[col_num_index] = at_ith_column_jth_row(matrix, col_nums[col_num_index], j); - return res; - }; - - std::map merged; - - auto accumulate = [](Array & dst, const Array & src) - { - bool has_non_zero = false; - size_t size = dst.size(); - for (size_t i = 0; i < size; ++i) - if (applyVisitor(FieldVisitorSum(src[i]), dst[i])) - has_non_zero = true; - return has_non_zero; - }; - - auto merge = [&](const Row & matrix) - { - size_t rows = matrix[desc.key_col_nums[0]].get().size(); - - for (size_t j = 0; j < rows; ++j) - { - Array key = tuple_of_nth_columns_at_jth_row(matrix, desc.key_col_nums, j); - Array value = tuple_of_nth_columns_at_jth_row(matrix, desc.val_col_nums, j); - - auto it = merged.find(key); - if (merged.end() == it) - merged.emplace(std::move(key), std::move(value)); - else - { - if (!accumulate(it->second, value)) - merged.erase(it); - } - } - }; - - merge(left); - merge(right); - - for (size_t col_num : desc.key_col_nums) - row[col_num] = Array(merged.size()); - for (size_t col_num : desc.val_col_nums) - row[col_num] = Array(merged.size()); - - size_t row_num = 0; - for (const auto & key_value : merged) - { - for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; - - for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; - - ++row_num; - } - - return row_num != 0; - } - - SummingSortedTransform::ColumnsDefinition defineColumns( - const Block & header, - const SortDescription & description, - const Names & column_names_to_sum) - { - size_t num_columns = header.columns(); - SummingSortedTransform::ColumnsDefinition def; - - /// name of nested structure -> the column numbers that refer to it. - std::unordered_map> discovered_maps; - - /** Fill in the column numbers, which must be summed. - * This can only be numeric columns that are not part of the sort key. - * If a non-empty column_names_to_sum is specified, then we only take these columns. - * Some columns from column_names_to_sum may not be found. This is ignored. 
- */ - for (size_t i = 0; i < num_columns; ++i) - { - const ColumnWithTypeAndName & column = header.safeGetByPosition(i); - - /// Discover nested Maps and find columns for summation - if (typeid_cast(column.type.get())) - { - const auto map_name = Nested::extractTableName(column.name); - /// if nested table name ends with `Map` it is a possible candidate for special handling - if (map_name == column.name || !endsWith(map_name, "Map")) - { - def.column_numbers_not_to_aggregate.push_back(i); - continue; - } - - discovered_maps[map_name].emplace_back(i); - } - else - { - bool is_agg_func = WhichDataType(column.type).isAggregateFunction(); - - /// There are special const columns for example after prewhere sections. - if ((!column.type->isSummable() && !is_agg_func) || isColumnConst(*column.column)) - { - def.column_numbers_not_to_aggregate.push_back(i); - continue; - } - - /// Are they inside the PK? - if (isInPrimaryKey(description, column.name, i)) - { - def.column_numbers_not_to_aggregate.push_back(i); - continue; - } - - if (column_names_to_sum.empty() - || column_names_to_sum.end() != - std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) - { - // Create aggregator to sum this column - SummingSortedTransform::AggregateDescription desc; - desc.is_agg_func_type = is_agg_func; - desc.column_numbers = {i}; - - if (!is_agg_func) - { - desc.init("sumWithOverflow", {column.type}); - } - - def.columns_to_aggregate.emplace_back(std::move(desc)); - } - else - { - // Column is not going to be summed, use last value - def.column_numbers_not_to_aggregate.push_back(i); - } - } - } - - /// select actual nested Maps from list of candidates - for (const auto & map : discovered_maps) - { - /// map should contain at least two elements (key -> value) - if (map.second.size() < 2) - { - for (auto col : map.second) - def.column_numbers_not_to_aggregate.push_back(col); - continue; - } - - /// no elements of map could be in primary key - auto column_num_it = map.second.begin(); - for (; column_num_it != map.second.end(); ++column_num_it) - if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it)) - break; - if (column_num_it != map.second.end()) - { - for (auto col : map.second) - def.column_numbers_not_to_aggregate.push_back(col); - continue; - } - - DataTypes argument_types; - SummingSortedTransform::AggregateDescription desc; - SummingSortedTransform::MapDescription map_desc; - - column_num_it = map.second.begin(); - for (; column_num_it != map.second.end(); ++column_num_it) - { - const ColumnWithTypeAndName & key_col = header.safeGetByPosition(*column_num_it); - const String & name = key_col.name; - const IDataType & nested_type = *assert_cast(*key_col.type).getNestedType(); - - if (column_num_it == map.second.begin() - || endsWith(name, "ID") - || endsWith(name, "Key") - || endsWith(name, "Type")) - { - if (!nested_type.isValueRepresentedByInteger() && !isStringOrFixedString(nested_type)) - break; - - map_desc.key_col_nums.push_back(*column_num_it); - } - else - { - if (!nested_type.isSummable()) - break; - - map_desc.val_col_nums.push_back(*column_num_it); - } - - // Add column to function arguments - desc.column_numbers.push_back(*column_num_it); - argument_types.push_back(key_col.type); - } - - if (column_num_it != map.second.end()) - { - for (auto col : map.second) - def.column_numbers_not_to_aggregate.push_back(col); - continue; - } - - if (map_desc.key_col_nums.size() == 1) - { - // Create summation for all value columns in the map - 
desc.init("sumMapWithOverflow", argument_types); - def.columns_to_aggregate.emplace_back(std::move(desc)); - } - else - { - // Fall back to legacy mergeMaps for composite keys - for (auto col : map.second) - def.column_numbers_not_to_aggregate.push_back(col); - def.maps_to_sum.emplace_back(std::move(map_desc)); - } - } - - return def; - } - - MutableColumns getMergedDataColumns( - const Block & header, - const SummingSortedTransform::ColumnsDefinition & columns_definition) - { - MutableColumns columns; - columns.reserve(columns_definition.getNumColumns()); - - for (auto & desc : columns_definition.columns_to_aggregate) - { - // Wrap aggregated columns in a tuple to match function signature - if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType())) - { - size_t tuple_size = desc.column_numbers.size(); - MutableColumns tuple_columns(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - tuple_columns[i] = header.safeGetByPosition(desc.column_numbers[i]).column->cloneEmpty(); - - columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); - } - else - columns.emplace_back(header.safeGetByPosition(desc.column_numbers[0]).column->cloneEmpty()); - } - - for (auto & column_number : columns_definition.column_numbers_not_to_aggregate) - columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn()); - - return columns; - } - - void finalizeChunk( - Chunk & chunk, size_t num_result_columns, - const SummingSortedTransform::ColumnsDefinition & columns_definition) - { - size_t num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - - Columns res_columns(num_result_columns); - size_t next_column = 0; - - for (auto & desc : columns_definition.columns_to_aggregate) - { - auto column = std::move(columns[next_column]); - ++next_column; - - if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType())) - { - /// Unpack tuple into block. - size_t tuple_size = desc.column_numbers.size(); - for (size_t i = 0; i < tuple_size; ++i) - res_columns[desc.column_numbers[i]] = assert_cast(*column).getColumnPtr(i); - } - else - res_columns[desc.column_numbers[0]] = std::move(column); - } - - for (auto column_number : columns_definition.column_numbers_not_to_aggregate) - { - auto column = std::move(columns[next_column]); - ++next_column; - - res_columns[column_number] = std::move(column); - } - - chunk.setColumns(std::move(res_columns), num_rows); - } - - void setRow(Row & row, SortCursor & cursor, const Block & header) - { - size_t num_columns = row.size(); - for (size_t i = 0; i < num_columns; ++i) - { - try - { - cursor->all_columns[i]->get(cursor->pos, row[i]); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - - /// Find out the name of the column and throw more informative exception. - - String column_name; - if (i < header.columns()) - { - column_name = header.safeGetByPosition(i).name; - break; - } - - throw Exception("MergingSortedBlockInputStream failed to read row " + toString(cursor->pos) - + " of column " + toString(i) + (column_name.empty() ? "" : " (" + column_name + ")"), - ErrorCodes::CORRUPTED_DATA); - } - } - } -} - -SummingSortedTransform::SummingSortedTransform( - const Block & header, size_t num_inputs, - SortDescription description_, - /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. 
- const Names & column_names_to_sum, - size_t max_block_size) - : IMergingTransform(num_inputs, header, header, true) - , columns_definition(defineColumns(header, description_, column_names_to_sum)) - , merged_data(getMergedDataColumns(header, columns_definition), false, max_block_size) - , description(std::move(description_)) - , source_chunks(num_inputs) - , cursors(num_inputs) -{ - current_row.resize(header.columns()); - merged_data.initAggregateDescription(columns_definition.columns_to_aggregate); -} - -void SummingSortedTransform::initializeInputs() -{ - queue = SortingHeap(cursors); - is_queue_initialized = true; -} - -void SummingSortedTransform::consume(Chunk chunk, size_t input_number) -{ - updateCursor(std::move(chunk), input_number); - - if (is_queue_initialized) - queue.push(cursors[input_number]); -} - -void SummingSortedTransform::updateCursor(Chunk chunk, size_t source_num) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - chunk.setColumns(std::move(columns), num_rows); - - auto & source_chunk = source_chunks[source_num]; - - if (source_chunk) - { - /// Extend lifetime of last chunk. - last_chunk = std::move(source_chunk); - last_chunk_sort_columns = std::move(cursors[source_num].sort_columns); - - source_chunk = std::move(chunk); - cursors[source_num].reset(source_chunk.getColumns(), {}); - } - else - { - if (cursors[source_num].has_collation) - throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); - - source_chunk = std::move(chunk); - cursors[source_num] = SortCursorImpl(source_chunk.getColumns(), description, source_num); - } -} - -void SummingSortedTransform::work() -{ - merge(); - prepareOutputChunk(merged_data); - - if (has_output_chunk) - { - finalizeChunk(output_chunk, getOutputs().back().getHeader().columns(), columns_definition); - merged_data.initAggregateDescription(columns_definition.columns_to_aggregate); - } -} - -void SummingSortedTransform::insertCurrentRowIfNeeded() -{ - /// We have nothing to aggregate. It means that it could be non-zero, because we have columns_not_to_aggregate. - if (columns_definition.columns_to_aggregate.empty()) - current_row_is_zero = false; - - for (auto & desc : columns_definition.columns_to_aggregate) - { - // Do not insert if the aggregation state hasn't been created - if (desc.created) - { - if (desc.is_agg_func_type) - { - current_row_is_zero = false; - } - else - { - try - { - desc.function->insertResultInto(desc.state.data(), *desc.merged_column); - - /// Update zero status of current row - if (desc.column_numbers.size() == 1) - { - // Flag row as non-empty if at least one column number if non-zero - current_row_is_zero = current_row_is_zero && desc.merged_column->isDefaultAt(desc.merged_column->size() - 1); - } - else - { - /// It is sumMapWithOverflow aggregate function. - /// Assume that the row isn't empty in this case (just because it is compatible with previous version) - current_row_is_zero = false; - } - } - catch (...) 
- { - desc.destroyState(); - throw; - } - } - desc.destroyState(); - } - else - desc.merged_column->insertDefault(); - } - - /// If it is "zero" row, then rollback the insertion - /// (at this moment we need rollback only cols from columns_to_aggregate) - if (current_row_is_zero) - { - for (auto & desc : columns_definition.columns_to_aggregate) - desc.merged_column->popBack(1); - - return; - } - - merged_data.insertRow(current_row, columns_definition.column_numbers_not_to_aggregate); -} - -void SummingSortedTransform::addRow(SortCursor & cursor) -{ - for (auto & desc : columns_definition.columns_to_aggregate) - { - if (!desc.created) - throw Exception("Logical error in SummingSortedBlockInputStream, there are no description", ErrorCodes::LOGICAL_ERROR); - - if (desc.is_agg_func_type) - { - // desc.state is not used for AggregateFunction types - auto & col = cursor->all_columns[desc.column_numbers[0]]; - assert_cast(*desc.merged_column).insertMergeFrom(*col, cursor->pos); - } - else - { - // Specialized case for unary functions - if (desc.column_numbers.size() == 1) - { - auto & col = cursor->all_columns[desc.column_numbers[0]]; - desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, nullptr); - } - else - { - // Gather all source columns into a vector - ColumnRawPtrs columns(desc.column_numbers.size()); - for (size_t i = 0; i < desc.column_numbers.size(); ++i) - columns[i] = cursor->all_columns[desc.column_numbers[i]]; - - desc.add_function(desc.function.get(), desc.state.data(), columns.data(), cursor->pos, nullptr); - } - } - } -} - -void SummingSortedTransform::merge() -{ - /// Take the rows in needed order and put them in `merged_columns` until rows no more than `max_block_size` - while (queue.isValid()) - { - bool key_differs; - bool has_previous_group = !last_key.empty(); - - SortCursor current = queue.current(); - - { - detail::RowRef current_key; - current_key.set(current); - - if (!has_previous_group) /// The first key encountered. - { - key_differs = true; - current_row_is_zero = true; - } - else - key_differs = !last_key.hasEqualSortColumnsWith(current_key); - - last_key = current_key; - last_chunk_sort_columns.clear(); - } - - if (key_differs) - { - if (has_previous_group) - /// Write the data for the previous group. - insertCurrentRowIfNeeded(); - - if (merged_data.hasEnoughRows()) - { - /// The block is now full and the last row is calculated completely. - last_key.reset(); - return; - } - - setRow(current_row, current, getInputs().front().getHeader()); - - /// Reset aggregation states for next row - for (auto & desc : columns_definition.columns_to_aggregate) - desc.createState(); - - // Start aggregations with current row - addRow(current); - - if (columns_definition.maps_to_sum.empty()) - { - /// We have only columns_to_aggregate. The status of current row will be determined - /// in 'insertCurrentRowIfNeeded' method on the values of aggregate functions. - current_row_is_zero = true; // NOLINT - } - else - { - /// We have complex maps that will be summed with 'mergeMap' method. - /// The single row is considered non zero, and the status after merging with other rows - /// will be determined in the branch below (when key_differs == false). 
- current_row_is_zero = false; // NOLINT - } - } - else - { - addRow(current); - - // Merge maps only for same rows - for (const auto & desc : columns_definition.maps_to_sum) - if (mergeMap(desc, current_row, current)) - current_row_is_zero = false; - } - - if (!current->isLast()) - { - queue.next(); - } - else - { - /// We get the next block from the corresponding source, if there is one. - queue.removeTop(); - requestDataForInput(current.impl->order); - return; - } - } - - /// We will write the data for the last group, if it is non-zero. - /// If it is zero, and without it the output stream will be empty, we will write it anyway. - insertCurrentRowIfNeeded(); - last_chunk_sort_columns.clear(); - is_finished = true; -} - -} diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index cc651197647..37859e1b88b 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -1,15 +1,7 @@ #pragma once #include -#include -#include - -#include -#include -#include -#include -#include -#include +#include namespace DB { @@ -19,7 +11,7 @@ namespace DB * collapses them into one row, summing all the numeric columns except the primary key. * If in all numeric columns, except for the primary key, the result is zero, it deletes the row. */ -class SummingSortedTransform final : public IMergingTransform +class SummingSortedTransform final : public IMergingTransform2 { public: @@ -28,146 +20,18 @@ public: SortDescription description_, /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. const Names & column_names_to_sum, - size_t max_block_size); - - struct AggregateDescription; - - /// Stores numbers of key-columns and value-columns. - struct MapDescription + size_t max_block_size) + : IMergingTransform2( + num_inputs, header, header, true, + header, + num_inputs, + std::move(description_), + column_names_to_sum, + max_block_size) { - std::vector key_col_nums; - std::vector val_col_nums; - }; - - struct ColumnsDefinition - { - /// Columns with which values should be summed. - ColumnNumbers column_numbers_not_to_aggregate; - /// Columns which should be aggregated. - std::vector columns_to_aggregate; - /// Mapping for nested columns. - std::vector maps_to_sum; - - size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } - }; - - /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. - struct SummingMergedData : public MergedData - { - public: - using MergedData::MergedData; - - void insertRow(const Row & row, const ColumnNumbers & column_numbers) - { - size_t next_column = columns.size() - column_numbers.size(); - for (auto column_number : column_numbers) - { - columns[next_column]->insert(row[column_number]); - ++next_column; - } - - ++total_merged_rows; - ++merged_rows; - /// TODO: sum_blocks_granularity += block_size; - } - - /// Initialize aggregate descriptions with columns. 
- void initAggregateDescription(std::vector & columns_to_aggregate) - { - size_t num_columns = columns_to_aggregate.size(); - for (size_t column_number = 0; column_number < num_columns; ++column_number) - columns_to_aggregate[column_number].merged_column = columns[column_number].get(); - } - }; + } String getName() const override { return "SummingSortedTransform"; } - void work() override; - -protected: - void initializeInputs() override; - void consume(Chunk chunk, size_t input_number) override; - -private: - Row current_row; - bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. - - ColumnsDefinition columns_definition; - SummingMergedData merged_data; - - SortDescription description; - - /// Chunks currently being merged. - std::vector source_chunks; - SortCursorImpls cursors; - - /// In merging algorithm, we need to compare current sort key with the last one. - /// So, sorting columns for last row needed to be stored. - /// In order to do it, we extend lifetime of last chunk and it's sort columns (from corresponding sort cursor). - Chunk last_chunk; - ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. - - detail::RowRef last_key; - - SortingHeap queue; - bool is_queue_initialized = false; - - void merge(); - void updateCursor(Chunk chunk, size_t source_num); - void addRow(SortCursor & cursor); - void insertCurrentRowIfNeeded(); - -public: - /// Stores aggregation function, state, and columns to be used as function arguments. - struct AggregateDescription - { - /// An aggregate function 'sumWithOverflow' or 'sumMapWithOverflow' for summing. - AggregateFunctionPtr function; - IAggregateFunction::AddFunc add_function = nullptr; - std::vector column_numbers; - IColumn * merged_column = nullptr; - AlignedBuffer state; - bool created = false; - - /// In case when column has type AggregateFunction: use the aggregate function from itself instead of 'function' above. - bool is_agg_func_type = false; - - void init(const char * function_name, const DataTypes & argument_types) - { - function = AggregateFunctionFactory::instance().get(function_name, argument_types); - add_function = function->getAddressOfAddFunction(); - state.reset(function->sizeOfData(), function->alignOfData()); - } - - void createState() - { - if (created) - return; - if (is_agg_func_type) - merged_column->insertDefault(); - else - function->create(state.data()); - created = true; - } - - void destroyState() - { - if (!created) - return; - if (!is_agg_func_type) - function->destroy(state.data()); - created = false; - } - - /// Explicitly destroy aggregation state if the stream is terminated - ~AggregateDescription() - { - destroyState(); - } - - AggregateDescription() = default; - AggregateDescription(AggregateDescription &&) = default; - AggregateDescription(const AggregateDescription &) = delete; - }; }; } From d7d29d8250022696b3ab2b13c3883a8e0e81d940 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 22:02:57 +0300 Subject: [PATCH 373/752] Fix build. 
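The build break came from `SummingMergedData::pull()` still taking the
`ColumnsDefinition` from the caller; the diff below stores a reference to it
inside the merged data instead. As background for the whole series, here is a
rough sketch (not part of the patch) of how a wrapping transform is expected
to drive an IMergingAlgorithm, assuming `Status` carries the merged chunk, a
finished flag, and the index of the source that must supply more data, as the
`Status(...)` constructors used above suggest; `output` and `pullChunkFrom`
are hypothetical names.

```cpp
// Hypothetical driver loop; illustration only.
algorithm.initialize(std::move(initial_chunks));
while (true)
{
    IMergingAlgorithm::Status status = algorithm.merge();

    if (status.chunk.hasRows())
        output.push(std::move(status.chunk)); /// A fully merged block is ready.

    if (status.is_finished)
        break;

    if (status.required_source >= 0) /// One input ran out of rows; feed it the next chunk.
        algorithm.consume(pullChunkFrom(status.required_source), status.required_source);
}
```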
--- src/Processors/Merges/SummingSortedAlgorithm.cpp | 12 +++++++----- src/Processors/Merges/SummingSortedAlgorithm.h | 11 ++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/Processors/Merges/SummingSortedAlgorithm.cpp b/src/Processors/Merges/SummingSortedAlgorithm.cpp index 357ecc7114e..4f437654a19 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/SummingSortedAlgorithm.cpp @@ -369,10 +369,13 @@ static void setRow(Row & row, SortCursor & cursor, const Names & column_names) } -Chunk SummingSortedAlgorithm::SummingMergedData::pull(size_t num_result_columns, const ColumnsDefinition & def) +Chunk SummingSortedAlgorithm::SummingMergedData::pull(size_t num_result_columns) { auto chunk = MergedData::pull(); postprocessChunk(chunk, num_result_columns, def); + + initAggregateDescription(def.columns_to_aggregate); + return chunk; } @@ -383,7 +386,7 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( size_t max_block_size) : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) , columns_definition(defineColumns(header, description_, column_names_to_sum)) - , merged_data(getMergedDataColumns(header, columns_definition), false, max_block_size) + , merged_data(getMergedDataColumns(header, columns_definition), max_block_size, columns_definition) , column_names(header.getNames()) { current_row.resize(header.columns()); @@ -535,7 +538,7 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() { /// The block is now full and the last row is calculated completely. last_key.reset(); - return Status(merged_data.pull(column_names.size(), columns_definition)); + return Status(merged_data.pull(column_names.size())); } setRow(current_row, current, column_names); @@ -587,8 +590,7 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() /// If it is zero, and without it the output stream will be empty, we will write it anyway. 
insertCurrentRowIfNeeded(); last_chunk_sort_columns.clear(); - return Status(merged_data.pull(column_names.size(), columns_definition), true); + return Status(merged_data.pull(column_names.size()), true); } - } diff --git a/src/Processors/Merges/SummingSortedAlgorithm.h b/src/Processors/Merges/SummingSortedAlgorithm.h index 178e6c13f3b..8cd7fb1ff13 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/SummingSortedAlgorithm.h @@ -58,6 +58,12 @@ public: public: using MergedData::MergedData; + SummingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_) + : MergedData(std::move(columns_), false, max_block_size_) + , def(def_) + { + } + void insertRow(const Row & row, const ColumnNumbers & column_numbers) { size_t next_column = columns.size() - column_numbers.size(); @@ -80,7 +86,10 @@ public: columns_to_aggregate[column_number].merged_column = columns[column_number].get(); } - Chunk pull(size_t num_result_columns, const ColumnsDefinition & def); + Chunk pull(size_t num_result_columns); + + private: + ColumnsDefinition & def; }; private: From def6817ede8854d7c422429d2bbe9204c134ae25 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 13 Apr 2020 22:06:07 +0300 Subject: [PATCH 374/752] Update compare.sh --- docker/test/performance-comparison/compare.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index f89631522f4..bf48fe467ca 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -42,9 +42,11 @@ function configure rm db0/metadata/system/* -rf ||: # Make copies of the original db for both servers. Use hardlinks instead - # of copying. + # of copying. Be careful to remove preprocessed configs or it can lead to + # weird effects. rm -r left/db ||: rm -r right/db ||: + rm -r db0/preprocessed_configs ||: cp -al db0/ left/db/ cp -al db0/ right/db/ } From 84b1dd4c1e642b259fd10ff3cd25abcb6f6eb4e3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 22:20:26 +0300 Subject: [PATCH 375/752] Fix tests. --- src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h | 3 +-- src/Processors/Merges/SummingSortedAlgorithm.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h index 2a53f22f5aa..214f262a5cc 100644 --- a/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h +++ b/src/Processors/Merges/IMergingAlgorithmWithDelayedChunk.h @@ -16,6 +16,7 @@ public: protected: SortingHeap queue; + SortDescription description; /// Previous row. May refer to last_chunk_sort_columns or row from source_chunks. detail::RowRef last_key; @@ -26,8 +27,6 @@ protected: void updateCursor(Chunk chunk, size_t source_num); private: - SortDescription description; - /// Chunks currently being merged. 
std::vector source_chunks; SortCursorImpls cursors; diff --git a/src/Processors/Merges/SummingSortedAlgorithm.cpp b/src/Processors/Merges/SummingSortedAlgorithm.cpp index 4f437654a19..43f282e516d 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/SummingSortedAlgorithm.cpp @@ -385,7 +385,7 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( const Names & column_names_to_sum, size_t max_block_size) : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) - , columns_definition(defineColumns(header, description_, column_names_to_sum)) + , columns_definition(defineColumns(header, description, column_names_to_sum)) , merged_data(getMergedDataColumns(header, columns_definition), max_block_size, columns_definition) , column_names(header.getNames()) { From e46322fcefb1d3e1d88fcd24c4776f91ae0a037a Mon Sep 17 00:00:00 2001 From: Avogar Date: Mon, 13 Apr 2020 22:33:02 +0300 Subject: [PATCH 376/752] Update MsgPack input format. --- src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index ee32aeb6bfe..53c5a623a35 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -119,8 +119,8 @@ void MsgPackRowInputFormat::insertObject(IColumn & column, DataTypePtr data_type case TypeIndex::FixedString: [[fallthrough]]; case TypeIndex::String: { - String str = object.as(); - column.insertData(str.data(), str.size()); + msgpack::object_str obj_str = object.via.str; + column.insertData(obj_str.ptr, obj_str.size); return; } case TypeIndex::Array: From d480707c12133d9d3ad0708447f08bab4bc3f995 Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Mon, 13 Apr 2020 23:02:44 +0300 Subject: [PATCH 377/752] Fixed clang build + tweaked comment --- src/Interpreters/MutationsInterpreter.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 1682bc11f80..b1b226b157f 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -59,9 +59,8 @@ public: if (const auto * function = typeid_cast(node.get())) { - /// Lambda functions also may be non-deterministic. But we skip them for simplicity. - /// Replication will work correctly even if non-deterministic function is used, - /// it will select any of the results and discard other. + /// Property of being deterministic for lambda expression is completely determined + /// by the contents of its definition, so we just proceed to it. 
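+        /// (The visitor still descends into the lambda's arguments and body,
+        /// so any non-deterministic call inside it is detected anyway.)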
if (function->name != "lambda") { const auto func = FunctionFactory::instance().get(function->name, data.context); @@ -76,7 +75,7 @@ using FirstNonDeterministicFuncFinder = InDepthNodeVisitor findFirstNonDeterministicFuncName(const MutationCommand & command, const Context & context) { - FirstNonDeterministicFuncMatcher::Data finder_data{context}; + FirstNonDeterministicFuncMatcher::Data finder_data{context, std::nullopt}; switch (command.type) { From 1d843df1f3bf2619f58679c9e42e1cb7ec1c4945 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 13 Apr 2020 23:43:23 +0300 Subject: [PATCH 378/752] Changelog for v20.3.5.21 --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5301de8a23..2ab006bcdd3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ ## ClickHouse release v20.3 +### ClickHouse release v20.3.5.21, 2020-03-27 + +#### Bug Fix + +* Fix 'Different expressions with the same alias' error when query has PREWHERE and WHERE on distributed table and `SET distributed_product_mode = 'local'`. [#9871](https://github.com/ClickHouse/ClickHouse/pull/9871) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix mutations excessive memory consumption for tables with a composite primary key. This fixes [#9850](https://github.com/ClickHouse/ClickHouse/issues/9850). [#9860](https://github.com/ClickHouse/ClickHouse/pull/9860) ([alesapin](https://github.com/alesapin)). +* Fix 'COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query' error in case of subqueries with COMMA JOIN out of tables lists (i.e. in WHERE). Fixes [#9782](https://github.com/ClickHouse/ClickHouse/issues/9782). [#9830](https://github.com/ClickHouse/ClickHouse/pull/9830) ([Artem Zuikov](https://github.com/4ertus2)). +* Fix possible exception `Got 0 in totals chunk, expected 1` on client. It happened for queries with `JOIN` in case if right joined table had zero rows. Example: `select * from system.one t1 join system.one t2 on t1.dummy = t2.dummy limit 0 FORMAT TabSeparated;`. Fixes [#9777](https://github.com/ClickHouse/ClickHouse/issues/9777). [#9823](https://github.com/ClickHouse/ClickHouse/pull/9823) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix SIGSEGV with optimize_skip_unused_shards when type cannot be converted. [#9804](https://github.com/ClickHouse/ClickHouse/pull/9804) ([Azat Khuzhin](https://github.com/azat)). +* Fix broken `ALTER TABLE DELETE COLUMN` query for compact parts. [#9779](https://github.com/ClickHouse/ClickHouse/pull/9779) ([alesapin](https://github.com/alesapin)). +* Fix max_distributed_connections (w/ and w/o Processors). [#9673](https://github.com/ClickHouse/ClickHouse/pull/9673) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a few cases when timezone of the function argument wasn't used properly. [#9574](https://github.com/ClickHouse/ClickHouse/pull/9574) ([Vasily Nemkov](https://github.com/Enmk)). + +#### Improvement + +* Remove order by stage from mutations because we read from a single ordered part in a single thread. Also add check that the order of rows in mutation is ordered in sorting key order and this order is not violated. [#9886](https://github.com/ClickHouse/ClickHouse/pull/9886) ([alesapin](https://github.com/alesapin)). 
+ + ### ClickHouse release v20.3.4.10, 2020-03-20 #### Bug Fix From 801320983b61c5985d1ccb5bbbcdfe61dab9bb6a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 23:50:13 +0300 Subject: [PATCH 379/752] Refactor SummingSortedAlgorithm. --- .../Merges/SummingSortedAlgorithm.cpp | 297 ++++++++++++------ .../Merges/SummingSortedAlgorithm.h | 123 ++------ 2 files changed, 222 insertions(+), 198 deletions(-) diff --git a/src/Processors/Merges/SummingSortedAlgorithm.cpp b/src/Processors/Merges/SummingSortedAlgorithm.cpp index 43f282e516d..923e890abd7 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/SummingSortedAlgorithm.cpp @@ -16,6 +16,70 @@ namespace ErrorCodes extern const int CORRUPTED_DATA; } +SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition() = default; +SummingSortedAlgorithm::ColumnsDefinition::ColumnsDefinition(ColumnsDefinition &&) noexcept = default; +SummingSortedAlgorithm::ColumnsDefinition::~ColumnsDefinition() = default; + +/// Stores numbers of key-columns and value-columns. +struct SummingSortedAlgorithm::MapDescription +{ + std::vector key_col_nums; + std::vector val_col_nums; +}; + +/// Stores aggregation function, state, and columns to be used as function arguments. +struct SummingSortedAlgorithm::AggregateDescription +{ + /// An aggregate function 'sumWithOverflow' or 'sumMapWithOverflow' for summing. + AggregateFunctionPtr function; + IAggregateFunction::AddFunc add_function = nullptr; + std::vector column_numbers; + IColumn * merged_column = nullptr; + AlignedBuffer state; + bool created = false; + + /// In case when column has type AggregateFunction: use the aggregate function from itself instead of 'function' above. + bool is_agg_func_type = false; + + void init(const char * function_name, const DataTypes & argument_types) + { + function = AggregateFunctionFactory::instance().get(function_name, argument_types); + add_function = function->getAddressOfAddFunction(); + state.reset(function->sizeOfData(), function->alignOfData()); + } + + void createState() + { + if (created) + return; + if (is_agg_func_type) + merged_column->insertDefault(); + else + function->create(state.data()); + created = true; + } + + void destroyState() + { + if (!created) + return; + if (!is_agg_func_type) + function->destroy(state.data()); + created = false; + } + + /// Explicitly destroy aggregation state if the stream is terminated + ~AggregateDescription() + { + destroyState(); + } + + AggregateDescription() = default; + AggregateDescription(AggregateDescription &&) = default; + AggregateDescription(const AggregateDescription &) = delete; +}; + + static bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) { for (auto & desc : description) @@ -26,7 +90,8 @@ static bool isInPrimaryKey(const SortDescription & description, const std::strin } /// Returns true if merge result is not empty -static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, Row & row, SortCursor & cursor) +static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, + Row & row, const ColumnRawPtrs & raw_columns, size_t row_num) { /// Strongly non-optimal. 
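An aside on what mergeMap() actually computes, since the hunks below only show
the mechanical change from a cursor to raw columns: a self-contained toy model
of its merge-by-key semantics. It is deliberately simplified — int keys and
plain integer vectors instead of the Field/Array machinery — and is an
illustration, not the actual implementation.

```cpp
#include <iostream>
#include <map>
#include <vector>

/// Two rows of a nested *Map structure are joined by key, the values of
/// equal keys are summed, and entries whose values are all zero are dropped.
/// Returns true if the merged row is non-empty.
static bool mergeMapRows(std::map<int, std::vector<long long>> & left,
                         const std::map<int, std::vector<long long>> & right)
{
    for (const auto & [key, values] : right)
    {
        auto [it, inserted] = left.try_emplace(key, values);
        if (!inserted)
            for (size_t i = 0; i < values.size(); ++i)
                it->second[i] += values[i];
    }

    /// Drop entries that summed to all zeros.
    for (auto it = left.begin(); it != left.end();)
    {
        bool all_zero = true;
        for (auto value : it->second)
            all_zero = all_zero && value == 0;
        it = all_zero ? left.erase(it) : std::next(it);
    }

    return !left.empty();
}

int main()
{
    std::map<int, std::vector<long long>> a{{1, {10}}, {2, {-5}}};
    std::map<int, std::vector<long long>> b{{2, {5}}, {3, {7}}};
    /// Key 2 sums to zero and is dropped; keys 1 and 3 survive: prints "1 2".
    std::cout << mergeMapRows(a, b) << ' ' << a.size() << '\n';
}
```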
@@ -34,10 +99,10 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, Row & Row right(left.size()); for (size_t col_num : desc.key_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + right[col_num] = (*raw_columns[col_num])[row_num].template get(); for (size_t col_num : desc.val_col_nums) - right[col_num] = (*cursor->all_columns[col_num])[cursor->pos].template get(); + right[col_num] = (*raw_columns[col_num])[row_num].template get(); auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & { @@ -93,19 +158,19 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, Row & for (size_t col_num : desc.val_col_nums) row[col_num] = Array(merged.size()); - size_t row_num = 0; + size_t row_num_ = 0; for (const auto & key_value : merged) { for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + row[desc.key_col_nums[col_num_index]].get()[row_num_] = key_value.first[col_num_index]; for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + row[desc.val_col_nums[col_num_index]].get()[row_num_] = key_value.second[col_num_index]; - ++row_num; + ++row_num_; } - return row_num != 0; + return row_num_ != 0; } static SummingSortedAlgorithm::ColumnsDefinition defineColumns( @@ -115,6 +180,7 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns( { size_t num_columns = header.columns(); SummingSortedAlgorithm::ColumnsDefinition def; + def.column_names = header.getNames(); /// name of nested structure -> the column numbers that refer to it. std::unordered_map> discovered_maps; @@ -342,14 +408,14 @@ static void postprocessChunk( chunk.setColumns(std::move(res_columns), num_rows); } -static void setRow(Row & row, SortCursor & cursor, const Names & column_names) +static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num, const Names & column_names) { size_t num_columns = row.size(); for (size_t i = 0; i < num_columns; ++i) { try { - cursor->all_columns[i]->get(cursor->pos, row[i]); + raw_columns[i]->get(row_num, row[i]); } catch (...) { @@ -361,7 +427,7 @@ static void setRow(Row & row, SortCursor & cursor, const Names & column_names) if (i < column_names.size()) column_name = column_names[i]; - throw Exception("MergingSortedBlockInputStream failed to read row " + toString(cursor->pos) + throw Exception("MergingSortedBlockInputStream failed to read row " + toString(row_num) + " of column " + toString(i) + (column_name.empty() ? 
"" : " (" + column_name + ")"), ErrorCodes::CORRUPTED_DATA); } @@ -369,53 +435,49 @@ static void setRow(Row & row, SortCursor & cursor, const Names & column_names) } -Chunk SummingSortedAlgorithm::SummingMergedData::pull(size_t num_result_columns) +SummingSortedAlgorithm::SummingMergedData::SummingMergedData( + MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_) + : MergedData(std::move(columns_), false, max_block_size_) + , def(def_) { - auto chunk = MergedData::pull(); - postprocessChunk(chunk, num_result_columns, def); - - initAggregateDescription(def.columns_to_aggregate); - - return chunk; + current_row.resize(def.column_names.size()); + initAggregateDescription(); } -SummingSortedAlgorithm::SummingSortedAlgorithm( - const Block & header, size_t num_inputs, - SortDescription description_, - const Names & column_names_to_sum, - size_t max_block_size) - : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) - , columns_definition(defineColumns(header, description, column_names_to_sum)) - , merged_data(getMergedDataColumns(header, columns_definition), max_block_size, columns_definition) - , column_names(header.getNames()) +void SummingSortedAlgorithm::SummingMergedData::startGroup(ColumnRawPtrs & raw_columns, size_t row) { - current_row.resize(header.columns()); - merged_data.initAggregateDescription(columns_definition.columns_to_aggregate); + setRow(current_row, raw_columns, row, def.column_names); + + /// Reset aggregation states for next row + for (auto & desc : def.columns_to_aggregate) + desc.createState(); + + if (def.maps_to_sum.empty()) + { + /// We have only columns_to_aggregate. The status of current row will be determined + /// in 'insertCurrentRowIfNeeded' method on the values of aggregate functions. + current_row_is_zero = true; // NOLINT + } + else + { + /// We have complex maps that will be summed with 'mergeMap' method. + /// The single row is considered non zero, and the status after merging with other rows + /// will be determined in the branch below (when key_differs == false). + current_row_is_zero = false; // NOLINT + } + + addRowImpl(raw_columns, row); } -void SummingSortedAlgorithm::initialize(Chunks chunks) +void SummingSortedAlgorithm::SummingMergedData::finishGroup() { - for (auto & chunk : chunks) - if (chunk) - preprocessChunk(chunk); + is_group_started = false; - initializeQueue(std::move(chunks)); -} - -void SummingSortedAlgorithm::consume(Chunk chunk, size_t source_num) -{ - preprocessChunk(chunk); - updateCursor(std::move(chunk), source_num); -} - - -void SummingSortedAlgorithm::insertCurrentRowIfNeeded() -{ /// We have nothing to aggregate. It means that it could be non-zero, because we have columns_not_to_aggregate. 
- if (columns_definition.columns_to_aggregate.empty()) + if (def.columns_to_aggregate.empty()) current_row_is_zero = false; - for (auto & desc : columns_definition.columns_to_aggregate) + for (auto & desc : def.columns_to_aggregate) { // Do not insert if the aggregation state hasn't been created if (desc.created) @@ -459,56 +521,120 @@ void SummingSortedAlgorithm::insertCurrentRowIfNeeded() /// (at this moment we need rollback only cols from columns_to_aggregate) if (current_row_is_zero) { - for (auto & desc : columns_definition.columns_to_aggregate) + for (auto & desc : def.columns_to_aggregate) desc.merged_column->popBack(1); return; } - merged_data.insertRow(current_row, columns_definition.column_numbers_not_to_aggregate); + size_t next_column = columns.size() - def.column_numbers_not_to_aggregate.size(); + for (auto column_number : def.column_numbers_not_to_aggregate) + { + columns[next_column]->insert(current_row[column_number]); + ++next_column; + } + + ++total_merged_rows; + ++merged_rows; + /// TODO: sum_blocks_granularity += block_size; } -void SummingSortedAlgorithm::addRow(SortCursor & cursor) +void SummingSortedAlgorithm::SummingMergedData::addRow(ColumnRawPtrs & raw_columns, size_t row) { - for (auto & desc : columns_definition.columns_to_aggregate) + // Merge maps only for same rows + for (const auto & desc : def.maps_to_sum) + if (mergeMap(desc, current_row, raw_columns, row)) + current_row_is_zero = false; + + addRowImpl(raw_columns, row); +} + +void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_columns, size_t row) +{ + for (auto & desc : def.columns_to_aggregate) { if (!desc.created) - throw Exception("Logical error in SummingSortedBlockInputStream, there are no description", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error in SummingSortedBlockInputStream, there are no description", + ErrorCodes::LOGICAL_ERROR); if (desc.is_agg_func_type) { // desc.state is not used for AggregateFunction types - auto & col = cursor->all_columns[desc.column_numbers[0]]; - assert_cast(*desc.merged_column).insertMergeFrom(*col, cursor->pos); + auto & col = raw_columns[desc.column_numbers[0]]; + assert_cast(*desc.merged_column).insertMergeFrom(*col, row); } else { // Specialized case for unary functions if (desc.column_numbers.size() == 1) { - auto & col = cursor->all_columns[desc.column_numbers[0]]; - desc.add_function(desc.function.get(), desc.state.data(), &col, cursor->pos, nullptr); + auto & col = raw_columns[desc.column_numbers[0]]; + desc.add_function(desc.function.get(), desc.state.data(), &col, row, nullptr); } else { // Gather all source columns into a vector ColumnRawPtrs columns(desc.column_numbers.size()); for (size_t i = 0; i < desc.column_numbers.size(); ++i) - columns[i] = cursor->all_columns[desc.column_numbers[i]]; + columns[i] = raw_columns[desc.column_numbers[i]]; - desc.add_function(desc.function.get(), desc.state.data(), columns.data(), cursor->pos, nullptr); + desc.add_function(desc.function.get(), desc.state.data(), columns.data(), row, nullptr); } } } } +void SummingSortedAlgorithm::SummingMergedData::initAggregateDescription() +{ + size_t num_columns = def.columns_to_aggregate.size(); + for (size_t column_number = 0; column_number < num_columns; ++column_number) + def.columns_to_aggregate[column_number].merged_column = columns[column_number].get(); +} + + +Chunk SummingSortedAlgorithm::SummingMergedData::pull() +{ + auto chunk = MergedData::pull(); + postprocessChunk(chunk, def.column_names.size(), def); + + 
initAggregateDescription(); + + return chunk; +} + + +SummingSortedAlgorithm::SummingSortedAlgorithm( + const Block & header, size_t num_inputs, + SortDescription description_, + const Names & column_names_to_sum, + size_t max_block_size) + : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) + , columns_definition(defineColumns(header, description, column_names_to_sum)) + , merged_data(getMergedDataColumns(header, columns_definition), max_block_size, columns_definition) +{ +} + +void SummingSortedAlgorithm::initialize(Chunks chunks) +{ + for (auto & chunk : chunks) + if (chunk) + preprocessChunk(chunk); + + initializeQueue(std::move(chunks)); +} + +void SummingSortedAlgorithm::consume(Chunk chunk, size_t source_num) +{ + preprocessChunk(chunk); + updateCursor(std::move(chunk), source_num); +} + IMergingAlgorithm::Status SummingSortedAlgorithm::merge() { /// Take the rows in needed order and put them in `merged_columns` until rows no more than `max_block_size` while (queue.isValid()) { bool key_differs; - bool has_previous_group = !last_key.empty(); SortCursor current = queue.current(); @@ -516,13 +642,7 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() detail::RowRef current_key; current_key.set(current); - if (!has_previous_group) /// The first key encountered. - { - key_differs = true; - current_row_is_zero = true; - } - else - key_differs = !last_key.hasEqualSortColumnsWith(current_key); + key_differs = last_key.empty() || !last_key.hasEqualSortColumnsWith(current_key); last_key = current_key; last_chunk_sort_columns.clear(); @@ -530,49 +650,21 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() if (key_differs) { - if (has_previous_group) + if (merged_data.isGroupStarted()) /// Write the data for the previous group. - insertCurrentRowIfNeeded(); + merged_data.finishGroup(); if (merged_data.hasEnoughRows()) { /// The block is now full and the last row is calculated completely. last_key.reset(); - return Status(merged_data.pull(column_names.size())); + return Status(merged_data.pull()); } - setRow(current_row, current, column_names); - - /// Reset aggregation states for next row - for (auto & desc : columns_definition.columns_to_aggregate) - desc.createState(); - - // Start aggregations with current row - addRow(current); - - if (columns_definition.maps_to_sum.empty()) - { - /// We have only columns_to_aggregate. The status of current row will be determined - /// in 'insertCurrentRowIfNeeded' method on the values of aggregate functions. - current_row_is_zero = true; // NOLINT - } - else - { - /// We have complex maps that will be summed with 'mergeMap' method. - /// The single row is considered non zero, and the status after merging with other rows - /// will be determined in the branch below (when key_differs == false). - current_row_is_zero = false; // NOLINT - } + merged_data.startGroup(current->all_columns, current->pos); } else - { - addRow(current); - - // Merge maps only for same rows - for (const auto & desc : columns_definition.maps_to_sum) - if (mergeMap(desc, current_row, current)) - current_row_is_zero = false; - } + merged_data.addRow(current->all_columns, current->pos); if (!current->isLast()) { @@ -587,10 +679,11 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() } /// We will write the data for the last group, if it is non-zero. - /// If it is zero, and without it the output stream will be empty, we will write it anyway. 
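 /// (A "zero" group is one where every summed column aggregated to its default
 /// value; finishGroup() rolls such a group back instead of emitting it,
 /// unless there is nothing to aggregate at all.)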
- insertCurrentRowIfNeeded(); + if (merged_data.isGroupStarted()) + merged_data.finishGroup(); + last_chunk_sort_columns.clear(); - return Status(merged_data.pull(column_names.size()), true); + return Status(merged_data.pull(), true); } } diff --git a/src/Processors/Merges/SummingSortedAlgorithm.h b/src/Processors/Merges/SummingSortedAlgorithm.h index 8cd7fb1ff13..4b750063df5 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/SummingSortedAlgorithm.h @@ -25,13 +25,7 @@ public: Status merge() override; struct AggregateDescription; - - /// Stores numbers of key-columns and value-columns. - struct MapDescription - { - std::vector key_col_nums; - std::vector val_col_nums; - }; + struct MapDescription; /// This structure define columns into one of three types: /// * columns which values not needed to be aggregated @@ -39,6 +33,10 @@ public: /// * mapping for nested columns struct ColumnsDefinition { + ColumnsDefinition(); /// Is needed because destructor is defined. + ColumnsDefinition(ColumnsDefinition &&) noexcept; /// Is needed because destructor is defined. + ~ColumnsDefinition(); /// Is needed because otherwise std::vector's destructor uses incomplete types. + /// Columns with which values should not be aggregated. ColumnNumbers column_numbers_not_to_aggregate; /// Columns which should be aggregated. @@ -46,6 +44,10 @@ public: /// Mapping for nested columns. std::vector maps_to_sum; + /// Names of columns from header. + Names column_names; + + /// It's not the same as column_names.size() size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } }; @@ -54,108 +56,37 @@ public: { private: using MergedData::pull; + using MergedData::insertRow; public: - using MergedData::MergedData; + SummingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_); - SummingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_) - , def(def_) - { - } + void startGroup(ColumnRawPtrs & raw_columns, size_t row); + void finishGroup(); - void insertRow(const Row & row, const ColumnNumbers & column_numbers) - { - size_t next_column = columns.size() - column_numbers.size(); - for (auto column_number : column_numbers) - { - columns[next_column]->insert(row[column_number]); - ++next_column; - } + bool isGroupStarted() const { return is_group_started; } + void addRow(ColumnRawPtrs & raw_columns, size_t row); /// Possible only when group was started. - ++total_merged_rows; - ++merged_rows; - /// TODO: sum_blocks_granularity += block_size; - } - - /// Initialize aggregate descriptions with columns. - void initAggregateDescription(std::vector & columns_to_aggregate) - { - size_t num_columns = columns_to_aggregate.size(); - for (size_t column_number = 0; column_number < num_columns; ++column_number) - columns_to_aggregate[column_number].merged_column = columns[column_number].get(); - } - - Chunk pull(size_t num_result_columns); + Chunk pull(); private: ColumnsDefinition & def; + + bool is_group_started = false; + + Row current_row; + bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? It is updated incrementally. + + void addRowImpl(ColumnRawPtrs & raw_columns, size_t row); + + /// Initialize aggregate descriptions with columns. + void initAggregateDescription(); }; private: - Row current_row; - bool current_row_is_zero = true; /// Are all summed columns zero (or empty)? 
It is updated incrementally. - + /// Order between members is important because merged_data has reference to columns_definition. ColumnsDefinition columns_definition; SummingMergedData merged_data; - - Names column_names; - - void addRow(SortCursor & cursor); - void insertCurrentRowIfNeeded(); - -public: - /// Stores aggregation function, state, and columns to be used as function arguments. - struct AggregateDescription - { - /// An aggregate function 'sumWithOverflow' or 'sumMapWithOverflow' for summing. - AggregateFunctionPtr function; - IAggregateFunction::AddFunc add_function = nullptr; - std::vector column_numbers; - IColumn * merged_column = nullptr; - AlignedBuffer state; - bool created = false; - - /// In case when column has type AggregateFunction: use the aggregate function from itself instead of 'function' above. - bool is_agg_func_type = false; - - void init(const char * function_name, const DataTypes & argument_types) - { - function = AggregateFunctionFactory::instance().get(function_name, argument_types); - add_function = function->getAddressOfAddFunction(); - state.reset(function->sizeOfData(), function->alignOfData()); - } - - void createState() - { - if (created) - return; - if (is_agg_func_type) - merged_column->insertDefault(); - else - function->create(state.data()); - created = true; - } - - void destroyState() - { - if (!created) - return; - if (!is_agg_func_type) - function->destroy(state.data()); - created = false; - } - - /// Explicitly destroy aggregation state if the stream is terminated - ~AggregateDescription() - { - destroyState(); - } - - AggregateDescription() = default; - AggregateDescription(AggregateDescription &&) = default; - AggregateDescription(const AggregateDescription &) = delete; - }; }; } From 1ef6e8f7c77bd137a3ab96354aa693e48f1127b4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 13 Apr 2020 23:58:29 +0300 Subject: [PATCH 380/752] Fix build. --- .../Merges/SummingSortedAlgorithm.cpp | 24 +++++++++++-------- .../Merges/SummingSortedAlgorithm.h | 3 --- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/Processors/Merges/SummingSortedAlgorithm.cpp b/src/Processors/Merges/SummingSortedAlgorithm.cpp index 923e890abd7..eb61cbd5333 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/SummingSortedAlgorithm.cpp @@ -38,7 +38,8 @@ struct SummingSortedAlgorithm::AggregateDescription AlignedBuffer state; bool created = false; - /// In case when column has type AggregateFunction: use the aggregate function from itself instead of 'function' above. + /// In case when column has type AggregateFunction: + /// use the aggregate function from itself instead of 'function' above. 
bool is_agg_func_type = false; void init(const char * function_name, const DataTypes & argument_types) @@ -333,12 +334,13 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns( static MutableColumns getMergedDataColumns( const Block & header, - const SummingSortedAlgorithm::ColumnsDefinition & columns_definition) + const SummingSortedAlgorithm::ColumnsDefinition & def) { MutableColumns columns; - columns.reserve(columns_definition.getNumColumns()); + size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size(); + columns.reserve(num_columns); - for (auto & desc : columns_definition.columns_to_aggregate) + for (auto & desc : def.columns_to_aggregate) { // Wrap aggregated columns in a tuple to match function signature if (!desc.is_agg_func_type && isTuple(desc.function->getReturnType())) @@ -354,7 +356,7 @@ static MutableColumns getMergedDataColumns( columns.emplace_back(header.safeGetByPosition(desc.column_numbers[0]).column->cloneEmpty()); } - for (auto & column_number : columns_definition.column_numbers_not_to_aggregate) + for (auto & column_number : def.column_numbers_not_to_aggregate) columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn()); return columns; @@ -496,12 +498,14 @@ void SummingSortedAlgorithm::SummingMergedData::finishGroup() if (desc.column_numbers.size() == 1) { // Flag row as non-empty if at least one column number if non-zero - current_row_is_zero = current_row_is_zero && desc.merged_column->isDefaultAt(desc.merged_column->size() - 1); + current_row_is_zero = current_row_is_zero + && desc.merged_column->isDefaultAt(desc.merged_column->size() - 1); } else { /// It is sumMapWithOverflow aggregate function. - /// Assume that the row isn't empty in this case (just because it is compatible with previous version) + /// Assume that the row isn't empty in this case + /// (just because it is compatible with previous version) current_row_is_zero = false; } } @@ -574,11 +578,11 @@ void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_c else { // Gather all source columns into a vector - ColumnRawPtrs columns(desc.column_numbers.size()); + ColumnRawPtrs column_ptrs(desc.column_numbers.size()); for (size_t i = 0; i < desc.column_numbers.size(); ++i) - columns[i] = raw_columns[desc.column_numbers[i]]; + column_ptrs[i] = raw_columns[desc.column_numbers[i]]; - desc.add_function(desc.function.get(), desc.state.data(), columns.data(), row, nullptr); + desc.add_function(desc.function.get(), desc.state.data(), column_ptrs.data(), row, nullptr); } } } diff --git a/src/Processors/Merges/SummingSortedAlgorithm.h b/src/Processors/Merges/SummingSortedAlgorithm.h index 4b750063df5..efa67cebe88 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/SummingSortedAlgorithm.h @@ -46,9 +46,6 @@ public: /// Names of columns from header. Names column_names; - - /// It's not the same as column_names.size() - size_t getNumColumns() const { return column_numbers_not_to_aggregate.size() + columns_to_aggregate.size(); } }; /// Specialization for SummingSortedTransform. Inserts only data for non-aggregated columns. From 2543741a9afbb9909c0c52670b13fba606b7fbfb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 14 Apr 2020 00:03:11 +0300 Subject: [PATCH 381/752] Fix build. 
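The fix below sets `is_group_started` in `startGroup()`: the preceding
refactoring introduced the flag and cleared it in `finishGroup()`, but never
assigned it on the start path, so `isGroupStarted()` could not see an open
group. A self-contained toy model of the start/add/finish lifecycle this flag
tracks (illustration only, not ClickHouse code):

```cpp
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    /// Rows arrive sorted by key, as they do from the sorting queue in merge().
    std::vector<std::pair<std::string, long long>> sorted_rows
        = {{"a", 1}, {"a", 2}, {"b", 0}, {"b", 0}, {"c", 5}};

    bool is_group_started = false;
    std::string key;
    long long sum = 0;

    auto finish_group = [&]
    {
        is_group_started = false;
        if (sum != 0) /// "zero" groups are rolled back, as in finishGroup()
            std::cout << key << " -> " << sum << '\n';
    };

    for (const auto & [row_key, value] : sorted_rows)
    {
        if (!is_group_started || row_key != key) /// the key differs: new group
        {
            if (is_group_started)
                finish_group();
            is_group_started = true; /// the assignment this patch adds to startGroup()
            key = row_key;
            sum = 0;
        }
        sum += value; /// addRow(): accumulate into the open group
    }

    if (is_group_started) /// the last group is written after the loop, as in merge()
        finish_group();
}
```

This prints "a -> 3" and "c -> 5" and drops the all-zero "b" group, which is
exactly the SummingMergeTree contract the algorithm implements.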
--- src/Processors/Merges/SummingSortedAlgorithm.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/Merges/SummingSortedAlgorithm.cpp b/src/Processors/Merges/SummingSortedAlgorithm.cpp index eb61cbd5333..ad005c77ea8 100644 --- a/src/Processors/Merges/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/SummingSortedAlgorithm.cpp @@ -448,6 +448,8 @@ SummingSortedAlgorithm::SummingMergedData::SummingMergedData( void SummingSortedAlgorithm::SummingMergedData::startGroup(ColumnRawPtrs & raw_columns, size_t row) { + is_group_started = true; + setRow(current_row, raw_columns, row, def.column_names); /// Reset aggregation states for next row From 29bb9f666565129587846f1507c9a4a5dad8a24e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 14 Apr 2020 00:15:58 +0300 Subject: [PATCH 382/752] simple backport script --- utils/simple-backport/README.md | 44 ++++++++- utils/simple-backport/backport.sh | 16 +++- utils/simple-backport/changelog.sh | 69 ++++++++++++++ utils/simple-backport/format-changelog.py | 109 ++++++++++++++++++++++ 4 files changed, 229 insertions(+), 9 deletions(-) create mode 100755 utils/simple-backport/changelog.sh create mode 100755 utils/simple-backport/format-changelog.py diff --git a/utils/simple-backport/README.md b/utils/simple-backport/README.md index 13378f93989..c5a625ca0d1 100644 --- a/utils/simple-backport/README.md +++ b/utils/simple-backport/README.md @@ -52,22 +52,56 @@ $ cat 20.1-report.tsv | cut -f1 | sort | uniq -c | sort -rn 10 no-backport ``` - ### Как разметить пулреквест? -По умолчанию бекпортируются все пулреквесты, у которых в описании указана категория чейнжлога Bug fix. Если этого недостаточно, используйте теги: -* v20.1-backported -- этот пулреквест уже бекпортирован в ветку 20.1. На случай, если автоматически не определилось. +По умолчанию бекпортируются все пулреквесты, у которых в описании указана +категория чейнжлога Bug fix. Если этого недостаточно, используйте теги: * v20.1-no-backport -- в ветку 20.1 бекпортировать не нужно. * pr-no-backport -- ни в какие ветки бекпортировать не нужно. -* v20.1-conflicts -- при бекпорте в 20.1 произошёл конфликт. Такие пулреквесты скрипт пропускает, к ним можно потом вернуться. +* v20.1-conflicts -- при бекпорте в 20.1 произошёл конфликт. Такие пулреквесты + скрипт пропускает, к ним можно потом вернуться. * pr-must-backport -- нужно бекпортировать в поддерживаемые ветки. * v20.1-must-backport -- нужно бекпортировать в 20.1. +### Я бекпортировал, почему скрипт не видит? +* Сообщение коммита должно содержать текст backport/cherry-pick #12345, или + иметь вид стандартного гитхабовского мерж-коммита для ПР #12345. +* Коммит должен быть достижим по `git log --first-parent my-branch`. Возможно, + в ветке сделали pull с merge, от чего некоторые коммиты из ветки становятся +недоступны по `--first-parent`. + +В качестве обхода, добавьте в ветку пустой коммит с текстом вроде "backport +#12345 -- real backport commit is ". ### Я поправил пулреквест, почему скрипт не видит? -В процессе работы скрипт кеширует данные о пулреквестах в текущей папке, чтобы экономить квоту гитхаба. Удалите закешированные файлы, например, для всех реквестов, которые не помечены как пропущенные: +В процессе работы скрипт кеширует данные о пулреквестах в текущей папке, чтобы +экономить квоту гитхаба. 
Удалите закешированные файлы, например, для всех +реквестов, которые не помечены как пропущенные: ``` $ cat <ваша-ветка>-report.tsv | grep -v "^skip" | cut -f4 $ cat <ваша-ветка>-report.tsv | grep -v "^skip" | cut -f4 | xargs rm ``` +## Как сформировать change log +В этой же папке запустите: +``` +$ time GITHUB_TOKEN=... ./changelog.sh v20.3.4.10-stable v20.3.5.21-stable +9 PRs added between v20.3.4.10-stable and v20.3.5.21-stable. +### ClickHouse release v20.3.5.21-stable FIXME as compared to v20.3.4.10-stable +#### Bug Fix + +* Fix 'Different expressions with the same alias' error when query has PREWHERE + and WHERE on distributed table and `SET distributed_product_mode = 'local'`. +[#9871](https://github.com/ClickHouse/ClickHouse/pull/9871) ([Artem +Zuikov](https://github.com/4ertus2)). +... +``` + +Скрипт выведет changelog на экран, а также сохранит его в `./changelog.md`. +Скопируйте этот текст в большой changelog, проверьте и поправьте версию и дату +релиза, вычитайте сообщения. Если сообщения неправильные, обязательно исправьте +их на гитхабе -- это поможет при последующей генерации changelog для других +версий, содержащих этот пулреквест. Чтобы скрипт подтянул изменения с гитхаба, +удалите соответствующие файлы `./pr12345.json`. Если вы часто видите +неправильно оформленные пулреквесты, это повод подумать об улучшении проверки +Description check в CI. diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index 7fbd34f0a08..b19df885c9e 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -10,10 +10,13 @@ merge_base=$(git merge-base origin/master "origin/$branch") git log "$merge_base..origin/master" --first-parent > master-log.txt git log "$merge_base..origin/$branch" --first-parent > "$branch-log.txt" +# NOTE keep in sync with ./changelog.sh. # Search for PR numbers in commit messages. First variant is normal merge, and second -# variant is squashed. +# variant is squashed. Next are some backport message variants. find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; - s/^.*(#\([[:digit:]]\+\))$/\1/p") + s/^.*(#\([[:digit:]]\+\))$/\1/p; + s/^.*back[- ]*port[ ]*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ ]*#\([[:digit:]]\+\).*$/\1/Ip") "${find_prs[@]}" master-log.txt | sort -rn > master-prs.txt "${find_prs[@]}" "$branch-log.txt" | sort -rn > "$branch-prs.txt" @@ -39,7 +42,7 @@ do rm "$file" break fi - sleep 0.5 + sleep 0.1 fi if ! [ "$pr" == "$(jq -r .number "$file")" ] @@ -61,7 +64,12 @@ do if echo "$labels" | grep -x "pr-must-backport\|v$branch-must-backport" > /dev/null; then action="backport"; fi if echo "$labels" | grep -x "v$branch-conflicts" > /dev/null; then action="conflict"; fi if echo "$labels" | grep -x "pr-no-backport\|v$branch-no-backport" > /dev/null; then action="no-backport"; fi - if echo "$labels" | grep -x "v$branch\|v$branch-backported" > /dev/null; then action="done"; fi + # FIXME Ignore "backported" labels for now. If we can't find the backport commit, + # this means that the changelog script also won't be able to. An alternative + # way to mark PR as backported is to add an empty commit with text like + # "backported #12345", so that it can be found between tags and put in proper + # place in changelog. 
+    #if echo "$labels" | grep -x "v$branch\|v$branch-backported" > /dev/null; then action="done"; fi
 
     # Find merge commit SHA for convenience
     merge_sha="$(jq -r .merge_commit_sha "$file")"
diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh
new file mode 100755
index 00000000000..43a0b2d46da
--- /dev/null
+++ b/utils/simple-backport/changelog.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+set -e
+
+from="$1"
+to="$2"
+
+git log "$from..$to" --first-parent > "changelog-log.txt"
+
+# NOTE keep in sync with ./backport.sh.
+# Search for PR numbers in commit messages. First variant is normal merge, and second
+# variant is squashed. Next are some backport message variants.
+find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p;
+    s/^.*(#\([[:digit:]]\+\))$/\1/p;
+    s/^.*back[- ]*port[ ]*#\([[:digit:]]\+\).*$/\1/Ip;
+    s/^.*cherry[- ]*pick[ ]*#\([[:digit:]]\+\).*$/\1/Ip")
+
+"${find_prs[@]}" "changelog-log.txt" | sort -rn > "changelog-prs.txt"
+
+
+echo "$(wc -l < "changelog-prs.txt") PRs added between $from and $to."
+
+function github_download()
+{
+    local url=${1}
+    local file=${2}
+    if ! [ -f "$file" ]
+    then
+        if ! curl -H "Authorization: token $GITHUB_TOKEN" \
+                -sSf "$url" \
+                > "$file"
+        then
+            >&2 echo "Failed to download '$url' to '$file'. Contents: '"
+            >&2 cat "$file"
+            >&2 echo "'."
+            rm "$file"
+            return 1
+        fi
+        sleep 0.1
+    fi
+}
+
+for pr in $(cat "changelog-prs.txt")
+do
+    # Download PR info from github.
+    file="pr$pr.json"
+    github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue
+
+    if ! [ "$pr" == "$(jq -r .number "$file")" ]
+    then
+        >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')."
+        continue
+    fi
+
+    # Download author info from github.
+    user_id=$(jq -r .user.id "$file")
+    user_file="user$user_id.json"
+    github_download "$(jq -r .user.url "$file")" "$user_file" || continue
+
+    if ! [ "$user_id" == "$(jq -r .id "$user_file")" ]
+    then
+        >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')."
+        continue
+    fi
+done
+
+echo "### ClickHouse release $to FIXME as compared to $from
+" > changelog.md
+./format-changelog.py changelog-prs.txt >> changelog.md
+cat changelog.md
diff --git a/utils/simple-backport/format-changelog.py b/utils/simple-backport/format-changelog.py
new file mode 100755
index 00000000000..0f379d5529f
--- /dev/null
+++ b/utils/simple-backport/format-changelog.py
@@ -0,0 +1,109 @@
+#!/usr/bin/python3
+
+import os
+import sys
+import itertools
+import argparse
+import json
+import collections
+import re
+
+parser = argparse.ArgumentParser(description='Format changelog for given PRs.')
+parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, default=sys.stdin, help='File with PR numbers, one per line.')
+args = parser.parse_args()
+
+# This function mirrors the PR description checks in ClickhousePullRequestTrigger.
+# Returns False if the PR should not be mentioned in the changelog.
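+#
+# For illustration only: a hypothetical PR description that these checks
+# accept (the exact wording of the real PR template is not reproduced here)
+# would look roughly like
+#
+#     Changelog category (leave one):
+#     - Bug Fix
+#
+#     Changelog entry (up to few sentences):
+#     Fixed a rare crash in ...
+#
+# i.e. a header line containing "category" and ending with ':', the category
+# itself on the following line, and an entry introduced by a "Short
+# description" / "Changelog entry" header and terminated by an empty line.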
+def parse_one_pull_request(item): + description = item['body'] + # Don't skip empty lines because they delimit parts of description + lines = [line for line in map(lambda x: x.strip(), description.split('\n') if description else [])] + lines = [re.sub(r'\s+', ' ', l) for l in lines] + + category = '' + entry = '' + + if lines: + i = 0 + while i < len(lines): + if re.match(r'(?i).*category.*:$', lines[i]): + i += 1 + if i >= len(lines): + break + category = re.sub(r'^[-*\s]*', '', lines[i]) + i += 1 + elif re.match(r'(?i)^\**\s*(Short description|Change\s*log entry)', lines[i]): + i += 1 + # Can have one empty line between header and the entry itself. Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = ' '.join(entry_lines) + else: + i += 1 + + if not category: + # Shouldn't happen, because description check in CI should catch such PRs. + # Fall through, so that it shows up in output and the user can fix it. + category = "NO CL CATEGORY" + + # Filter out the PR categories that are not for changelog. + if re.match(r'(?i)doc|((non|in|not|un)[-\s]*significant)', category): + return False + + if not entry: + # Shouldn't happen, because description check in CI should catch such PRs. + category = "NO CL ENTRY" + entry = "NO CL ENTRY: '" + item['title'] + "'" + + entry = entry.strip() + if entry[-1] != '.': + entry += '.' + + item['entry'] = entry + item['category'] = category + + return True + + +category_to_pr = collections.defaultdict(lambda: []) +users = {} +for line in args.file[0]: + pr = json.loads(open(f'pr{line.strip()}.json').read()) + assert(pr['number']) + if not parse_one_pull_request(pr): + continue + + assert(pr['category']) + category_to_pr[pr['category']].append(pr) + user_id = pr['user']['id'] + users[user_id] = json.loads(open(f'user{user_id}.json').read()) + +def print_category(category): + print("#### " + category) + print() + for pr in category_to_pr[category]: + user = users[pr["user"]["id"]] + user_name = user["name"] if user["name"] else user["login"] + + # Substitute issue links + pr["entry"] = re.sub(r'#([0-9]{4,})', r'[#\1](https://github.com/ClickHouse/ClickHouse/issues/\1)', pr["entry"]) + + print(f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).') + + print() + +# Print categories in preferred order +categories_preferred_order = ['Backward Incompatible Change', 'New Feature', 'Bug Fix', 'Improvement', 'Performance Improvement', 'Build/Testing/Packaging Improvement', 'Other'] +for category in categories_preferred_order: + if category in category_to_pr: + print_category(category) + category_to_pr.pop(category) + +# Print the rest of the categories +for category in category_to_pr: + print_category(category) From a7c5f622ea4f5fdb6776994492533590b4789ff0 Mon Sep 17 00:00:00 2001 From: Konstantin Lebedev Date: Mon, 13 Apr 2020 10:50:00 +0300 Subject: [PATCH 383/752] Add string_utils for tests/zookeeper_impl.cpp --- src/Common/ZooKeeper/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/tests/CMakeLists.txt b/src/Common/ZooKeeper/tests/CMakeLists.txt index 06716e49918..45a48ddc7a9 100644 --- a/src/Common/ZooKeeper/tests/CMakeLists.txt +++ b/src/Common/ZooKeeper/tests/CMakeLists.txt @@ -2,7 +2,7 @@ add_executable(zkutil_test_commands zkutil_test_commands.cpp) 
 target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper)
 
 add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp)
-target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper)
+target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper string_utils)
 
 add_executable(zkutil_test_lock zkutil_test_lock.cpp)
 target_link_libraries(zkutil_test_lock PRIVATE clickhouse_common_zookeeper)

From 7c6a0c27e775720c5e078a4c91e3ba3c0c9a4a7a Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Tue, 14 Apr 2020 01:05:05 +0300
Subject: [PATCH 384/752] simple backport script

---
 utils/simple-backport/backport.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh
index b19df885c9e..ade0b54f24d 100755
--- a/utils/simple-backport/backport.sh
+++ b/utils/simple-backport/backport.sh
@@ -15,8 +15,8 @@ git log "$merge_base..origin/$branch" --first-parent > "$branch-log.txt"
 # variant is squashed. Next are some backport message variants.
 find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p;
     s/^.*(#\([[:digit:]]\+\))$/\1/p;
-    s/^.*back[- ]*port[ ]*#\([[:digit:]]\+\).*$/\1/Ip;
-    s/^.*cherry[- ]*pick[ ]*#\([[:digit:]]\+\).*$/\1/Ip")
+    s/^.*back[- ]*port[ of]*#\([[:digit:]]\+\).*$/\1/Ip;
+    s/^.*cherry[- ]*pick[ of]*#\([[:digit:]]\+\).*$/\1/Ip")
 
 "${find_prs[@]}" master-log.txt | sort -rn > master-prs.txt
 "${find_prs[@]}" "$branch-log.txt" | sort -rn > "$branch-prs.txt"

From 93e0313ed46a56ff470e4ddc09046bae38d6da56 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com>
Date: Tue, 14 Apr 2020 01:40:07 +0300
Subject: [PATCH 385/752] boop the CI

---
 .../config/config.d/perf-comparison-tweaks-config.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml
index 090d8ebe581..e41ab8eb75d 100644
--- a/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml
+++ b/docker/test/performance-comparison/config/config.d/perf-comparison-tweaks-config.xml
@@ -1,4 +1,4 @@
-
+
 
 

From 2c88e914d704ace3fb3ca503609c55bc0398ea39 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 14 Apr 2020 02:24:33 +0300
Subject: [PATCH 386/752] Update roadmap. Also revert digital degradation due
 to accidental changes with automatic scripts.

---
 docs/ru/extended_roadmap.md | 179 ++++++++++++++++++++++++------------
 1 file changed, 122 insertions(+), 57 deletions(-)

diff --git a/docs/ru/extended_roadmap.md b/docs/ru/extended_roadmap.md
index 1637b54311a..135a49ca8fb 100644
--- a/docs/ru/extended_roadmap.md
+++ b/docs/ru/extended_roadmap.md
@@ -39,18 +39,20 @@ Upd. Большая часть задачи реализована и добав
 Требует 1.3. Будет делать [Александр Сапин](https://github.com/alesapin).
 Ура, сделано.
 
-### 1.5. ALTER RENAME COLUMN. {#alter-rename-column}
+### 1.5. + ALTER RENAME COLUMN. {#alter-rename-column}
 [\#6861](https://github.com/ClickHouse/ClickHouse/issues/6861)
 
 Требует 1.3. Будет делать [Александр Сапин](https://github.com/alesapin).
 
-### 1.6. Полиморфные куски данных. {#polimorfnye-kuski-dannykh}
+### 1.6. + Полиморфные куски данных. {#polimorfnye-kuski-dannykh}
 
-Компактные куски - Q1, куски в оперативке Q1/Q2.
+Компактные куски - Q1, куски в оперативке Q1/Q2 - пункт 1.7.
Компактные куски реализованы, ещё не включены по-умолчанию. Первым шагом включаем по-умолчанию для системных таблиц. +Upd. Включено для системных таблиц. + Делает [Антон Попов](https://github.com/CurtizJ), первый рабочий вариант в декабре. Пререквизит чтобы снизить сложность мелких INSERT, что в свою очередь нужно для 1.12, иначе задача 1.12 не сможет нормально работать. Особенно нужно для Яндекс.Облака. Данные в таблицах типа MergeTree в ClickHouse хранятся в виде набора независимых «кусков». Внутри куска, каждый столбец, а также индекс, хранится в отдельных файлах. Это сделано для возможности быстрых манипуляций со столбцами (пример - запрос ALTER DROP COLUMN). При вставке данных (INSERT), создаётся новый кусок. Для таблиц с большим количеством столбцов, запросы INSERT с маленьким количеством строк являются неэффективными, так как требуют создания большого количества файлов в файловой системе. Это является врождённой особенностью ClickHouse - одной из первой проблем, с которыми сталкиваются пользователи. Пользователям приходится буферизовывать данные и собирать их в более крупные пачки перед вставкой в ClickHouse. @@ -61,7 +63,7 @@ Upd. Большая часть задачи реализована и добав ### 1.7. Буферизация и WAL в MergeTree. {#buferizatsiia-i-wal-v-mergetree} -Требует 1.6. +Требует 1.6. Антон Попов. Задача взята в работу. Q2. ### 1.8. + Перенос между разделами по TTL. {#perenos-mezhdu-razdelami-po-ttl} @@ -74,7 +76,7 @@ Q1. Закоммичено, но есть технический долг, ко Будет делать Сорокин Николай, ВШЭ и Яндекс. -Сейчас пользователь может задать в таблице выражение, которое определяет, сколько времени хранятся данные. Обычно это выражение задаётся относительно значения столбца с датой - например: удалять данные через три месяца. https://clickhouse.tech/docs/ru/operations/table\_engines/mergetree/\#table\_engine-mergetree-ttl +Сейчас пользователь может задать в таблице выражение, которое определяет, сколько времени хранятся данные. Обычно это выражение задаётся относительно значения столбца с датой - например: удалять данные через три месяца. https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/\#table_engine-mergetree-ttl Это может быть задано для всей таблицы (тогда строки целиком удаляются после указанного времени) или для отдельных столбцов (тогда данные столбца физически удаляются с диска, а строки в таблице остаются; при чтении значений столбца, они читаются как значения по-умолчанию). @@ -88,7 +90,7 @@ Q1. Закоммичено, но есть технический долг, ко А вот пункт 2 требуется продумать. Не очевидно даже, какой лучше использовать синтаксис для этого при создании таблицы. Но мы придумаем - сразу видно несколько вариантов. -Частный случай такой задачи уже есть в https://clickhouse.tech/docs/ru/operations/table\_engines/graphitemergetree/ Но это было сделано для конкретной задачи. А надо обобщить. +Частный случай такой задачи уже есть в https://clickhouse.tech/docs/ru/operations/table_engines/graphitemergetree/ Но это было сделано для конкретной задачи. А надо обобщить. ### 1.10. Пережатие старых данных в фоне. {#perezhatie-starykh-dannykh-v-fone} @@ -100,17 +102,15 @@ Q1. Закоммичено, но есть технический долг, ко Предлагается добавить в ClickHouse настройки по пережатию данных и фоновые потоки, выполняющие эту задачу. -### 1.11. Виртуальная файловая система. {#virtualnaia-failovaia-sistema} +### 1.11. + Виртуальная файловая система. {#virtualnaia-failovaia-sistema} -В процессе реализации, сейчас на VFS переведены Log, TinyLog, StripeLog, готовится MergeTree. 
+На VFS переведены Log, TinyLog, StripeLog, а также MergeTree, что доказывает состоятельность реализации. -Q2. - -Нужно для Яндекс.Облака. Делает Александр, Яндекс.Облако, а также Олег Ершов, ВШЭ и Яндекс. +Нужно для Яндекс.Облака. Делает Александр, Яндекс.Облако. ClickHouse использует для хранения данных локальную файловую систему. Существует сценарий работы, в котором размещение старых (архивных) данных было бы выгодно на удалённой файловой системе. Если файловая система POSIX совместимая, то это не составляет проблем: ClickHouse успешно работает с Ceph, GlusterFS, MooseFS. Также востребованным является сценарий использования S3 (из-за доступности в облаке) или HDFS (для интеграции с Hadoop). Но эти файловые системы не являются POSIX совместимыми. Хотя для них существуют FUSE драйверы, но скорость работы сильно страдает и поддержка неполная. -ClickHouse использует небольшое подмножество функций ФС, но в то же время, и некоторые специфические части: симлинки и хардлинки, O\_DIRECT. Предлагается выделить всё взаимодействие с файловой системой в отдельный интерфейс. +ClickHouse использует небольшое подмножество функций ФС, но в то же время, и некоторые специфические части: симлинки и хардлинки, O_DIRECT. Предлагается выделить всё взаимодействие с файловой системой в отдельный интерфейс. ### 1.12. Экспериментальная реализация VFS поверх S3 и HDFS. {#eksperimentalnaia-realizatsiia-vfs-poverkh-s3-i-hdfs} @@ -121,13 +121,15 @@ Q2. Upd. Олег будет делать только часть про HDFS. +Upd. Реализация поверх S3 является рабочей на уровне PoC. + ### 1.13. Ускорение запросов с FINAL. {#uskorenie-zaprosov-s-final} -Требует 2.1. Делает [Николай Кочетов](https://github.com/KochetovNicolai). Нужно для Яндекс.Метрики. +Требует 2.1. Делает [Николай Кочетов](https://github.com/KochetovNicolai). Нужно для Яндекс.Метрики. Q2. ### 1.14. Не писать столбцы, полностью состоящие из нулей. {#ne-pisat-stolbtsy-polnostiu-sostoiashchie-iz-nulei} -Антон Попов. Q1/Q2. +Антон Попов. Q2. В очереди. Простая задача, является небольшим пререквизитом для потенциальной поддержки полуструктурированных данных. ### 1.15. Возможность иметь разный первичный ключ в разных кусках. {#vozmozhnost-imet-raznyi-pervichnyi-kliuch-v-raznykh-kuskakh} @@ -146,6 +148,7 @@ Upd. Олег будет делать только часть про HDFS. Требует 1.3 и 1.6. Полная замена hard links на sym links, что будет лучше для 1.12. + ## 2. Крупные рефакторинги. {#krupnye-refaktoringi} Для обоснования необходимости смотрите ссылки в описании других задач. @@ -161,6 +164,8 @@ Upd. Включили по-умолчанию. Удаление старого Upd. Уже есть первый релиз, в котором это включено по-умолчанию. +Upd. Всё ещё ждём удаление старого кода, которое должно случиться после релиза 20.4. + ### 2.2. Инфраструктура событий/метрик/ограничений/квот/трассировки. {#infrastruktura-sobytiimetrikogranicheniikvottrassirovki} В очереди. https://gist.github.com/alexey-milovidov/d62d73222d83b9319dc519cbb13aeff6 @@ -193,6 +198,8 @@ Upd. Каталог БД вынесен из Context. Средний приоритет. Нужно для YQL. +Upd. В очереди. Иван Лежанкин. + ### 2.9. Логгировние в format-стиле. {#loggirovnie-v-format-stile} Делает [Иван Лежанкин](https://github.com/abyss7). Низкий приоритет. @@ -212,10 +219,14 @@ Upd. Каталог БД вынесен из Context. Задачу делает Алексей Миловидов. Прогресс 50% и разработка временно приостановлена. +Upd. Разработка всё ещё приостановлена. + ### 2.13. Каждая функция в отдельном файле. {#kazhdaia-funktsiia-v-otdelnom-faile} Задачу делает Алексей Миловидов. Прогресс 80%. Потребуется помощь других разработчиков. 
+Upd. Поползновения наблюдаются. + ### 2.14. Все функции с состоянием переделать на FunctionBuilder. {#vse-funktsii-s-sostoianiem-peredelat-na-functionbuilder} Долг [Николай Кочетов](https://github.com/KochetovNicolai). Сейчас код находится в переходном состоянии, что неприемлемо. @@ -224,13 +235,14 @@ Upd. Каталог БД вынесен из Context. Для нормализации работы materialized views поверх Merge, Distributed, Kafka. + ## 3. Документация. {#dokumentatsiia} Здесь задачи только по инфраструктуре документации. ### 3.1. Перенос документации по функциям в код. {#perenos-dokumentatsii-po-funktsiiam-v-kod} -Требует 2.12 и 2.13. Хотим в Q1/Q2, средний приоритет. +Требует 2.12 и 2.13. Хотим в Q2, средний приоритет. ### 3.2. Перенос однородных частей документации в код. {#perenos-odnorodnykh-chastei-dokumentatsii-v-kod} @@ -246,11 +258,12 @@ Upd. Иван Блинков сделал эту задачу путём зам Эту задачу сделает [Иван Блинков](https://github.com/blinkov/), до конца декабря 2019. Сделано. + ## 4. Сетевое взаимодействие. {#setevoe-vzaimodeistvie} ### 4.1. Уменьшение числа потоков при распределённых запросах. {#umenshenie-chisla-potokov-pri-raspredelionnykh-zaprosakh} -[Никита Лапков](https://github.com/laplab), весна 2020. Upd. Есть прототип. Upd. Он не работает. +Весна 2020. Upd. Есть прототип. Upd. Он не работает. Upd. Человек отказался от задачи, теперь сроки не определены. ### 4.2. Спекулятивное выполнение запросов на нескольких репликах. {#spekuliativnoe-vypolnenie-zaprosov-na-neskolkikh-replikakh} @@ -262,6 +275,8 @@ Upd. Иван Блинков сделал эту задачу путём зам Сейчас для распределённых запросов используется по потоку на соединение. Это позволяет хорошо распараллелить вычисления над полученными данными и утилизировать сеть, но становится сильно избыточным для больших кластеров. Для примера, создание 1000 потоков для чтения данных из 1000 серверов кластера - лишь расходует ресурсы и увеличивает время выполнения запроса. Вместо этого необходимо использовать количество потоков не большее количества процессорных ядер, и мультиплексировать в одном потоке общение с серверами. Реализация нетривиальна, так как мультиплексировать необходимо каждую стадию общения по сети, включая установку соединения и обмен handshake. +Upd. Сейчас обсуждается, как сделать другую задачу вместо этой. + ### 4.3. Ограничение числа одновременных скачиваний с реплик. {#ogranichenie-chisla-odnovremennykh-skachivanii-s-replik} Дмитрий Григорьев, ВШЭ. @@ -284,14 +299,16 @@ Upd. Иван Блинков сделал эту задачу путём зам Дмитрий Григорьев, ВШЭ. В очереди. Исправить проблему, что восстанавливающаяся реплика перестаёт мержить. Частично компенсируется 4.3. + ## 5. Операции. {#operatsii} -### 5.1. Разделение задач на более мелкие куски в clickhouse-copier. {#razdelenie-zadach-na-bolee-melkie-kuski-v-clickhouse-copier} +### 5.1. + Разделение задач на более мелкие куски в clickhouse-copier. {#razdelenie-zadach-na-bolee-melkie-kuski-v-clickhouse-copier} [\#9075](https://github.com/ClickHouse/ClickHouse/pull/9075) Q1. Нужно для Метрики, в очереди. Никита Михайлов. Upd. Задача на финальной стадии разработки. +Upd. Сделано. Эффективность работы под вопросом. Есть варианты, как сделать лучше. ### 5.2. Автонастройка лимита на оперативку и размера кэшей. {#avtonastroika-limita-na-operativku-i-razmera-keshei} @@ -305,6 +322,8 @@ Upd. Задача на финальной стадии разработки. Требует 7.5. Задачу хочет Метрика, Облако, БК, Маркет и Altinity. Первой LTS версией уже стала версия 19.14. Метрика, БК, Маркет, Altinity уже используют более свежие версии чем LTS. 
+Upd. Появилась вторая версия LTS - 20.3. + ## 6. Инструментирование. {#instrumentirovanie} @@ -321,7 +340,7 @@ Upd. Задача на финальной стадии разработки. ### 6.3. Учёт оперативки total расширить не только на запросы. {#uchiot-operativki-total-rasshirit-ne-tolko-na-zaprosy} -Исправление долгоживущей проблемы с дрифтом учёта оперативки. Нужна для Метрики и БК. Иван Лежанкин. Q1. +Исправление долгоживущей проблемы с дрифтом учёта оперативки. Нужна для Метрики и БК. Иван Лежанкин. Q1. Странно, как будто не сделано. ### 6.4. Поддержка perf events как метрик запроса. {#podderzhka-perf-events-kak-metrik-zaprosa} @@ -339,7 +358,7 @@ Upd. Задача на финальной стадии разработки. Сейчас есть стек трейс для почти всех, но не всех исключений. Требует 7.4. -### 6.7. + Таблица system.stack\_trace. {#tablitsa-system-stack-trace} +### 6.7. + Таблица system.stack_trace. {#tablitsa-system-stack-trace} Сравнительно простая задача, но только для опытных разработчиков. @@ -351,6 +370,7 @@ Upd. Задача на финальной стадии разработки. ### 6.10. Сбор общих системных метрик. {#sbor-obshchikh-sistemnykh-metrik} + ## 7. Сопровождение разработки. {#soprovozhdenie-razrabotki} ### 7.1. + ICU в submodules. {#icu-v-submodules} @@ -361,7 +381,7 @@ Upd. Задача на финальной стадии разработки. Сделал Алексей Миловидов. -### 7.3. Обновление Poco. {#obnovlenie-poco} +### 7.3. + Обновление Poco. {#obnovlenie-poco} Алексанр Кузьменков. @@ -383,13 +403,18 @@ Upd. Задача на финальной стадии разработки. Уже есть ASan, TSan, UBSan. Не хватает тестов под MSan. Они уже добавлены в CI, но не проходят. [Александр Кузьменков](https://github.com/akuzm) и [Александр Токмаков](https://github.com/tavplubix). -### 7.8. Добавить clang-tidy. {#dobavit-clang-tidy} +Upd. Задача всё ещё медленно тащится. + +### 7.8. + Добавить clang-tidy. {#dobavit-clang-tidy} Уже есть PVS-Studio. Мы очень довольны, но этого недостаточно. Upd. Алексей Миловидов. Добавлено некоторое множество проверок, но нужно рассмотреть все проверки подряд и добавить всё, что можно. +Upd. Рассмотрели все проверки подряд. -### 7.9. Проверки на стиль имён с помощью clang-tidy. {#proverki-na-stil-imion-s-pomoshchiu-clang-tidy} +### 7.9. + Проверки на стиль имён с помощью clang-tidy. {#proverki-na-stil-imion-s-pomoshchiu-clang-tidy} + +Сделано. Только в .cpp файлах и только для имён локальных переменных. Остальное слишком сложно. ### 7.10. Включение UBSan и MSan в интеграционных тестах. {#vkliuchenie-ubsan-i-msan-v-integratsionnykh-testakh} @@ -399,6 +424,8 @@ UBSan включен в функциональных тестах, но не в У нас мало unit тестов по сравнению с функциональными тестами и их использование не обязательно. Но они всё-равно важны и нет причин не запускать их под всеми видами sanitizers. +Илья Яцишин. + ### 7.12. Показывать тестовое покрытие нового кода в PR. {#pokazyvat-testovoe-pokrytie-novogo-koda-v-pr} Пока есть просто показ тестового покрытия всего кода. @@ -413,6 +440,8 @@ UBSan включен в функциональных тестах, но не в Подключение replxx вместо readline сделал Иван Лежанкин. +Есть технический долг с лицензиями файлов консорциума Unicode. + ### 7.14.1. Улучшение возможностей интерактивного режима clickhouse-client. {#uluchshenie-vozmozhnostei-interaktivnogo-rezhima-clickhouse-client} Тагир Кускаров, ВШЭ. @@ -476,7 +505,7 @@ https://github.com/ClickHouse/ClickHouse/issues/8027\#issuecomment-566670282 Проверили на настоящем сервере Huawei, а также в специальном Docker контейнере, который содержит внутри qemu-user-static. 
Также можно проверить на Cavium, на Raspberry Pi а также на твоём Android телефоне. -### 7.20. Автосборка для FreeBSD x86\_64. {#avtosborka-dlia-freebsd-x86-64} +### 7.20. Автосборка для FreeBSD x86_64. {#avtosborka-dlia-freebsd-x86-64} [Иван Лежанкин](https://github.com/abyss7). @@ -535,6 +564,8 @@ Fuzzing тестирование - это тестирование случай Также можно сделать функции с детерминированным генератором случайных чисел (аргументом передаётся seed) для воспроизводимости тестовых кейсов. Upd. Сергей Штыков сделал функцию `randomPrintableASCII`. +Upd. Илья Яцишин сделал табличную функцию `generateRandom`. +Upd. Эльдар Заитов добавляет OSS Fuzz. ### 7.24. Fuzzing лексера и парсера запросов; кодеков и форматов. {#fuzzing-leksera-i-parsera-zaprosov-kodekov-i-formatov} @@ -557,10 +588,12 @@ Upd. Сергей Штыков сделал функцию `randomPrintableASCII Нужно для CHYT и YQL. -UPD: Все патчи Максима отправлены в master. Задача взята в работу. +Upd: Все патчи Максима отправлены в master. Задача взята в работу. Upd: Задача в процессе реализации. Синхронизироваться будет master. Делает [Иван Лежанкин](https://github.com/abyss7) +Upd: Есть собирающийся прототип, но сборка как будто ещё не в trunk Аркадии. + ### 7.26. Побайтовая идентичность репозитория с Аркадией. {#pobaitovaia-identichnost-repozitoriia-s-arkadiei} Команда DevTools. Прогресс по задаче под вопросом. @@ -617,6 +650,7 @@ Upd: Задача в процессе реализации. Синхронизи Upd. Иван Блинков настроил CDN repo.clickhouse.tech, что решает проблему с доступностью зарубежом. Вопрос с operations, visibility пока актуален. + ## 8. Интеграция с внешними системами. {#integratsiia-s-vneshnimi-sistemami} ### 8.1. Поддержка ALTER MODIFY SETTING для Kafka. {#podderzhka-alter-modify-setting-dlia-kafka} @@ -629,11 +663,11 @@ Altinity. Никто не делает эту задачу. [Александр Кузьменков](https://github.com/akuzm). -### 8.3. Доработки globs (правильная поддержка диапазонов, уменьшение числа одновременных stream-ов). {#dorabotki-globs-pravilnaia-podderzhka-diapazonov-umenshenie-chisla-odnovremennykh-stream-ov} +### 8.3. + Доработки globs (правильная поддержка диапазонов, уменьшение числа одновременных stream-ов). {#dorabotki-globs-pravilnaia-podderzhka-diapazonov-umenshenie-chisla-odnovremennykh-stream-ov} [Ольга Хвостикова](https://github.com/stavrolia). -Уменьшение числа stream-ов сделано, а вот правильная поддержка диапазонов - нет. Будем надеяться на Q1/Q2. +Уменьшение числа stream-ов сделано, а вот правильная поддержка диапазонов - нет. Будем надеяться на Q1/Q2. Сделано. ### 8.4. Унификация File, HDFS, S3 под URL. {#unifikatsiia-file-hdfs-s3-pod-url} @@ -690,19 +724,21 @@ Andrew Onyshchuk. Есть pull request. Q1. Сделано. Павел Круглов, ВШЭ и Яндекс. Есть pull request. -### 8.16.2. Поддержка формата Thrift. {#podderzhka-formata-thrift} +### 8.16.2. - Поддержка формата Thrift. {#podderzhka-formata-thrift} -Павел Круглов, ВШЭ и Яндекс. +Павел Круглов, ВШЭ и Яндекс. Задача отменена. ### 8.16.3. Поддержка формата MsgPack. {#podderzhka-formata-msgpack} Павел Круглов, ВШЭ и Яндекс. Задача взята в работу. -### 8.16.4. Формат Regexp. {#format-regexp} +Upd. Почти готово - есть лишь небольшой технический долг. + +### 8.16.4. + Формат Regexp. {#format-regexp} Павел Круглов, ВШЭ и Яндекс. -Есть pull request. +Есть pull request. Готово. ### 8.17. ClickHouse как MySQL реплика. {#clickhouse-kak-mysql-replika} @@ -735,6 +771,7 @@ Maxim Fedotov, Wargaming + Yuri Baranov, Яндекс. Нужно для БК. Декабрь 2019. В декабре для БК сделан минимальный вариант этой задачи. 
Максимальный вариант, вроде, никому не нужен. +Upd. Всё ещё кажется, что задача не нужна. ### 8.22. Поддержка синтаксиса для переменных в стиле MySQL. {#podderzhka-sintaksisa-dlia-peremennykh-v-stile-mysql} @@ -746,6 +783,7 @@ Upd. Юрий Баранов работает в Google, там запрещен Желательно 2.15. + ## 9. Безопасность. {#bezopasnost} ### 9.1. + Ограничение на хосты в запросах ко внешним системам. {#ogranichenie-na-khosty-v-zaprosakh-ko-vneshnim-sistemam} @@ -761,7 +799,12 @@ ClickHouse предоставляет возможность обратитьс Вместо этого предлагается описывать необходимые данные в конфигурационном файле сервера или в отдельном сервисе и ссылаться на них по именам. ### 9.3. Поддержка TLS для ZooKeeper. {#podderzhka-tls-dlia-zookeeper} + [#10174](https://github.com/ClickHouse/ClickHouse/issues/10174) + +Есть pull request. + + ## 10. Внешние словари. {#vneshnie-slovari} ### 10.1. + Исправление зависания в библиотеке доступа к YT. {#ispravlenie-zavisaniia-v-biblioteke-dostupa-k-yt} @@ -777,6 +820,7 @@ ClickHouse предоставляет возможность обратитьс Нужно для БК и Метрики. Поиск причин - [Александр Сапин](https://github.com/alesapin). Дальшейшее исправление возможно на стороне YT. Upd. Одну причину устранили, но ещё что-то неизвестное осталось. +Upd. Нас заставляют переписать эту библиотеку с одного API на другое, так как старое внезапно устарело. Кажется, что переписывание случайно исправит все проблемы. ### 10.3. Возможность чтения данных из статических таблиц в YT словарях. {#vozmozhnost-chteniia-dannykh-iz-staticheskikh-tablits-v-yt-slovariakh} @@ -802,7 +846,7 @@ Upd. Одну причину устранили, но ещё что-то неи Артём Стрельцов, Николай Дегтеринский, Наталия Михненко, ВШЭ. -### 10.9. Уменьшение блокировок для cache словарей за счёт одновременных запросов одного и того же. {#umenshenie-blokirovok-dlia-cache-slovarei-za-schiot-odnovremennykh-zaprosov-odnogo-i-togo-zhe} +### 10.9. - Уменьшение блокировок для cache словарей за счёт одновременных запросов одного и того же. {#umenshenie-blokirovok-dlia-cache-slovarei-za-schiot-odnovremennykh-zaprosov-odnogo-i-togo-zhe} Заменено в пользу 10.10, 10.11. @@ -825,7 +869,7 @@ Upd. Одну причину устранили, но ещё что-то неи ### 10.14. Поддержка всех типов в функции transform. {#podderzhka-vsekh-tipov-v-funktsii-transform} -Задачу взяла Ольга Хвостикова. +Задачу взяла Ольга Хвостикова. Upd. Статус неизвестен. ### 10.15. Использование словарей как специализированного layout для Join. {#ispolzovanie-slovarei-kak-spetsializirovannogo-layout-dlia-join} @@ -843,6 +887,7 @@ Upd. Одну причину устранили, но ещё что-то неи ### 10.19. Возможность зарегистрировать некоторые функции, использующие словари, под пользовательскими именами. {#vozmozhnost-zaregistrirovat-nekotorye-funktsii-ispolzuiushchie-slovari-pod-polzovatelskimi-imenami} + ## 11. Интерфейсы. {#interfeisy} ### 11.1. Вставка состояний агрегатных функций в виде кортежа аргументов или массива кортежей аргументов. {#vstavka-sostoianii-agregatnykh-funktsii-v-vide-kortezha-argumentov-ili-massiva-kortezhei-argumentov} @@ -851,6 +896,8 @@ Upd. Одну причину устранили, но ещё что-то неи Нужно разобраться, как упаковывать Java в статический бинарник, возможно AppImage. Или предоставить максимально простую инструкцию по установке jdbc-bridge. Может быть будет заинтересован Александр Крашенинников, Badoo, так как он разработал jdbc-bridge. +Upd. Александр Крашенинников перешёл в другую компанию и больше не занимается этим. + ### 11.3. + Интеграционные тесты ODBC драйвера путём подключения ClickHouse к самому себе через ODBC. 
{#integratsionnye-testy-odbc-draivera-putiom-podkliucheniia-clickhouse-k-samomu-sebe-cherez-odbc} Михаил Филимонов, Altinity. Готово. @@ -881,12 +928,13 @@ zhang2014, есть pull request. Возможность описать в конфигурационном файле handler (путь в URL) для HTTP запросов к серверу, которому соответствует некоторый параметризованный запрос. Пользователь может вызвать этот обработчик и не должен передавать SQL запрос. + ## 12. Управление пользователями и доступом. {#upravlenie-polzovateliami-i-dostupom} -### 12.1. Role Based Access Control. {#role-based-access-control} +### 12.1. + Role Based Access Control. {#role-based-access-control} -[Виталий Баранов](https://github.com/vitlibar). Финальная стадия разработки, рабочая версия в начале февраля 2019. -Q1. Сейчас сделаны все интерфейсы в коде и запросы, но не сделаны варианты хранения прав кроме прототипа. +[Виталий Баранов](https://github.com/vitlibar). Финальная стадия разработки, рабочая версия в начале апреля 2019. +Q2. Сейчас сделаны все интерфейсы в коде и запросы, но не сделаны варианты хранения прав кроме прототипа. Upd. Сделано хранение прав. До готового к использованию состояния осталось несколько доработок. ### 12.2. + Управление пользователями и правами доступа с помощью SQL запросов. {#upravlenie-polzovateliami-i-pravami-dostupa-s-pomoshchiu-sql-zaprosov} @@ -897,7 +945,7 @@ Q1. Сделано управление правами полностью, но ### 12.3. Подключение справочника пользователей и прав доступа из LDAP. {#podkliuchenie-spravochnika-polzovatelei-i-prav-dostupa-iz-ldap} [Виталий Баранов](https://github.com/vitlibar). Требует 12.1. -Q1/Q2. +Q2. ### 12.4. Подключение IDM системы Яндекса как справочника пользователей и прав доступа. {#podkliuchenie-idm-sistemy-iandeksa-kak-spravochnika-polzovatelei-i-prav-dostupa} @@ -911,6 +959,7 @@ Q1/Q2. [Виталий Баранов](https://github.com/vitlibar). Требует 12.1. + ## 13. Разделение ресурсов, multi-tenancy. {#razdelenie-resursov-multi-tenancy} ### 13.1. Overcommit запросов по памяти и вытеснение. {#overcommit-zaprosov-po-pamiati-i-vytesnenie} @@ -926,6 +975,8 @@ Q1/Q2. Требует 13.2 или сможем сделать более неудобную реализацию раньше. Обсуждается вариант неудобной реализации. Пока средний приоритет, целимся на Q1/Q2. Вариант реализации выбрал Александр Казаков. +Upd. Не уследили, и задачу стали обсуждать менеджеры. + ## 14. Диалект SQL. {#dialekt-sql} @@ -936,8 +987,6 @@ Q1/Q2. ### 14.2. Поддержка WITH для подзапросов. {#podderzhka-with-dlia-podzaprosov} -Михаил Коротов. - ### 14.3. Поддержка подстановок для множеств в правой части IN. {#podderzhka-podstanovok-dlia-mnozhestv-v-pravoi-chasti-in} ### 14.4. Поддержка подстановок для идентификаторов (имён) в SQL запросе. {#podderzhka-podstanovok-dlia-identifikatorov-imion-v-sql-zaprose} @@ -993,7 +1042,7 @@ zhang2014 ### 14.16. Синонимы для функций из MySQL. {#sinonimy-dlia-funktsii-iz-mysql} -### 14.17. Ввести понятие stateful функций. {#vvesti-poniatie-stateful-funktsii} +### 14.17. + Ввести понятие stateful функций. {#vvesti-poniatie-stateful-funktsii} zhang2014. Для runningDifference, neighbour - их учёт в оптимизаторе запросов. @@ -1018,13 +1067,15 @@ zhang2014. Павел Потёмкин, ВШЭ. + ## 15. Улучшение поддержки JOIN. {#uluchshenie-podderzhki-join} -### 15.1. Доведение merge JOIN до продакшена. {#dovedenie-merge-join-do-prodakshena} +### 15.1. + Доведение merge JOIN до продакшена. {#dovedenie-merge-join-do-prodakshena} Артём Зуйков. Сейчас merge JOIN включается вручную опцией и всегда замедляет запросы. 
Хотим, чтобы он замедлял запросы только когда это неизбежно. Кстати, смысл merge JOIN появляется только совместно с 15.2 и 15.3. Q1. Сделали адаптивный вариант, но вроде он что-то всё-ещё замедляет. +Задача сделана, но всё работает слишком медленно. ### 15.1.1. Алгоритм two-level merge JOIN. {#algoritm-two-level-merge-join} @@ -1052,6 +1103,7 @@ Q1. Сделали адаптивный вариант, но вроде он ч Артём Зуйков. + ## 16. Типы данных и функции. {#tipy-dannykh-i-funktsii} ### 16.1. + DateTime64. {#datetime64} @@ -1073,6 +1125,7 @@ Upd. Секретного изменения в работе не будет, з ### 16.6. Функции нормализации и хэширования SQL запросов. {#funktsii-normalizatsii-i-kheshirovaniia-sql-zaprosov} + ## 17. Работа с географическими данными. {#rabota-s-geograficheskimi-dannymi} ### 17.1. Гео-словари для определения региона по координатам. {#geo-slovari-dlia-opredeleniia-regiona-po-koordinatam} @@ -1105,6 +1158,7 @@ Upd. Андрей сделал прототип более оптимально Сейчас функция тихо не работает в случае полигонов с самопересечениями, надо кидать исключение. + ## 18. Машинное обучение и статистика. {#mashinnoe-obuchenie-i-statistika} ### 18.1. Инкрементальная кластеризация данных. {#inkrementalnaia-klasterizatsiia-dannykh} @@ -1123,6 +1177,7 @@ Upd. Андрей сделал прототип более оптимально В очереди. Возможно, Александр Кожихов. У него сначала идёт задача 24.26. + ## 19. Улучшение работы кластера. {#uluchshenie-raboty-klastera} ### 19.1. Параллельные кворумные вставки без линеаризуемости. {#parallelnye-kvorumnye-vstavki-bez-linearizuemosti} @@ -1153,7 +1208,7 @@ Upd. Алексей сделал какой-то вариант, но борет Hold. Полезно для заказчиков внутри Яндекса, но есть риски. Эту задачу никто не будет делать. -### 19.4. internal\_replication = ‘auto’. {#internal-replication-auto} +### 19.4. internal_replication = ‘auto’. {#internal-replication-auto} ### 19.5. Реплицируемые базы данных. {#replitsiruemye-bazy-dannykh} @@ -1177,18 +1232,20 @@ Hold. Полезно для заказчиков внутри Яндекса, н Требует 1.6, 19.1, 19.6, 19.7, 19.8, 19.9. + ## 20. Мутации данных. {#mutatsii-dannykh} Пока все задачи по точечным UPDATE/DELETE имеют низкий приоритет, но ожидаем взять в работу в середине 2020. ### 20.1. Поддержка DELETE путём запоминания множества затронутых кусков и ключей. {#podderzhka-delete-putiom-zapominaniia-mnozhestva-zatronutykh-kuskov-i-kliuchei} -### 20.2. Поддержка DELETE путём преобразования множества ключей в множество row\_numbers на реплике, столбца флагов и индекса по диапазонам. {#podderzhka-delete-putiom-preobrazovaniia-mnozhestva-kliuchei-v-mnozhestvo-row-numbers-na-replike-stolbtsa-flagov-i-indeksa-po-diapazonam} +### 20.2. Поддержка DELETE путём преобразования множества ключей в множество row_numbers на реплике, столбца флагов и индекса по диапазонам. {#podderzhka-delete-putiom-preobrazovaniia-mnozhestva-kliuchei-v-mnozhestvo-row-numbers-na-replike-stolbtsa-flagov-i-indeksa-po-diapazonam} ### 20.3. Поддержка ленивых DELETE путём запоминания выражений и преобразования к множеству ключей в фоне. {#podderzhka-lenivykh-delete-putiom-zapominaniia-vyrazhenii-i-preobrazovaniia-k-mnozhestvu-kliuchei-v-fone} ### 20.4. Поддержка UPDATE с помощью преобразования в DELETE и вставок. {#podderzhka-update-s-pomoshchiu-preobrazovaniia-v-delete-i-vstavok} + ## 21. Оптимизации производительности. {#optimizatsii-proizvoditelnosti} ### 21.1. + Параллельный парсинг форматов. {#parallelnyi-parsing-formatov} @@ -1201,7 +1258,7 @@ Hold. Полезно для заказчиков внутри Яндекса, н После 21.1, предположительно Никита Михайлов. 
Задача сильно проще чем 21.1. -### 21.3. Исправление низкой производительности анализа индекса в случае большого множества в секции IN. {#ispravlenie-nizkoi-proizvoditelnosti-analiza-indeksa-v-sluchae-bolshogo-mnozhestva-v-sektsii-in} +### 21.3. + Исправление низкой производительности анализа индекса в случае большого множества в секции IN. {#ispravlenie-nizkoi-proizvoditelnosti-analiza-indeksa-v-sluchae-bolshogo-mnozhestva-v-sektsii-in} Нужно всем (Zen, БК, DataLens, TestEnv…). Антон Попов, Q1/Q2. @@ -1309,23 +1366,23 @@ Constraints позволяют задать выражение, истиннос В ClickHouse используется неоптимальный вариант top sort. Суть его в том, что из каждого блока достаётся top N записей, а затем, все блоки мержатся. Но доставание top N записей у каждого следующего блока бессмысленно, если мы знаем, что из них в глобальный top N войдёт меньше. Конечно нужно реализовать вариацию на тему priority queue (heap) с быстрым пропуском целых блоков, если ни одна строка не попадёт в накопленный top. -1. Рекурсивный вариант сортировки по кортежам. +2. Рекурсивный вариант сортировки по кортежам. Для сортировки по кортежам используется обычная сортировка с компаратором, который в цикле по элементам кортежа делает виртуальные вызовы `IColumn::compareAt`. Это неоптимально - как из-за короткого цикла по неизвестному в compile-time количеству элементов, так и из-за виртуальных вызовов. Чтобы обойтись без виртуальных вызовов, есть метод `IColumn::getPermutation`. Он используется в случае сортировки по одному столбцу. Есть вариант, что в случае сортировки по кортежу, что-то похожее тоже можно применить… например, сделать метод `updatePermutation`, принимающий аргументы offset и limit, и допереставляющий перестановку в диапазоне значений, в которых предыдущий столбец имел равные значения. -1. RadixSort для сортировки. +3. RadixSort для сортировки. Один наш знакомый начал делать задачу по попытке использования RadixSort для сортировки столбцов. Был сделан вариант indirect сортировки (для `getPermutation`), но не оптимизирован до конца - есть лишние ненужные перекладывания элементов. Для того, чтобы его оптимизировать, придётся добавить немного шаблонной магии (на последнем шаге что-то не копировать, вместо перекладывания индексов - складывать их в готовое место). Также этот человек добавил метод MSD Radix Sort для реализации radix partial sort. Но даже не проверил производительность. Наиболее содержательная часть задачи может состоять в применении Radix Sort для сортировки кортежей, расположенных в оперативке в виде Structure Of Arrays неизвестного в compile-time размера. Это может работать хуже, чем то, что описано в пункте 2… Но попробовать не помешает. -1. Three-way comparison sort. +4. Three-way comparison sort. Виртуальный метод `compareAt` возвращает -1, 0, 1. Но алгоритмы сортировки сравнениями обычно рассчитаны на `operator<` и не могут получить преимущества от three-way comparison. А можно ли написать так, чтобы преимущество было? -1. pdq partial sort +5. pdq partial sort -Хороший алгоритм сортировки сравнениями `pdqsort` не имеет варианта partial sort. Заметим, что на практике, почти все сортировки в запросах ClickHouse являются partial\_sort, так как `ORDER BY` почти всегда идёт с `LIMIT`. Кстати, Данила Кутенин уже попробовал это и показал, что в тривиальном случае преимущества нет. Но не очевидно, что нельзя сделать лучше. +Хороший алгоритм сортировки сравнениями `pdqsort` не имеет варианта partial sort. 
Заметим, что на практике, почти все сортировки в запросах ClickHouse являются partial_sort, так как `ORDER BY` почти всегда идёт с `LIMIT`. Кстати, Данила Кутенин уже попробовал это и показал, что в тривиальном случае преимущества нет. Но не очевидно, что нельзя сделать лучше. ### 21.20. Использование материализованных представлений для оптимизации запросов. {#ispolzovanie-materializovannykh-predstavlenii-dlia-optimizatsii-zaprosov} @@ -1344,6 +1401,7 @@ Constraints позволяют задать выражение, истиннос zhang2014. Есть pull request. + ## 22. Долги и недоделанные возможности. {#dolgi-i-nedodelannye-vozmozhnosti} ### 22.1. + Исправление неработающих таймаутов, если используется TLS. {#ispravlenie-nerabotaiushchikh-taimautov-esli-ispolzuetsia-tls} @@ -1362,6 +1420,7 @@ N.Vartolomei. Александр Казаков. Нужно для Яндекс.Метрики и Datalens. Задача постепенно тащится и исправлениями в соседних местах стала менее актуальна. В Q1 будет сделана или отменена с учётом 1.2. и 1.3. +Upd. Добавили таймауты. ### 22.5. + Исправление редких срабатываний TSan в stress тестах в CI. {#ispravlenie-redkikh-srabatyvanii-tsan-v-stress-testakh-v-ci} @@ -1470,18 +1529,19 @@ Altinity. [Александр Сапин](https://github.com/alesapin) + ## 23. Default Festival. {#default-festival} -### 23.1. + Включение minimalistic\_part\_header в ZooKeeper. {#vkliuchenie-minimalistic-part-header-v-zookeeper} +### 23.1. + Включение minimalistic_part_header в ZooKeeper. {#vkliuchenie-minimalistic-part-header-v-zookeeper} Сильно уменьшает объём данных в ZooKeeper. Уже год в продакшене в Яндекс.Метрике. Алексей Миловидов, ноябрь 2019. -### 23.2. Включение distributed\_aggregation\_memory\_efficient. {#vkliuchenie-distributed-aggregation-memory-efficient} +### 23.2. Включение distributed_aggregation_memory_efficient. {#vkliuchenie-distributed-aggregation-memory-efficient} Есть риски меньшей производительности лёгких запросов, хотя производительность тяжёлых запросов всегда увеличивается. -### 23.3. Включение min\_bytes\_to\_external\_sort и min\_bytes\_to\_external\_group\_by. {#vkliuchenie-min-bytes-to-external-sort-i-min-bytes-to-external-group-by} +### 23.3. Включение min_bytes_to_external_sort и min_bytes_to_external_group_by. {#vkliuchenie-min-bytes-to-external-sort-i-min-bytes-to-external-group-by} Желательно 5.2. и 13.1. @@ -1489,7 +1549,7 @@ Altinity. Есть гипотеза, что плохо работает на очень больших кластерах. -### 23.5. Включение compile\_expressions. {#vkliuchenie-compile-expressions} +### 23.5. Включение compile_expressions. {#vkliuchenie-compile-expressions} Требует 7.2. Задачу изначально на 99% сделал Денис Скоробогатов, ВШЭ и Яндекс. Остальной процент доделывал Алексей Миловидов, а затем [Александр Сапин](https://github.com/alesapin). @@ -1514,6 +1574,7 @@ Q1. [Николай Кочетов](https://github.com/KochetovNicolai). Возможность mlock бинарника сделал Олег Алексеенков [\#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) . Поможет, когда на серверах кроме ClickHouse работает много посторонних программ (мы иногда называем их в шутку «треш-программами»). + ## 24. Экспериментальные задачи. {#eksperimentalnye-zadachi} ### 24.1. Веб-интерфейс для просмотра состояния кластера и профилирования запросов. {#veb-interfeis-dlia-prosmotra-sostoianiia-klastera-i-profilirovaniia-zaprosov} @@ -1553,7 +1614,7 @@ ClickHouse поддерживает LZ4 и ZSTD для сжатия данных Смотрите также 24.5. -1. Шифрование отдельных значений. +2. Шифрование отдельных значений. Для этого требуется реализовать функции шифрования и расшифрования, доступные из SQL. 
Для шифрования реализовать возможность добавления нужного количества случайных бит для исключения одинаковых зашифрованных значений на одинаковых данных. Это позволит реализовать возможность «забывания» данных без удаления строк таблицы: можно шифровать данные разных клиентов разными ключами, и для того, чтобы забыть данные одного клиента, потребуется всего лишь удалить ключ. ### 24.6. Userspace RAID. {#userspace-raid} @@ -1586,7 +1647,7 @@ RAID позволяет одновременно увеличить надёжн Дмитрий Ковальков, ВШЭ и Яндекс. -Подавляющее большинство кода ClickHouse написана для x86\_64 с набором инструкций до SSE 4.2 включительно. Лишь отдельные редкие функции поддерживают AVX/AVX2/AVX512 с динамической диспетчеризацией. +Подавляющее большинство кода ClickHouse написана для x86_64 с набором инструкций до SSE 4.2 включительно. Лишь отдельные редкие функции поддерживают AVX/AVX2/AVX512 с динамической диспетчеризацией. В первой части задачи, следует добавить в ClickHouse реализации некоторых примитивов, оптимизированные под более новый набор инструкций. Например, AVX2 реализацию генератора случайных чисел pcg: https://github.com/lemire/simdpcg @@ -1598,6 +1659,8 @@ RAID позволяет одновременно увеличить надёжн Продолжение 24.8. +Upd. Есть pull request. + ### 24.10. Поддержка типов half/bfloat16/unum. {#podderzhka-tipov-halfbfloat16unum} [\#7657](https://github.com/ClickHouse/ClickHouse/issues/7657) @@ -1633,6 +1696,7 @@ ClickHouse предоставляет достаточно богатый наб В компании nVidia сделали прототип offloading вычисления GROUP BY с некоторыми из агрегатных функций в ClickHouse и обещат предоставить исходники в публичный доступ для дальнейшего развития. Предлагается изучить этот прототип и расширить его применимость для более широкого сценария использования. В качестве альтернативы, предлагается изучить исходные коды системы `OmniSci` или `Alenka` или библиотеку `CUB` https://nvlabs.github.io/cub/ и применить некоторые из алгоритмов в ClickHouse. Upd. В компании nVidia выложили прототип, теперь нужна интеграция в систему сборки. +Upd. Интеграция в систему сборки - Иван Лежанкин. ### 24.13. Stream запросы. {#stream-zaprosy} @@ -1791,7 +1855,7 @@ Amos Bird, но его решение слишком громоздкое и п ### 25.10. Митапы в России и Беларуси: Москва x2 + митап для разработчиков или хакатон, Санкт-Петербург, Минск, Нижний Новгород, Екатеринбург, Новосибирск и/или Академгородок, Иннополис или Казань. {#mitapy-v-rossii-i-belarusi-moskva-x2-mitap-dlia-razrabotchikov-ili-khakaton-sankt-peterburg-minsk-nizhnii-novgorod-ekaterinburg-novosibirsk-iili-akademgorodok-innopolis-ili-kazan} -Екатерина - организация +Екатерина - организация. Upd. Проведено два онлайн митапа на русском. ### 25.11. Митапы зарубежные: восток США (Нью Йорк, возможно Raleigh), возможно северо-запад (Сиэтл), Китай (Пекин снова, возможно митап для разработчиков или хакатон), Лондон. {#mitapy-zarubezhnye-vostok-ssha-niu-iork-vozmozhno-raleigh-vozmozhno-severo-zapad-sietl-kitai-pekin-snova-vozmozhno-mitap-dlia-razrabotchikov-ili-khakaton-london} @@ -1807,7 +1871,8 @@ Amos Bird, но его решение слишком громоздкое и п ### 25.14. Конференции в России: все HighLoad, возможно CodeFest, DUMP или UWDC, возможно C++ Russia. {#konferentsii-v-rossii-vse-highload-vozmozhno-codefest-dump-ili-uwdc-vozmozhno-c-russia} -Алексей Миловидов и все подготовленные докладчики +Алексей Миловидов и все подготовленные докладчики. +Upd. Есть Saint HighLoad online. ### 25.15. Конференции зарубежные: Percona, DataOps, попытка попасть на более крупные. 
{#konferentsii-zarubezhnye-percona-dataops-popytka-popast-na-bolee-krupnye} @@ -1848,7 +1913,7 @@ Amos Bird, но его решение слишком громоздкое и п ### 25.22. On-site помощь с ClickHouse компаниям в дни рядом с мероприятиями. {#on-site-pomoshch-s-clickhouse-kompaniiam-v-dni-riadom-s-meropriiatiiami} -[Иван Блинков](https://github.com/blinkov/) - организация +[Иван Блинков](https://github.com/blinkov/) - организация. Проверил мероприятие для турецкой компании. ### 25.23. Новый мерч для ClickHouse. {#novyi-merch-dlia-clickhouse} From 9210a50c5c95f26203377f5b3327b4f735fe2709 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 14 Apr 2020 03:00:48 +0300 Subject: [PATCH 387/752] simple backport script --- utils/simple-backport/backport.sh | 4 ++-- utils/simple-backport/changelog.sh | 4 ++-- utils/simple-backport/format-changelog.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/utils/simple-backport/backport.sh b/utils/simple-backport/backport.sh index ade0b54f24d..7d05f6902d0 100755 --- a/utils/simple-backport/backport.sh +++ b/utils/simple-backport/backport.sh @@ -15,8 +15,8 @@ git log "$merge_base..origin/$branch" --first-parent > "$branch-log.txt" # variant is squashed. Next are some backport message variants. find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ of]*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ of]*#\([[:digit:]]\+\).*$/\1/Ip") + s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip") "${find_prs[@]}" master-log.txt | sort -rn > master-prs.txt "${find_prs[@]}" "$branch-log.txt" | sort -rn > "$branch-prs.txt" diff --git a/utils/simple-backport/changelog.sh b/utils/simple-backport/changelog.sh index 43a0b2d46da..b2f95f22533 100755 --- a/utils/simple-backport/changelog.sh +++ b/utils/simple-backport/changelog.sh @@ -11,8 +11,8 @@ git log "$from..$to" --first-parent > "changelog-log.txt" # variant is squashed. Next are some backport message variants. find_prs=(sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p; s/^.*(#\([[:digit:]]\+\))$/\1/p; - s/^.*back[- ]*port[ ]*#\([[:digit:]]\+\).*$/\1/Ip; - s/^.*cherry[- ]*pick[ ]*#\([[:digit:]]\+\).*$/\1/Ip") + s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip; + s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip") "${find_prs[@]}" "changelog-log.txt" | sort -rn > "changelog-prs.txt" diff --git a/utils/simple-backport/format-changelog.py b/utils/simple-backport/format-changelog.py index 0f379d5529f..d6f97b358ed 100755 --- a/utils/simple-backport/format-changelog.py +++ b/utils/simple-backport/format-changelog.py @@ -9,7 +9,7 @@ import collections import re parser = argparse.ArgumentParser(description='Format changelog for given PRs.') -parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, default=sys.stdin, help='File with PR numbers, one per line.') +parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs='?', default=[sys.stdin], help='File with PR numbers, one per line.') args = parser.parse_args() # This function mirrors the PR description checks in ClickhousePullRequestTrigger. 
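
A quick way to sanity-check the updated `find_prs` expressions above is to run
them over a few synthetic commit subjects (GNU sed is assumed for the `I`
flag, and the PR numbers are invented for illustration):

```
$ cat > sample-log.txt <<'EOF'
Merge pull request #11111 from user/some-branch
Some squashed change (#22222)
Backport of #33333 to 20.1
Cherry-picked #44444
EOF
$ sed -n "s/^.*Merge pull request #\([[:digit:]]\+\).*$/\1/p;
    s/^.*(#\([[:digit:]]\+\))$/\1/p;
    s/^.*back[- ]*port[ed of]*#\([[:digit:]]\+\).*$/\1/Ip;
    s/^.*cherry[- ]*pick[ed of]*#\([[:digit:]]\+\).*$/\1/Ip" sample-log.txt
11111
22222
33333
44444
```

Each expression prints only the captured PR number, so there is one line of
output per recognized subject; a subject that matches none of the patterns is
silently dropped, which is why unusual backport wordings need the explicit
empty commit described in the README.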
From b4499ca607c9d195ddd6e35846ca79438a2b614a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 14 Apr 2020 05:08:14 +0300
Subject: [PATCH 388/752] Fix ugly typo

---
 website/templates/index/scalable.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/templates/index/scalable.html b/website/templates/index/scalable.html
index fb86091ff06..672a02f202b 100644
--- a/website/templates/index/scalable.html
+++ b/website/templates/index/scalable.html
@@ -2,7 +2,7 @@
 [surrounding HTML markup lost in extraction]
-        Linerarly scalable
+        Linearly scalable
 ClickHouse scales well both vertically and horizontally. ClickHouse is easily adaptable to perform either on a cluster with hundreds or thousands of nodes, or on a single server, or even on a tiny virtual machine. Currently, there are installations with multiple trillion rows or hundreds of terabytes of data per single node.
 There are many ClickHouse clusters consisting of several hundred nodes, including a few clusters of Yandex Metrica, while the largest known ClickHouse cluster is well over a thousand nodes.

From 8e10a5504a5a98df1f8b4870619183e6e2905a56 Mon Sep 17 00:00:00 2001
From: zvrr
Date: Tue, 14 Apr 2020 12:21:46 +0800
Subject: [PATCH 389/752] Update settings.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

update "动物管理员" to zookeeper
---
 .../operations/server_configuration_parameters/settings.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/zh/operations/server_configuration_parameters/settings.md b/docs/zh/operations/server_configuration_parameters/settings.md
index b78f8173741..fefeac9fc57 100644
--- a/docs/zh/operations/server_configuration_parameters/settings.md
+++ b/docs/zh/operations/server_configuration_parameters/settings.md
@@ -774,11 +774,11 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv
 users.xml
 ```

-## 动物园管理员 {#server-settings_zookeeper}
+## zookeeper {#server-settings_zookeeper}

-包含允许ClickHouse与 [动物园管理员](http://zookeeper.apache.org/) 集群。
+包含允许ClickHouse与 [zookeeper](http://zookeeper.apache.org/) 集群。

-ClickHouse使用ZooKeeper在使用复制表时存储副本的元数据。 如果未使用复制的表,则可以省略此部分参数。
+ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复制的表,则可以省略此部分参数。

 本节包含以下参数:

From b50d622e73e89fd16b4bb8da5f9d2587499ed147 Mon Sep 17 00:00:00 2001
From: zvrr
Date: Tue, 14 Apr 2020 12:30:04 +0800
Subject: [PATCH 390/752] Update settings.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

update some setting to 配置
---
 .../server_configuration_parameters/settings.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/zh/operations/server_configuration_parameters/settings.md b/docs/zh/operations/server_configuration_parameters/settings.md
index b78f8173741..2cffc7e5d2c 100644
--- a/docs/zh/operations/server_configuration_parameters/settings.md
+++ b/docs/zh/operations/server_configuration_parameters/settings.md
@@ -5,13 +5,13 @@ toc_priority: 57
 toc_title: "\u670D\u52A1\u5668\u8BBE\u7F6E"
 ---

-# 服务器设置 {#server-settings}
+# 服务器配置 {#server-settings}

 ## builtin\_dictionaries\_reload\_interval {#builtin-dictionaries-reload-interval}

-重新加载内置字典之前的时间间隔(以秒为单位)。
+重新加载内置字典的间隔时间(以秒为单位)。

-ClickHouse每x秒重新加载内置字典。 这使得编辑字典成为可能 “on the fly” 无需重新启动服务器。
+ClickHouse每x秒重新加载内置字典。 这使得编辑字典 “on the fly”,而无需重新启动服务器。

 默认值:3600.

@@ -23,7 +23,7 @@ ClickHouse每x秒重新加载内置字典。 这使得编辑字典成为可能

 ## 压缩 {#server-settings-compression}

-数据压缩设置 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md)-发动机表。
+数据压缩配置 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md)-引擎表。

 !!! warning "警告"
     如果您刚开始使用ClickHouse,请不要使用它。
 ```

-`` 字段:
+`` 参数:

 - `min_part_size` – The minimum size of a data part.
 - `min_part_size_ratio` – The ratio of the data part size to the table size.

@@ -82,9 +82,9 @@

 ## default\_profile {#default-profile}

-默认设置配置文件。
+默认配置文件。

-设置配置文件位于参数中指定的文件中 `user_config`.
+配置文件位于`user_config`参数指定的文件中 .
 **示例**

From c5c424b4470ee3f0d03f6780729876ed31f97f22 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Fri, 10 Apr 2020 17:26:31 +0300
Subject: [PATCH 391/752] add junit_to_html_util

---
 utils/junit_to_html/junit-noframes.xsl | 398 +++++++++++++++++++++++++
 utils/junit_to_html/junit_to_html      |  25 ++
 2 files changed, 423 insertions(+)
 create mode 100644 utils/junit_to_html/junit-noframes.xsl
 create mode 100755 utils/junit_to_html/junit_to_html

diff --git a/utils/junit_to_html/junit-noframes.xsl b/utils/junit_to_html/junit-noframes.xsl
new file mode 100644
index 00000000000..a8df085f719
--- /dev/null
+++ b/utils/junit_to_html/junit-noframes.xsl
@@ -0,0 +1,398 @@
[The body of junit-noframes.xsl does not survive extraction: the XSLT and HTML markup was stripped, leaving only loose text fragments of the report template (the "Test Results" page title, the summary headers Tests / Failures / Errors / Success rate / Time, the note that "failures are anticipated and checked for with assertions while errors are unanticipated", "Back to top" links, and failure-location snippets such as "at line", ", column"). Judging by those fragments, it is a stylesheet in the style of Ant's junit-noframes.xsl that renders a JUnit XML report as a single self-contained HTML page; the 398-line file itself is not reproducible here.]

diff --git a/utils/junit_to_html/junit_to_html b/utils/junit_to_html/junit_to_html
new file mode 100755
index 00000000000..440541c3db6
--- /dev/null
+++ b/utils/junit_to_html/junit_to_html
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import os
+import sys
+import lxml.etree as etree
+
+def _convert_junit_to_html(junit_path, html_path):
+    with open(os.path.join(os.path.dirname(__file__), "junit-noframes.xsl")) as xslt_file:
+        junit_to_html_xslt = etree.parse(xslt_file)
+    with open(junit_path) as junit_file:
+        junit_xml = etree.parse(junit_file)
+    transform = etree.XSLT(junit_to_html_xslt)
+    print transform(junit_xml)
+    html = etree.tostring(transform(junit_xml), encoding="utf-8")
+    html_dir = os.path.dirname(html_path)
+    if not os.path.exists(html_dir):
+        os.makedirs(html_dir)
+    with open(html_path, "w") as html_file:
+        html_file.write(html)
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        raise "Insufficient arguments: junit.xml result.html", level
+    junit_path, html_path = sys.argv[1], sys.argv[2]
+    _convert_junit_to_html(junit_path, html_path)

From 26a1b3163a3c14f4fc4aead4215716a3d12a0420 Mon Sep 17 00:00:00 2001
From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com>
Date: Tue, 14 Apr 2020 10:11:05 +0300
Subject: [PATCH 392/752] better colors

---
 utils/junit_to_html/junit-noframes.xsl | 11 ++++++-----
 utils/junit_to_html/junit_to_html      |  1 -
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/utils/junit_to_html/junit-noframes.xsl b/utils/junit_to_html/junit-noframes.xsl
index a8df085f719..4532ca87c60 100644
--- a/utils/junit_to_html/junit-noframes.xsl
+++ b/utils/junit_to_html/junit-noframes.xsl
@@ -34,6 +34,7 @@ under the License.
         Test Results
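One caveat on the script added above: it is Python 2 only (print is used as a statement), and its usage check is itself broken, since raise "...", level uses the long-removed string-exception form and references an undefined variable level, so a wrong invocation dies with a NameError instead of the intended message. A Python 3 sketch of the same converter (my illustration, not the committed file):

#!/usr/bin/env python3
# Sketch: render a JUnit XML report as HTML by applying the
# junit-noframes.xsl stylesheet that ships next to the script.
import os
import sys
import lxml.etree as etree

def convert_junit_to_html(junit_path, html_path):
    xslt_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "junit-noframes.xsl")
    transform = etree.XSLT(etree.parse(xslt_path))
    html = etree.tostring(transform(etree.parse(junit_path)), encoding="utf-8")
    html_dir = os.path.dirname(html_path)
    if html_dir:
        os.makedirs(html_dir, exist_ok=True)  # create the target directory if needed
    with open(html_path, "wb") as html_file:  # bytes, since tostring() returned utf-8 bytes
        html_file.write(html)

if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("Usage: junit_to_html junit.xml result.html")
    convert_junit_to_html(sys.argv[1], sys.argv[2])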
[The hunk bodies of the "better colors" commit are mangled the same way: only stray "+" and "-" fragments of the stylesheet survive. Per the diffstat, the commit adjusts the report's colors in junit-noframes.xsl and deletes one line from junit_to_html, evidently the stray debug output print transform(junit_xml). The fragments that follow belong to a further commit whose header was also lost in extraction: it removes the old single-page benchmark, whose text reappears in the new website/benchmark pages added below. Recoverable pieces of the deleted page: the title "Performance comparison of analytical DBMS"; the test server description "two socket Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz; 128 GiB RAM; md RAID-5 on 8 6TB SATA HDD; ext4"; a pointer to the hardware benchmark results page; the note that results marked x2, x3, x6 come from clustered setups contributed by independent teams whose hardware may differ; the disclaimer that MonetDB results date from 2013 (v11.15.11, Feb2013-SP3), MemSQL from 2015 (3.2) and Vertica from 2015 (7.1.1); and the captions "Relative query processing time (lower is better)" and "Full results".]
- - - - - diff --git a/website/benchmark/benchmark.js b/website/benchmark/benchmark.js new file mode 100644 index 00000000000..ce60ca66941 --- /dev/null +++ b/website/benchmark/benchmark.js @@ -0,0 +1,411 @@ +var data_sizes = + [ + {id: "10000000", name: "10 mln."}, + {id: "100000000", name: "100 mln."}, + {id: "1000000000", name: "1 bn."} + ]; + + +var systems = []; +var systems_uniq = {}; +for (r in results) { + if (systems_uniq[results[r].system]) + continue; + systems_uniq[results[r].system] = 1; + systems.push(results[r].system); +} + +var runs = ["first (cold cache)", "second", "third"]; +var current_runs = ['0', '1']; + +try { + var state = JSON.parse(decodeURIComponent(window.location.hash.substring(1))); + current_data_size = state[0]; + current_systems = state[1]; + current_runs = state[2]; +} catch (e) { +} + +function update_hash() { + window.location.hash = JSON.stringify([current_data_size, current_systems, current_runs]); +} + + +function generate_selectors(elem) { + var html = ''; + if (current_data_size) { + html += ''; + html += '
Compare'; + + var available_results = results; + + if (current_data_size) { + available_results = results.filter(function (run) { + return run.data_size == current_data_size; + }); + } + var available_systems_for_current_data_size = available_results.map(function (run) { + return run.system; + }); + + for (var i = 0; i < systems.length; i++) { + var selected = current_systems.indexOf(systems[i]) != -1; + var available = available_systems_for_current_data_size.indexOf(systems[i]) != -1; + + html += ''; + } + + html += '
Dataset size'; + + for (var i = 0; i < data_sizes.length; i++) { + html += ''; + } + } + + html += '
Run'; + + for (var i = 0; i < runs.length; i++) { + html += ''; + } + + html += '
'; + + elem.html(html); + + $('#systems_selector button:not(.disabled)').click(function (event) { + var target = $(event.target || event.srcElement); + + if (target.hasClass("active") && current_systems.length == 1) { + return; + } + + target.toggleClass("active"); + + current_systems = $.map($('#systems_selector button'), function (elem) { + return $(elem).hasClass("active") ? $(elem).html() : null + }).filter(function (x) { + return x; + }); + + update_hash(); + generate_selectors(elem); + generate_comparison_table(); + generate_diagram(); + }); + + if (current_data_size) { + $('#data_size_selector button').click(function (event) { + var target = $(event.target || event.srcElement); + + current_data_size = target.attr("data-size-id"); + + update_hash(); + generate_selectors(elem); + generate_comparison_table(); + generate_diagram(); + }); + } + + $('#runs_selector button').click(function (event) { + var target = $(event.target || event.srcElement); + + if (target.hasClass("active") && current_runs.length == 1) { + return; + } + + target.toggleClass("active"); + + current_runs = $.map($('#runs_selector button'), function (elem) { + return $(elem).hasClass("active") ? $(elem).attr("data-run-id") : null + }).filter(function (x) { + return x; + }); + + update_hash(); + generate_selectors(elem); + generate_comparison_table(); + generate_diagram(); + }); +} + +function format_number_cell(value, ratio) { + var html = ""; + + var redness = (ratio - 1) / ratio; + var blackness = ratio < 10 ? 0 : ((ratio - 10) / ratio / 2); + + var color = !value ? "#FFF" : + ratio == 1 ? + ("rgba(0, 255, 0, 1)") : + ("rgba(" + ~~(255 * (1 - blackness)) + ", 0, 0, " + redness + ")"); + + html += ""; + html += value ? + (ratio == 1 ? "" : ("×" + ratio.toFixed(2))) + " (" + value.toFixed(3) + " s.)" : + "—"; + html += ""; + + return html; +} + +/* Ratio of execution time to best execution time: + * system index -> run index -> query index -> ratio. 
+ */ +var ratios = []; + + +function generate_comparison_table() { + ratios = []; + + var filtered_results = results; + if (current_data_size) { + filtered_results = filtered_results.filter(function (x) { + return x.data_size == current_data_size; + }); + } + filtered_results = filtered_results.filter(function (x) { + return current_systems.indexOf(x.system) != -1; + }); + + var html = ""; + + html += ""; + html += ""; + html += ""; + html += ""; + for (var j = 0; j < filtered_results.length; j++) { + html += ""; + } + html += ""; + + for (var i = 0; i < queries.length; i++) { + html += ""; + html += ""; + + html += ""; + + // Max and min execution time per system, for each of three runs + var minimums = [0, 0, 0], maximums = [0, 0, 0]; + + for (var j = 0; j < filtered_results.length; j++) { + for (var current_run_idx = 0; current_run_idx < current_runs.length; current_run_idx++) { + var k = current_runs[current_run_idx]; + var value = filtered_results[j].result[i][k]; + + if (value && (!minimums[k] || value < minimums[k])) { + minimums[k] = value; + + // Ignore below 10ms + if (minimums[k] < 0.01) { + minimums[k] = 0.01; + } + } + + if (value > maximums[k]) { + maximums[k] = value; + } + } + } + + for (var j = 0; j < filtered_results.length; j++) { + if (!ratios[j]) { + ratios[j] = []; + } + + for (var current_run_idx = 0; current_run_idx < current_runs.length; current_run_idx++) { + var k = current_runs[current_run_idx]; + var value = filtered_results[j].result[i][k]; + + var ratio = value / minimums[k]; + + ratios[j][k] = ratios[j][k] || []; + + if (ratio && ratio <= 1) { + ratio = 1; + } + + ratios[j][k].push(ratio); + + html += format_number_cell(value, ratio); + } + } + html += ""; + } + + if (current_systems.length) { + html += ""; + html += ""; + html += ""; + + for (var j = 0; j < filtered_results.length; j++) { + for (var k = 0; k < current_runs.length; k++) { + html += ""; + } + } + + html += ""; + html += ""; + + for (var j = 0; j < filtered_results.length; j++) { + html += ""; + } + + html += ""; + } + + html += "
Query" + filtered_results[j].system + + (filtered_results[j].version ? " (" + filtered_results[j].version + ")" : "") + "
" + queries[i].query + "
Geometric mean of ratios
"; + + $('#comparison_table').html(html); + + for (var i = 0; i < queries.length; i++) { + $('#query_checkbox' + i).click(function () { + calculate_totals(); + generate_diagram(); + }); + } + $('#query_checkbox_toggler').click(function () { + for (var i = 0; i < queries.length; i++) { + var item = $('#query_checkbox' + i); + item.prop("checked", !item.prop("checked")); + } + }); + + calculate_totals(); +} + + +function calculate_totals() { + if (!current_systems.length) return; + var filtered_results = results; + if (current_data_size) { + filtered_results = filtered_results.filter(function (x) { + return x.data_size == current_data_size; + }); + } + + filtered_results = filtered_results.filter(function (x) { + return current_systems.indexOf(x.system) != -1; + }); + + var total_ratios = []; + + for (var j = 0; j < filtered_results.length; j++) { + for (var current_run_idx = 0; current_run_idx < current_runs.length; current_run_idx++) { + var k = current_runs[current_run_idx]; + + var current_ratios = ratios[j][k].filter( + function (x, i) { + return x && $("#query_checkbox" + i).is(':checked'); + } + ); + + var ratio = Math.pow( + current_ratios.reduce( + function (acc, cur) { + return acc * cur; + }, + 1), + 1 / current_ratios.length); + + total_ratios[j] = total_ratios[j] || 1; + total_ratios[j] *= ratio; + + $("#totals" + j + "_" + k).attr("data-ratio", ratio).html("x" + ratio.toFixed(2)); + } + } + + for (var j = 0; j < filtered_results.length; j++) { + var total_ratio = Math.pow(total_ratios[j], 1 / current_runs.length); + $("#absolute_totals" + j).attr("data-ratio", total_ratio).html("x" + total_ratio.toFixed(2)); + } +} + + +function generate_diagram() { + var html = ""; + var filtered_results = results; + if (current_data_size) { + filtered_results = filtered_results.filter(function (x) { + return x.data_size == current_data_size && current_systems.indexOf(x.system) != -1; + }); + } + filtered_results = filtered_results.filter(function (x) { + return current_systems.indexOf(x.system) != -1; + }); + + var max_ratio = 1; + var min_ratio = 0; + + var max_total_ratio = 1; + var min_total_ratio = 0; + + for (var j = 0; j < filtered_results.length; j++) { + for (var current_run_idx = 0; current_run_idx < current_runs.length; current_run_idx++) { + var k = current_runs[current_run_idx]; + var ratio = +$("#totals" + j + "_" + k).attr("data-ratio"); + + if (ratio > max_ratio) { + max_ratio = ratio; + } + + if (!min_ratio || ratio < min_ratio) { + min_ratio = ratio; + } + } + + var total_ratio = +$("#absolute_totals" + j).attr("data-ratio"); + + if (total_ratio > max_total_ratio) { + max_total_ratio = total_ratio; + } + + if (!min_total_ratio || total_ratio < min_total_ratio) { + min_total_ratio = total_ratio; + } + } + + html += ""; + + for (var j = 0; j < filtered_results.length; j++) { + var total_ratio = +$("#absolute_totals" + j).attr("data-ratio"); + + html += ""; + html += ""; + + html += ""; + + html += ""; + html += ""; + } + + html += "
" + filtered_results[j].system + "" + + (filtered_results[j].version ? "
(" + filtered_results[j].version.replace(/ /g, ' ') + ")" : "") + "
"; + + for (var current_run_idx = 0; current_run_idx < current_runs.length; current_run_idx++) { + var k = current_runs[current_run_idx]; + + var ratio = +$("#totals" + j + "_" + k).attr("data-ratio"); + var percents = (ratio * 100 / max_ratio).toFixed(2); + + if (!ratio) { + ratio = +$("#absolute_totals" + j).attr("data-ratio"); + percents = (ratio * 100 / max_total_ratio).toFixed(2); + } + + html += '
 
'; + + } + + html += "
" + (total_ratio / min_total_ratio).toFixed(2) + "
"; + + $('#diagram').html(html); +} + +generate_selectors($('#selectors')); +generate_comparison_table(); +generate_diagram(); diff --git a/website/benchmark/dbms/index.html b/website/benchmark/dbms/index.html new file mode 100644 index 00000000000..58c5f895f6a --- /dev/null +++ b/website/benchmark/dbms/index.html @@ -0,0 +1,55 @@ +{% extends 'templates/base.html' %} + +{% set title = 'Performance comparison of database management systems' %} +{% set extra_js = ['queries.js', 'results.js', '../benchmark.js'] %} +{% set no_footer = True %} + +{% block content %} +
+ +
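Worth spelling out what benchmark.js above computes: for every query, each system's time is divided by the fastest time for that query (times are clamped to a 10 ms floor), ratios below 1 are clamped to 1, and a system's headline score is the geometric mean of its per-query ratios, so a system twice as slow on half of the queries scores x1.41 rather than x1.5. A small Python rendering of that aggregation (illustrative, with made-up timings; the site does this in the JS above):

import math

# times[system][q] = measured time of query q in seconds (made-up numbers)
times = {
    "ClickHouse": [0.090, 0.074, 0.334],
    "Vertica":    [0.095, 0.446, 1.773],
    "Greenplum":  [12.16, 9.15, 16.67],
}

def scores(times, floor=0.01):
    # Fastest time per query across systems, clamped to a 10 ms floor.
    best = [max(min(column), floor) for column in zip(*times.values())]
    result = {}
    for system, series in times.items():
        ratios = [max(t / b, 1.0) for t, b in zip(series, best)]
        # Geometric mean: n-th root of the product, computed via logs.
        result[system] = math.exp(sum(map(math.log, ratios)) / len(ratios))
    return result

for system, score in sorted(scores(times).items(), key=lambda kv: kv[1]):
    print(f"{system}: x{score:.2f}")  # ClickHouse comes out at x1.00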
+
+ + ClickHouse + +

Performance comparison of analytical DBMS

+
+
+ +
+
+
+ +
+
+

Relative query processing time (lower is better)

+
+
+
+ +
+
+

Full results

+
+
+
+ +
+
+

Comments

+

Most results are for single server setup with the following configuration: two socket Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz; 128 GiB RAM; md RAID-5 on 8 6TB SATA HDD; ext4.

+ +

Some additional results (marked as x2, x3, x6) are for clustered setup for comparison. These results are contributed from independent teams and hardware specification may differ.

+ +

+ Disclaimer! Some results are significantly outdated: +

    +
  • Results for MonetDB were obtained in 2013 for version v11.15.11 (Feb2013-SP3)
  • +
  • Results for MemSQL were obtained in 2015 for version 3.2.
  • +
  • Results for Vertica were obtained in 2015 for version 7.1.1.
  • +
+

+

See also: performance comparison of ClickHouse on various hardware.

+
+
+{% endblock %} diff --git a/website/benchmark/dbms/queries.js b/website/benchmark/dbms/queries.js new file mode 100644 index 00000000000..c92353ab0f2 --- /dev/null +++ b/website/benchmark/dbms/queries.js @@ -0,0 +1,179 @@ +var current_data_size = 1000000000; + +var current_systems = ["ClickHouse", "Vertica", "Greenplum"]; + +var queries = + [ + { + "query": "SELECT count() FROM hits", + "comment": "", + }, + { + "query": "SELECT count() FROM hits WHERE AdvEngineID != 0", + "comment": "", + }, + { + "query": "SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM hits", + "comment": "", + }, + { + "query": "SELECT sum(UserID) FROM hits", + "comment": "", + }, + { + "query": "SELECT uniq(UserID) FROM hits", + "comment": "", + }, + { + "query": "SELECT uniq(SearchPhrase) FROM hits", + "comment": "", + }, + { + "query": "SELECT min(EventDate), max(EventDate) FROM hits", + "comment": "", + }, + { + "query": "SELECT AdvEngineID, count() FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC", + "comment": "", + }, + { + "query": "SELECT RegionID, uniq(UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT MobilePhoneModel, uniq(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, count() AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, uniq(UserID) AS u FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchEngineID, SearchPhrase, count() AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, count() FROM hits GROUP BY UserID ORDER BY count() DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, SearchPhrase, count() FROM hits GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, SearchPhrase, count() FROM hits GROUP BY UserID, SearchPhrase LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID FROM hits WHERE UserID = 12345678901234567890", + "comment": "", + }, + { + "query": "SELECT count() FROM hits WHERE URL LIKE '%metrika%'", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, any(URL), count() AS c FROM hits WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM hits WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT * FROM hits WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10", + "comment": "", + }, + { + 
"query": "SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10", + "comment": "", + }, + { + "query": "SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM hits WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25", + "comment": "", + }, + { + "query": "SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM hits WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25", + "comment": "", + }, + { + "query": "SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits", + "comment": "", + }, + { + "query": "SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT WatchID, ClientIP, count() AS c, sum(Refresh), 
avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT URL, count() AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT 1, URL, count() AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT URL, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT Title, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT URL, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000", + "comment": "", + }, + { + "query": "SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000", + "comment": "", + }, + { + "query": "SELECT URLHash, EventDate, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://yandex.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100", + "comment": "", + }, + { + "query": "SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://yandex.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;", + "comment": "", + }, + { + "query": "SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;", + "comment": "", + } + ] diff --git a/website/benchmark/dbms/results/001_clickhouse_19_1_6.json b/website/benchmark/dbms/results/001_clickhouse_19_1_6.json new file mode 100644 index 00000000000..7b3f68d0b16 --- /dev/null +++ b/website/benchmark/dbms/results/001_clickhouse_19_1_6.json @@ -0,0 +1,163 @@ +[ + { + "system": "ClickHouse", + "version": "19.1.6", + "data_size": 10000000, + "time": "2016-06-01 03:00:00", + "comments": "FORMAT Null", + "result": + [ + [0.092, 0.012, 0.006], + [0.087, 0.006, 0.006], + [0.098, 0.010, 0.011], + [0.134, 0.016, 0.010], + [0.176, 0.037, 0.040], + [0.233, 
0.057, 0.058], + [0.066, 0.006, 0.007], + [0.071, 0.006, 0.014], + [0.259, 0.131, 0.128], + [0.266, 0.094, 0.078], + [0.217, 0.033, 0.032], + [0.211, 0.040, 0.034], + [0.249, 0.102, 0.102], + [0.343, 0.122, 0.107], + [0.302, 0.116, 0.122], + [0.209, 0.090, 0.083], + [0.415, 0.222, 0.221], + [0.287, 0.112, 0.115], + [0.562, 0.323, 0.351], + [0.127, 0.009, 0.009], + [0.410, 0.068, 0.073], + [0.494, 0.108, 0.083], + [0.911, 0.197, 0.193], + [2.426, 0.303, 0.286], + [0.311, 0.041, 0.046], + [0.203, 0.037, 0.040], + [0.262, 0.042, 0.052], + [0.365, 0.067, 0.066], + [0.735, 0.158, 0.177], + [0.549, 0.344, 0.521], + [0.357, 0.098, 0.103], + [0.509, 0.136, 0.130], + [0.775, 0.531, 0.532], + [0.641, 0.342, 0.320], + [0.600, 0.351, 0.352], + [0.295, 0.122, 0.134], + [0.291, 0.118, 0.122], + [0.203, 0.053, 0.057], + [0.234, 0.041, 0.037], + [0.502, 0.223, 0.234], + [0.275, 0.016, 0.018], + [0.222, 0.014, 0.017], + [0.182, 0.016, 0.014] + ] + }, + + { + "system": "ClickHouse", + "version": "19.1.6", + "data_size": 100000000, + "time": "2016-06-01 03:00:00", + "comments": "FORMAT Null", + "result": + [ + [0.133, 0.016, 0.016], + [0.086, 0.012, 0.037], + [0.254, 0.041, 0.037], + [0.478, 0.047, 0.045], + [0.655, 0.104, 0.105], + [0.581, 0.228, 0.297], + [0.105, 0.048, 0.022], + [0.076, 0.012, 0.015], + [0.892, 0.509, 0.487], + [1.077, 0.542, 0.589], + [0.529, 0.171, 0.167], + [0.541, 0.188, 0.182], + [0.975, 0.659, 0.603], + [1.479, 0.812, 0.770], + [1.082, 0.734, 0.778], + [0.952, 0.728, 0.729], + [2.212, 1.756, 1.952], + [1.230, 1.033, 0.907], + [4.613, 4.030, 3.955], + [0.477, 0.074, 0.037], + [1.648, 0.532, 0.555], + [2.002, 0.645, 0.651], + [3.676, 1.479, 1.487], + [7.792, 0.759, 0.788], + [1.139, 0.239, 0.215], + [0.522, 0.220, 0.215], + [1.018, 0.230, 0.238], + [1.768, 0.602, 0.603], + [1.818, 0.899, 0.916], + [3.301, 3.174, 3.283], + [1.313, 0.639, 0.631], + [2.136, 0.938, 0.975], + [5.894, 5.412, 5.467], + [3.359, 3.053, 3.061], + [3.355, 2.999, 3.004], + [1.224, 1.103, 1.063], + [0.185, 0.086, 0.092], + [0.138, 0.038, 0.038], + [0.155, 0.031, 0.031], + [0.390, 0.183, 0.162], + [0.203, 0.013, 0.013], + [0.178, 0.011, 0.011], + [0.128, 0.007, 0.007] + ] + }, + + { + "system": "ClickHouse", + "version": "19.1.6", + "data_size": 1000000000, + "time": "2016-06-01 03:00:00", + "comments": "FORMAT Null", + "result": + [ + [0.747, 0.090, 0.075], + [0.197, 0.074, 0.061], + [1.370, 0.419, 0.334], + [3.911, 0.390, 0.365], + [4.096, 0.636, 0.638], + [4.739, 1.626, 1.542], + [0.270, 0.172, 0.188], + [0.184, 0.067, 0.061], + [6.182, 3.016, 2.913], + [7.068, 3.329, 3.409], + [4.332, 1.128, 1.296], + [4.698, 1.351, 1.328], + [7.721, 5.721, 5.802], + [11.683, 7.687, 7.629], + [9.144, 6.987, 6.974], + [8.069, 6.115, 6.386], + [22.657, 20.622, 20.842], + [11.727, 10.574, 10.708], + [52.193, 51.836, 53.738], + [4.179, 0.378, 0.279], + [24.400, 4.712, 4.591], + [29.045, 5.533, 5.502], + [56.733, 13.958, 13.791], + [86.314, 8.349, 7.448], + [11.360, 1.762, 1.781], + [4.890, 1.568, 1.508], + [11.667, 1.962, 1.889], + [24.491, 5.554, 5.556], + [28.096, 8.528, 8.481], + [24.084, 25.500, 26.527], + [10.820, 5.387, 5.406], + [25.187, 8.879, 8.852], + [70.218, 67.707, 68.059], + [42.018, 36.039, 36.391], + [43.128, 35.813, 36.154], + [9.646, 8.490, 8.418], + [0.639, 0.250, 0.289], + [0.350, 0.064, 0.066], + [0.443, 0.106, 0.100], + [0.923, 0.461, 0.460], + [0.479, 0.030, 0.029], + [0.372, 0.025, 0.023], + [0.224, 0.012, 0.013] + ] + } +] diff --git a/website/benchmark/dbms/results/002_vertica_7_1_1.json 
b/website/benchmark/dbms/results/002_vertica_7_1_1.json new file mode 100644 index 00000000000..2b0c340747b --- /dev/null +++ b/website/benchmark/dbms/results/002_vertica_7_1_1.json @@ -0,0 +1,163 @@ +[ + { + "system": "Vertica", + "version": "7.1.1", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [0.015899, 0.012807, 0.012184], + [0.035133, 0.021397, 0.018946], + [0.060938, 0.034013, 0.037085], + [0.051492, 0.021746, 0.022661], + [0.150695, 0.144041, 0.143313], + [0.412724, 0.276505, 0.27558], + [0.039736, 0.021215, 0.025029], + [0.049819, 0.045784, 0.049303], + [0.248834, 0.222963, 0.217386], + [0.43366, 0.437333, 0.4883], + [0.106483, 0.094236, 0.092362], + [0.122424, 0.10004, 0.100646], + [0.952346, 0.963712, 0.994094], + [0.64299, 0.647605, 0.644699], + [0.606994, 0.552117, 0.563657], + [0.221643, 0.205149, 0.216158], + [0.789877, 0.848421, 0.869198], + [0.439601, 0.438257, 0.424207], + [1.51968, 1.351311, 1.495538], + [0.038791, 0.03504, 0.050796], + [0.847444, 0.412624, 0.413898], + [0.491446, 0.474577, 0.484147], + [1.693912, 1.166251, 1.236441], + [1.905181, 1.257361, 1.437238], + [0.36171, 0.263338, 0.28958], + [0.46795, 0.515716, 0.443451], + [0.28467, 0.248823, 0.251787], + [2.326452, 2.152684, 2.073438], + [2.953462, 2.324174, 2.845123], + [0.631466, 0.822281, 0.873889], + [0.446697, 0.320824, 0.304922], + [0.489312, 0.417575, 0.440902], + [2.178985, 2.094694, 2.164159], + [1.375479, 1.331646, 1.360907], + [1.347677, 1.354772, 1.344533], + [0.429637, 0.43564, 0.436279], + [3.297413, 3.177341, 3.194704], + [0.835327, 0.743157, 0.746247], + [0.248143, 0.20795, 0.218004], + [1.495476, 1.322633, 1.374602], + [0.187092, 0.12099, 0.127517], + [0.148605, 0.109589, 0.107272], + [0.148173, 0.12809, 0.133435] + ] + }, + + { + "system": "Vertica", + "version": "7.1.1", + "data_size": 100000000, + "time": "", + "comments": "", + "result": + [ + [0.044914, 0.033471, 0.029564], + [0.124784, 0.044458, 0.04813], + [0.253575, 0.167392, 0.166981], + [0.267045, 0.060595, 0.059291], + [0.901021, 0.881088, 1.032132], + [1.284296, 0.991411, 1.011576], + [0.149862, 0.068061, 0.067217], + [0.148711, 0.083089, 0.078208], + [1.847624, 1.836724, 1.892968], + [4.278373, 4.313527, 4.564084], + [0.358281, 0.266623, 0.288294], + [0.42072, 0.38109, 0.371086], + [5.294134, 4.280055, 4.179055], + [8.849077, 8.744801, 8.750143], + [4.469753, 4.203493, 4.319043], + [1.542069, 1.506466, 1.605814], + [8.230461, 8.129543, 8.521089], + [5.063301, 5.177715, 4.989504], + [21.097494, 21.113905, 20.863796], + [0.238975, 0.163524, 0.162261], + [3.681673, 3.664944, 3.738555], + [3.996372, 3.875857, 3.897112], + [5.086255, 5.00034, 5.019747], + [5.105649, 4.51027, 4.780023], + [6.028593, 6.027804, 5.998026], + [4.324348, 4.357931, 4.451637], + [6.011405, 6.01204, 6.006612], + [13.744667, 14.174568, 14.053413], + [9.533647, 9.626582, 9.551671], + [2.652615, 2.689042, 2.617271], + [1.881931, 1.808578, 1.80198], + [3.926782, 3.813766, 3.806481], + [19.214651, 19.52602, 19.544008], + [27.55101, 27.641466, 28.128856], + [27.687275, 27.525594, 27.338331], + [4.989802, 5.243158, 4.861738], + [2.471475, 2.239634, 2.270449], + [0.814507, 0.733751, 0.760994], + [0.280513, 0.230994, 0.231817], + [1.479762, 1.293106, 1.277735], + [0.163974, 0.12314, 0.127756], + [0.148318, 0.112932, 0.107095], + [0.069868, 0.071664, 0.065618] + ] + }, + + { + "system": "Vertica", + "version": "7.1.1", + "data_size": 1000000000, + "time": "", + "comments": "", + "result": + [ + [0.09516, 0.094485, 0.092024], + [0.816135, 
0.446083, 0.453634], + [2.11051, 1.773246, 1.791243], + [2.120462, 1.033154, 1.020443], + [8.149946, 7.740829, 7.659704], + [26.119083, 25.611287, 25.675649], + [1.035186, 0.818121, 0.822225], + [0.816869, 0.506568, 0.51574], + [13.108386, 13.20647, 13.556902], + [40.935852, 39.508237, 39.034314], + [3.183196, 2.862235, 2.86959], + [3.967054, 3.658079, 3.557328], + [30.73868, 30.722098, 31.301074], + [76.889072, 76.027064, 77.056729], + [32.033544, 31.866097, 32.772241], + [11.350141, 11.177469, 11.064054], + [74.059265, 73.061888, 73.041769], + [52.895306, 52.588376, 52.671085], + [186.891072, 184.998125, 185.913578], + [2.208113, 1.871682, 1.87381], + [21.705589, 21.595201, 21.415026], + [21.080841, 20.856408, 20.696303], + [45.916174, 45.351723, 45.433121], + [47.051243, 35.723046, 35.694351], + [53.209629, 53.406901, 53.069656], + [52.737858, 52.784361, 52.617806], + [53.430247, 53.206678, 53.309617], + [372.194119, 371.545597, 370.507236], + [76.594315, 76.464039, 76.319749], + [24.274602, 24.263616, 24.198579], + [14.400851, 13.927733, 13.747829], + [30.679117, 28.09498, 27.203538], + [210.606242, 214.108745, 214.521569], + [289.044749, 291.983512, 289.419234], + [289.46641, 290.132895, 289.4922], + [43.7288, 43.325352, 43.19419], + [5.028182, 4.798986, 4.728277], + [1.145844, 1.035948, 1.017571], + [0.398542, 0.3224, 0.324956], + [5.497337, 5.271507, 5.329618], + [0.210421, 0.162334, 0.161671], + [0.201661, 0.140586, 0.135319], + [0.178466, 0.162246, 0.159834] + ] + } +] diff --git a/website/benchmark/dbms/results/003_vertica_7_0_0_x3.json b/website/benchmark/dbms/results/003_vertica_7_0_0_x3.json new file mode 100644 index 00000000000..8f5ddaf9e73 --- /dev/null +++ b/website/benchmark/dbms/results/003_vertica_7_0_0_x3.json @@ -0,0 +1,109 @@ +[ + { + "system": "Vertica (x3)", + "version": "7.0.0-0", + "data_size": 1000000000, + "time": "", + "comments": "", + "result": + [ + [3.328, 0.397, 0.382], + [2.889, 0.24, 0.219], + [5.667, 0.781, 0.79], + [5.163, 0.58, 0.545], + [10.743, 7.414, 7.554], + [44.021, 43.629, 42.651], + [3.045, 0.416, 0.419], + [2.987, 0.366, 0.387], + [16.039, 13.626, 13.646], + [23.93, 21.818, 21.376], + [7.084, 2.683, 2.763], + [9.39, 3.176, 3.085], + [41.674, 42.039, 42.239], + [138.331, 136.452, 137.238], + [68.416, 67.551, 67.478], + [12.463, 10.125, 9.974], + [188.023, 186.817, 186.116], + [172.149, 170.75, 171.178], + [489.181, 488.154, 489.749], + [5.466, 0.916, 0.891], + [24.91, 18.009, 17.985], + [25.49, 18.525, 18.803], + [39.856, 17.993, 18.436], + [128.041, 9.876, 9.599], + [31.162, 30.831, 30.708], + [33.871, 32.901, 33.198], + [31.26, 30.795, 30.982], + [97.13, 93.233, 93.352], + [123.941, 123.625, 123.509], + [10.032, 9.418, 9.431], + [19.05, 9.184, 8.907], + [22.433, 19.726, 19.764], + [171.142, 162.149, 162.224], + [286.116, 283.672, 282.848], + [281.98, 278.234, 280.236], + [30.897, 30.486, 30.338], + [11.785, 11.42, 11.262], + [2.188, 1.739, 1.782], + [1.754, 1.582, 1.852], + [25.553, 24.89, 24.753], + [0.853, 0.324, 0.304], + [0.656, 0.368, 0.332], + [0.436, 0.356, 0.31] + ] + }, + + { + "system": "Vertica (x6)", + "version": "7.0.0-0", + "data_size": 1000000000, + "time": "", + "comments": "", + "result": + [ + [2.203, 0.392, 0.421], + [2.201, 0.336, 0.36], + [3.669, 0.704, 0.624], + [3.512, 0.516, 0.501], + [7.482, 5.696, 5.812], + [43.298, 48.75, 42.419], + [2.285, 0.411, 0.396], + [2.111, 0.454, 0.447], + [12.751, 10.454, 10.447], + [16.751, 15.247, 16.011], + [4.789, 2.06, 2.023], + [6.015, 2.207, 2.273], + [42.854, 41.299, 42.517], + 
[50.338, 48.5, 48.569], + [27.033, 25.38, 25.336], + [8.766, 6.73, 6.584], + [63.644, 64.514, 63.864], + [56.751, 56.018, 56.263], + [151.68, 149.595, 150.832], + [3.249, 0.701, 0.639], + [25.006, 18.019, 17.994], + [27.427, 18.74, 18.759], + [23.331, 9.915, 10.314], + [92.277, 6.708, 6.496], + [16.708, 15.827, 16.021], + [18.68, 18.99, 18.492], + [16.39, 16, 16.023], + [50.455, 48.204, 49.54], + [64.871, 64.565, 63.996], + [5.638, 5.103, 5.086], + [14.41, 6.785, 6.714], + [22.602, 14.089, 13.952], + [106.629, 105.692, 103.34], + [301.935, 293.388, 295.326], + [299.916, 290.717, 297.424], + [21.476, 21.447, 21.519], + [7.853, 7.158, 7.353], + [2.087, 1.573, 1.543], + [2.536, 1.788, 1.912], + [11.355, 10.46, 9.62], + [1.008, 0.401, 0.434], + [1.358, 0.373, 0.422], + [0.841, 0.438, 0.442] + ] + } +] diff --git a/website/benchmark/dbms/results/004_infinidb_3_6_23.json b/website/benchmark/dbms/results/004_infinidb_3_6_23.json new file mode 100644 index 00000000000..0a446564af8 --- /dev/null +++ b/website/benchmark/dbms/results/004_infinidb_3_6_23.json @@ -0,0 +1,109 @@ +[ + { + "system": "InfiniDB", + "version": "Enterprise 3.6.23", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [1.15, 0.17, 0.16], + [0.31, 0.17, 0.16], + [0.47, 0.31, 0.30], + [null, null, null], + [0.97, 0.87, 0.92], + [6.14, 6.56, 5.43], + [0.35, 0.25, 0.26], + [0.22, 0.17, 0.16], + [1.19, 0.94, 1.02], + [1.34, 1.37, 1.33], + [0.51, 0.39, 0.39], + [0.45, 0.40, 0.38], + [8.06, 8.02, 8.02], + [8.43, 8.83, 8.89], + [8.21, 8.31, 8.42], + [1.73, 1.76, 1.78], + [18.95, 17.76, 19.98], + [12.59, 13.64, 12.24], + [37.04, 35.00, 36.76], + [0.25, 0.14, 0.13], + [2.28, 0.81, 0.86], + [0.61, 0.63, 0.57], + [3.02, 1.15, 1.17], + [12.44, 1.19, 1.19], + [8.92, 8.83, 9.07], + [8.15, 8.11, 8.11], + [10.39, 10.18, 10.33], + [5.70, 5.70, 5.82], + [13.77, 15.06, 13.88], + [8.57, 9.29, 8.58], + [2.03, 2.02, 2.00], + [3.18, 3.31, 3.26], + [20.23, 19.45, 20.16], + [183.55, 156.42, 124.94], + [160.14, 164.08, 162.15], + [3.49, 1.67, 1.71], + [23.03, 21.05, 21.21], + [3.14, 1.70, 1.65], + [1.64, 1.27, 1.23], + [82.86, 72.81, 77.55], + [0.32, 0.18, 0.18], + [0.28, 0.18, 0.19], + [3.43, 1.61, 1.53] + ] + }, + + { + "system": "InfiniDB", + "version": "Enterprise 3.6.23", + "data_size": 100000000, + "time": "", + "comments": "", + "result": + [ + [2.07, 0.34, 0.35], + [0.76, 0.3, 0.31], + [1.45, 1.23, 1.24], + [null, null, null], + [4.18, 3.89, 3.85], + [26.32, 28.07, 23.96], + [1.36, 1.04, 1.03], + [0.56, 0.32, 0.3], + [5.14, 4.54, 4.51], + [7.83, 8.18, 8.0], + [1.96, 1.4, 1.45], + [1.75, 1.52, 1.46], + [23.72, 23.01, 23.87], + [30.74, 30.86, 28.36], + [25.55, 24.76, 24.41], + [11.66, 11.59, 11.67], + [80.45, 85.49, 116.21], + [52.27, 50.76, 48.3], + [null, null, null], + [4.31, 0.24, 0.16], + [130.37, 7.24, 7.78], + [66.62, 10.19, 10.2], + [32.34, 19.66, 19.59], + [288.38, 58.86, 7.35], + [57.88, 57.95, 57.82], + [47.32, 52.59, 47.03], + [73.32, 65.1, 73.43], + [50.6, 51.5, 50.93], + [89.16, 85.75, 87.26], + [61.97, 60.49, 62.13], + [10.3, 10.4, 10.31], + [21.11, 20.86, 20.99], + [157.67, 151.81, 153.5], + [null, null, null], + [null, null, null], + [11.86, 11.08, 11.13], + [12.35, 12.49, 12.36], + [3.11, 3.12, 3.14], + [1.03, 0.89, 0.9], + [34.01, 45.75, 50.3], + [0.21, 0.23, 0.24], + [0.23, 0.21, 0.23], + [0.14, 0.15, 0.17] + ] + } +] diff --git a/website/benchmark/dbms/results/005_monetdb.json b/website/benchmark/dbms/results/005_monetdb.json new file mode 100644 index 00000000000..caa335babc9 --- /dev/null +++ 
b/website/benchmark/dbms/results/005_monetdb.json @@ -0,0 +1,110 @@ +[ + { + "system": "MonetDB", + "version": "", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [0.003851, 0.003389, 0.003633], + [0.400058, 0.045117, 0.007999], + [0.207544, 0.022219, 0.033007], + [null, null, null], + [0.873236, 0.61427, 0.564001], + [3.1, 2.8, 2.9], + [0.191616, 0.004704, 0.008579], + [0.056658, 0.049444, 0.035463], + [5.8, 5.8, 6.4], + [5.8, 8.4, 8.5], + [0.458164, 0.319166, 0.413914], + [0.344021, 0.314183, 0.348057], + [6.1, 13.3, 6.1], + [5.8, 5.6, 5.5], + [54.9, 70.0, 58.4], + [0.886465, 0.711899, 0.7329], + [5.8, 5.9, 5.9], + [5.8, 5.9, 6.0], + [20.2, 19.7, 21.0], + [0.331309, 0.000618, 0.085817], + [2.0, 0.056549, 0.09292], + [0.053756, 0.050622, 0.054916], + [2.2, 0.069379, 0.066628], + [13.0, 0.511351, 0.484708], + [0.137787, 0.030937, 0.030501], + [0.039574, 0.027741, 0.025684], + [0.046094, 0.043038, 0.039139], + [4.1, 2.7, 2.7], + [null, null, null], + [1.1, 0.940847, 0.921118], + [17.7, 18.2, 17.5], + [2.0, 0.236466, 0.236223], + [1.3, 1.3, 1.3], + [138.0, 140.0, 173.0], + [null, null, null], + [null, null, null], + [169.0, 175.0, null], + [261.0, 257.0, 268.0], + [3.1, 0.513628, 0.464017], + [262.0, 252.0, 257.0], + [0.524173, 0.066989, 0.123604], + [0.305343, 0.030736, 0.048725], + [2.3, 1.9, 1.9] + ] + }, + + { + "system": "MonetDB", + "version": "", + "data_size": 100000000, + "time": "", + "comments": "", + "result": + [ + [0.028886, 0.003581, 0.003829], + [2.7, 0.139637, 0.060264], + [1.1, 0.146556, 0.146063], + [null, null, null], + [7.5, 5.6, 5.4], + [15.5, 12.7, 11.5], + [1.0, 0.038566, 0.036837], + [0.687604, 0.026271, 0.030261], + [195.0, 214.0, null], + [254.0, 267.0, 265.0], + [22.9, 20.0, 20.8], + [20.4, 20.8, 23.3], + [97.0, 115.0, null], + [111.0, 84.0, 58.8], + [656.0, null, null], + [6.8, 6.9, 6.7], + [264.0, 234.0, 208.0], + [222.0, 279.0, null], + [1173.0, null, null], + [3.9, 0.000787, 0.032566], + [27.4, 1.7, 2.1], + [4.0, 2.6, 2.6], + [null, null, null], + [209.0, 155, 143], + [10.5, 5.7, 40.9], + [4.8, 4.7, 4.7], + [85.0, 4.7, 5.7], + [52.4, 30.3, 29.1], + [null, null, null], + [null, null, null], + [12.7, 11.2, 11.4], + [392.0, 370.0, null], + [3.2, 1.8, 1.8], + [41.9, 37.0, 40.7], + [null, null, null], + [null, null, null], + [null, null, null], + [6.4, 2.3, 2.1], + [3.6, 1.0, 1.1], + [1.5, 1.2, 1.3], + [7.2, 4.6, 4.9], + [2.8, 0.050916, 0.04148], + [3.0, 0.045935, 0.047277], + [2.5, 0.049119, 0.04828] + ] + } +] diff --git a/website/benchmark/dbms/results/006_infobright_4_0_7.json b/website/benchmark/dbms/results/006_infobright_4_0_7.json new file mode 100644 index 00000000000..25cf680f9d9 --- /dev/null +++ b/website/benchmark/dbms/results/006_infobright_4_0_7.json @@ -0,0 +1,55 @@ +[ + { + "system": "Infobright", + "version": "CE 4.0.7", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [0.00, 0.00, 0.00], + [0.40, 0.38, 0.39], + [0.10, 0.00, 0.00], + [null, null, null], + [2.83, 1.91, 1.95], + [9.16, 1.65, 1.70], + [0.03, 0.00, 0.00], + [0.46, 0.40, 0.41], + [4.13, 2.97, 3.43], + [5.12, 4.46, 4.15], + [1.98, 1.24, 1.36], + [1.58, 1.26, 1.27], + [13.37, 30.81, 29.76], + [32.59, 30.22, 13.00], + [12.93, 11.39, 30.46], + [2.98, 3.05, 2.96], + [9.90, 20.86, 25.17], + [10.38, 10.19, 10.28], + [162.43, 164.35, 169.28], + [1, 0, 22, 0, 24], + [47.80, 4.40, 4.47], + [4.83, 1.15, 1.14], + [43.82, 2.14, 2.16], + [6.14, 4.39, 4.35], + [0.41, 0.41, 0.41], + [0.80, 0.80, 0.80], + [0.41, 0.41, 0.40], + [10.39, 10.12, 9.88], + 
[139.25, 79.33, 78.92], + [145.8, 146.2, 144.97], + [3.34, 2.02, 2.06], + [3.68, 3.05, 3.02], + [14.76, 14.82, 14.76], + [77.49, 91.4, 90.9], + [75.37, 83.55, 63.55], + [10.80, 10.52, 10.67], + [23.77, 9.47, 9.30], + [50.21, 3.37, 3.36], + [1.26, 0.77, 0.76], + [98.25, 104.6, 94.29], + [2.04, 0.47, 0.54], + [1.36, 0.42, 0.41], + [4.58, 4.01, 3.98] + ] + } +] diff --git a/website/benchmark/dbms/results/007_hive_0_11.json b/website/benchmark/dbms/results/007_hive_0_11.json new file mode 100644 index 00000000000..a025fe9d884 --- /dev/null +++ b/website/benchmark/dbms/results/007_hive_0_11.json @@ -0,0 +1,109 @@ +[ + { + "system": "Hive", + "version": "0.11, ORC File", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [47.388, 44.55, 43.513], + [25.332, 22.592, 22.629], + [27.558, 23.861, 24.986], + [26.148, 23.564, 23.508], + [35.237, 31.445, 32.552], + [34.063, 29.607, 29.268], + [25.999, 22.443, 22.559], + [38.784, 37.082, 37.652], + [49.973, 47.282, 46.027], + [54.759, 50.301, 51.858], + [42.793, 39.001, 38.998], + [42.858, 38.928, 40.035], + [55.967, 53.253, 53.053], + [58.068, 54.393, 53.189], + [58.359, 53.181, 54.164], + [63.096, 58.614, 60.153], + [73.175, 70.386, 69.204], + [35.511, 31.512, 31.482], + [109.132, 107.333, 106.376], + [17.948, 14.47, 14.154], + [27.452, 24.527, 24.674], + [41.792, 40.17, 40.052], + [45.079, 42.12, 43.438], + [50.847, 46.004, 45.95], + [31.007, 26.473, 26.277], + [30.985, 27.724, 27.357], + [32.747, 28.329, 27.439], + [62.932, 57.159, 59.233], + [63.563, 63.375, 63.307], + [74.663, 67.206, 68.586], + [58.017, 52.364, 53.155], + [62.907, 60.202, 59.653], + [127.206, 124.701, 123.291], + [89.931, 87.6, 87.325], + [98.879, 89.299, 90.377], + [63.792, 61.127, 61.517], + [44.325, 39.995, 39.979], + [43.852, 40.178, 40.131], + [44.493, 40.17, 40.171], + [36.108, 36.293, 36.241], + [43.025, 39.168, 40.042], + [42.914, 40.129, 39.135], + [33.91, 34.161, 34.191] + ] + }, + + { + "system": "Hive", + "version": "0.11, ORC File", + "data_size": 100000000, + "time": "", + "comments": "", + "result": + [ + [110.676, 105.13, 107.358], + [55.195, 36.435, 32.201], + [39.991, 35.143, 35.085], + [44.465, 34.131, 34.032], + [110.69, 105.953, 107.343], + [68.119, 64.831, 64.269], + [37.809, 33.021, 33.13], + [53.788, 51.261, 48.653], + [87.479, 85.062, 85.039], + [106.577, 102.879, 101.705], + [60.4, 53.498, 53.516], + [61.275, 53.698, 53.577], + [87.924, 82.999, 82.867], + [94.281, 86.991, 87.084], + [91.05, 87.267, 87.731], + [132.697, 132.306, 130.91], + [141.357, 147.059, 140.75], + [60.884, 57.376, 57.367], + [237.554, 234.361, 234.271], + [34.019, 21.834, 21.08], + [41.195, 36.443, 35.979], + [60.385, 54.888, 56.541], + [67.257, 58.995, 59.828], + [87.697, 88.521, 89.324], + [53.796, 50.592, 50.118], + [68.786, 63.993, 62.886], + [60.715, 56.14, 55.303], + [112.58, 107.297, 106.493], + [115.068, 110.622, 109.541], + [136.36, 133.102, 135.896], + [113.348, 100.032, 99.905], + [124.002, 117.366, 109.524], + [301.77, 324.867, 294.034], + [233.937, 272.053, 238.167], + [241.283, 228.198, 246.999], + [120.684, 118.948, 118.18], + [70.292, 55.211, 55.076], + [63.4, 52.093, 52.895], + [67.483, 53.704, 54.814], + [60.588, 52.321, 53.356], + [62.644, 51.812, 53.23], + [69.068, 53.234, 52.853], + [46.67, 46.041, 45.95] + ] + } +] diff --git a/website/benchmark/dbms/results/008_mysql_5_5.json b/website/benchmark/dbms/results/008_mysql_5_5.json new file mode 100644 index 00000000000..c132ee3b2bb --- /dev/null +++ 
b/website/benchmark/dbms/results/008_mysql_5_5.json @@ -0,0 +1,109 @@ +[ + { + "system": "MySQL", + "version": "5.5.32, MyISAM", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [0.01, 0.01, 0.01], + [21.55, 18.91, 18.28], + [22.71, 19.86, 20.53], + [21.3, 18.93, 19.25], + [26.77, 25.74, 25.65], + [29.14, 26.92, 26.53], + [7.47, 7.38, 7.23], + [20.56, 18.84, 18.41], + [27.53, 25.14, 24.45], + [30.08, 26.07, 26.75], + [22.93, 19.82, 20.23], + [21.79, 19.75, 19.64], + [51.3, 42.27, 46.45], + [43.75, 42.38, 42.36], + [43.76, 48.66, 46.8], + [106.76, 106.53, 105.32], + [172.51, 181.59, 177.7], + [189.92, 166.09, 172.03], + [185.61, 181.38, 206.92], + [20.3, 19.24, 18.49], + [21.43, 20.03, 19.99], + [21.88, 20.06, 20.3], + [25.51, 20.72, 20.58], + [24.14, 20.04, 19.29], + [21.65, 19.11, 19.0], + [22.44, 20.35, 20.02], + [21.41, 19.96, 19.91], + [27.15, 26.28, 25.32], + [135.45, 134.31, 133.12], + [107.44, 106.26, 106.16], + [40.47, 36.67, 37.07], + [39.2, 36.86, 37.22], + [134.5, 130.75, 133.55], + [1057.45, 1075.29, 928.38], + [867.64, 1023.33, 1063.4], + [111.01, 109.86, 109.34], + [1160.03, 23.32, 23.08], + [1109.2, 14.24, 14.38], + [1086.92, 7.29, 6.63], + [31.74, 31.79, null], + [1074.29, 5.51, 5.43], + [1069.16, 5.6, 5.3], + [652.84, 4.84, 4.57] + ] + }, + + { + "system": "MySQL", + "version": "5.5.32, MyISAM", + "data_size": 100000000, + "time": "", + "comments": "", + "result": + [ + [0.01, 0.01, 0.01], + [220.39, 234.32, 305.28], + [220.45, 198.31, 198.37], + [207.6, 190.59, 188.35], + [275.96, 250.84, 246.93], + [292.17, 254.14, 251.06], + [75.51, 76.11, 74.98], + [203.94, 184.14, 180.82], + [287.28, 252.52, 249.48], + [299.44, 282.02, 271.33], + [218.71, 197.51, 195.94], + [220.1, 197.17, 199.88], + [929.45, 869.74, 739.53], + [null, null, null], + [1196.42, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [447.72, 199.09, 185.82], + [582.73, 196.73, 195.8], + [582.54, 205.89, 199.15], + [568.75, 217.15, 209.65], + [562.72, 206.77, 203.19], + [602.47, 186.8, 186.62], + [565.26, 199.44, 199.24], + [657.78, 202.53, 196.95], + [675.84, 250.11, 248.9], + [null, null, null], + [1061.89, 1054.6, null], + [993.89, 918.67, null], + [604.48, 553.33, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [873.29, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [0.67, 0.0, 0.0] + ] + } +] diff --git a/website/benchmark/dbms/results/009_memsql_3_2.json b/website/benchmark/dbms/results/009_memsql_3_2.json new file mode 100644 index 00000000000..7195f904778 --- /dev/null +++ b/website/benchmark/dbms/results/009_memsql_3_2.json @@ -0,0 +1,55 @@ +[ + { + "system": "MemSQL", + "version": "3.2, column store", + "data_size": 10000000, + "time": "2015-04-05", + "comments": "", + "result": + [ + [0.01, 0.01, 0.01], + [0.29, 0.26, 0.25], + [0.48, 0.45, 0.39], + [0.75, 0.63, 0.62], + [1.03, 0.97, 0.89], + [2.76, 2.65, 2.57], + [0.36, 0.32, 0.32], + [0.29, 0.24, 0.24], + [1.71, 1.51, 1.47], + [1.91, 1.69, 1.67], + [0.83, 0.66, 0.65], + [0.88, 0.70, 0.70], + [2.55, 2.59, 2.32], + [null, null, null], + [4.36, 2.34, 2.39], + [1.23, 1.09, 1.09], + [3.26, 3.18, 2.81], + [2.76, 2.58, 2.58], + [5.23, 4.74, 4.45], + [null, null, null], + [5.12, 4.62, 4.81], + [5.43, 4.91, 4.90], + [7.32, 6.18, 6.14], + [22.61, 17.85, 17.89], + [1.04, 0.77, 0.75], + [0.93, 0.77, 0.90], + [1.04, 0.75, 0.76], + [2.84, 2.41, 2.31], + 
[18.64, 18.19, 18.38], + [5.78, 5.68, 5.67], + [2.24, 1.90, 1.85], + [2.65, 2.22, 2.22], + [8.82, 8.32, 8.01], + [11.30, 10.93, 11.21], + [11.22, 10.73, 10.72], + [1.60, 1.46, 1.45], + [1.86, 1.75, 1.83], + [1.16, 1.10, 1.11], + [0.54, 0.44, 0.44], + [3.79, 3.59, 3.58], + [null, null, null], + [null, null, null], + [0.37, 0.35, 0.35] + ] + } +] diff --git a/website/benchmark/dbms/results/010_greenplum_4_3_9.json b/website/benchmark/dbms/results/010_greenplum_4_3_9.json new file mode 100644 index 00000000000..c2c43b22de6 --- /dev/null +++ b/website/benchmark/dbms/results/010_greenplum_4_3_9.json @@ -0,0 +1,161 @@ +[ + { + "system": "Greenplum", + "version": "4.3.9.1", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [0.77, 0.77, 0.92], + [0.44, 0.27, 0.46], + [0.95, 0.90, 0.89], + [0.74, 0.91, 0.83], + [1.43, 1.47, 1.44], + [1.74, 1.63, 1.51], + [0.77, 0.83, 0.88], + [1.86, 1.95, 1.86], + [2.09, 1.92, 1.92], + [2.33, 2.41, 2.46], + [0.75, 0.78, 0.77], + [0.71, 0.70, 0.75], + [2.11, 2.22, 2.50], + [2.65, 2.47, 2.59], + [2.11, 2.40, 2.46], + [1.59, 1.66, 1.14], + [1.75, 1.96, 1.71], + [1.29, 1.30, 1.02], + [1.99, 2.25, 2.29], + [0.65, 0.53, 0.49], + [1.23, 0.98, 1.01], + [1.85, 1.81, 1.91], + [1.85, 1.24, 1.17], + [8.21, 3.48, 2.71], + [0.77, 0.49, 0.75], + [0.68, 0.68, 0.88], + [0.76, 0.46, 0.78], + [2.12, 2.07, 2.36], + [3.08, 2.86, 3.12], + [11.50, 11.90, 10.32], + [1.89, 1.85, 1.86], + [1.99, 1.96, 2.02], + [5.24, 4.93, 5.03], + [3.24, 3.40, 3.27], + [3.01, 2.64, 2.65], + [3.14, 2.80, 2.73], + [1.47, 1.14, 1.24], + [1.79, 1.05, 1.13], + [1.32, 1.11, 1.14], + [2.20, 1.51, 1.48], + [1.42, 1.01, 1.02], + [2.17, 1.74, 2.23], + [2.17, 1.70, 1.94] + ] + }, + { + "system": "Greenplum", + "version": "4.3.9.1", + "data_size": 100000000, + "time": "", + "comments": "", + "result": + [ + [4.11, 4.32, 4.36], + [2.56, 2.53, 2.80], + [4.98, 5.14, 5.45], + [6.64, 5.99, 5.69], + [5.86, 5.72, 5.98], + [6.71, 6.76, 7.36], + [4.25, 4.39, 4.62], + [4.53, 4.23, 4.14], + [9.12, 9.23, 9.18], + [14.77, 15.38, 14.37], + [3.22, 2.87, 3.89], + [4.41, 4.29, 4.89], + [6.78, 7.19, 6.36], + [15.04, 16.93, 16.14], + [7.08, 6.21, 6.96], + [6.83, 6.17, 5.02], + [8.90, 9.81, 9.29], + [6.84, 5.31, 7.67], + [15.33, 16.31, 15.42], + [3.18, 4.29, 3.59], + [8.47, 8.73, 9.91], + [10.40, 8.38, 9.67], + [10.68, 8.16, 7.90], + [50.37, 32.46, 30.45], + [5.21, 3.64, 4.13], + [5.47, 5.68, 5.91], + [4.33, 3.20, 3.83], + [10.76, 11.14, 11.22], + [18.45, 17.26, 16.28], + [96.58, 97.29, 92.61], + [7.23, 7.36, 7.57], + [10.35, 8.20, 9.87], + [38.32, 37.57, 40.99], + [16.27, 15.24, 16.74], + [15.90, 15.33, 16.27], + [13.44, 13.84, 13.24], + [1.92, 1.16, 1.10], + [1.88, 1.16, 1.10], + [1.87, 1.07, 0.98], + [2.71, 1.22, 1.22], + [1.71, 0.97, 1.07], + [2.44, 1.78, 1.68], + [2.19, 1.72, 2.24] + ] + }, + { + "system": "Greenplum", + "version": "4.3.9.1", + "data_size": 1000000000, + "time": "", + "comments": "", + "result": + [ + [29.01, 32.82, 30.09], + [20.93, 22.20, 20.63], + [34.41, 35.26, 36.59], + [44.01, 41.56, 41.36], + [36.87, 35.96, 39.32], + [46.44, 50.24, 45.50], + [29.22, 31.75, 30.19], + [20.58, 20.76, 24.18], + [56.29, 56.67, 57.36], + [79.13, 81.78, 78.60], + [32.11, 33.21, 29.69], + [33.69, 29.92, 30.92], + [44.85, 42.52, 40.64], + [63.16, 63.16, 64.79], + [47.16, 43.26, 42.45], + [41.04, 43.67, 41.76], + [63.45, 64.64, 60.74], + [48.56, 51.07, 48.81], + [79.89, 81.48, 81.27], + [25.73, 29.27, 31.48], + [72.20, 75.93, 71.44], + [74.50, 73.46, 74.82], + [81.19, 80.76, 78.67], + [339.14, 296.80, 296.95], 
+ [28.51, 31.36, 28.67], + [36.49, 36.47, 38.96], + [31.26, 29.18, 31.65], + [80.03, 83.33, 80.84], + [73.36, 73.27, 73.57], + [961.62, 935.00, 944.02], + [51.55, 50.44, 45.14], + [73.03, 72.86, 70.11], + [29.24, 28.79, 29.78], + [37.51, 39.70, 39.66], + [53.86, 53.37, 53.77], + [84.54, 84.86, 85.62], + [7.01, 1.93, 2.16], + [5.39, 1.55, 1.69], + [6.85, 1.43, 1.57], + [13.18, 3.17, 3.08], + [5.97, 1.30, 1.47], + [5.69, 2.12, 1.96], + [4.11, 2.27, 2.43] + ] + } +] diff --git a/website/benchmark/dbms/results/011_greenplum_4_3_9_x2.json b/website/benchmark/dbms/results/011_greenplum_4_3_9_x2.json new file mode 100644 index 00000000000..269f44d8dff --- /dev/null +++ b/website/benchmark/dbms/results/011_greenplum_4_3_9_x2.json @@ -0,0 +1,161 @@ +[ + { + "system": "Greenplum(x2)", + "version": "4.3.9.1", + "data_size": 100000000, + "time": "", + "comments": "", + "result": + [ + [2.11, 1.61, 1.60], + [1.30, 1.30, 1.25], + [2.19, 2.12, 2.10], + [2.48, 2.53, 2.50], + [2.96, 3.05, 3.12], + [4.02, 4.02, 3.98], + [1.99, 1.79, 1.81], + [4.26, 3.77, 3.80], + [7.83, 7.19, 6.67], + [11.48, 11.72, 11.62], + [2.12, 2.06, 2.02], + [2.18, 2.05, 2.03], + [5.23, 5.16, 5.29], + [7.01, 6.84, 6.91], + [5.11, 5.31, 5.25], + [3.47, 3.20, 3.24], + [4.19, 4.18, 4.19], + [3.25, 3.16, 3.25], + [6.72, 6.65, 6.84], + [1.70, 1.57, 1.55], + [3.84, 3.58, 3.46], + [6.80, 5.48, 5.31], + [10.50, 3.71, 3.86], + [31.39, 13.54, 14.30], + [3.00, 1.76, 1.70], + [2.13, 2.01, 2.11], + [2.69, 1.72, 1.67], + [6.26, 5.90, 5.58], + [16.77, 16.00, 15.89], + [45.96, 46.69, 47.78], + [4.61, 4.15, 4.22], + [7.08, 5.49, 5.64], + [28.43, 26.11, 28.32], + [19.05, 19.68, 19.23], + [19.23, 19.01, 20.48], + [7.71, 7.69, 7.61], + [2.50, 1.74, 1.74], + [2.15, 1.60, 1.77], + [2.21, 1.67, 1.97], + [2.88, 1.90, 1.82], + [2.25, 1.81, 1.84], + [3.36, 3.13, 3.18], + [3.16, 3.21, 2.90] + ] + }, + { + "system": "Greenplum(x2)", + "version": "4.3.9.1", + "data_size": 10000000, + "time": "", + "comments": "", + "result": + [ + [0.37, 0.17, 0.22], + [0.25, 0.13, 0.12], + [0.39, 0.22, 0.23], + [0.47, 0.26, 0.26], + [1.30, 1.49, 1.44], + [1.87, 1.85, 2.06], + [0.32, 0.19, 0.19], + [2.85, 3.32, 3.06], + [2.60, 2.44, 2.57], + [2.79, 2.46, 2.79], + [0.80, 0.57, 0.59], + [0.83, 0.60, 0.61], + [3.47, 3.21, 3.43], + [1.54, 1.33, 1.40], + [3.29, 3.11, 3.45], + [1.35, 1.45, 1.49], + [1.65, 1.54, 1.65], + [0.92, 0.81, 0.86], + [1.67, 1.25, 1.28], + [0.33, 0.19, 0.17], + [0.59, 0.40, 0.41], + [2.07, 2.22, 1.96], + [1.70, 0.95, 0.87], + [6.83, 1.74, 1.80], + [0.46, 0.20, 0.19], + [0.44, 0.28, 0.28], + [0.50, 0.19, 0.20], + [2.06, 2.07, 2.15], + [3.02, 2.94, 2.80], + [5.42, 5.62, 5.37], + [2.10, 1.95, 2.05], + [2.31, 2.16, 2.16], + [3.74, 3.59, 3.67], + [3.89, 3.99, 3.93], + [3.03, 2.89, 3.10], + [3.56, 3.41, 3.19], + [2.12, 1.96, 1.96], + [2.03, 1.87, 1.86], + [2.11, 1.83, 1.76], + [2.53, 2.10, 1.96], + [2.31, 1.68, 1.87], + [3.26, 3.22, 3.27], + [3.19, 3.23, 3.11] + ] + }, + { + "system": "Greenplum(x2)", + "version": "4.3.9.1", + "data_size": 1000000000, + "time": "", + "comments": "", + "result": + [ + [17.70, 12.63, 12.16], + [9.14, 9.22, 9.15], + [17.13, 17.56, 16.67], + [21.25, 20.67, 21.22], + [17.16, 16.49, 17.86], + [25.70, 25.60, 25.78], + [14.77, 15.00, 14.73], + [11.80, 12.09, 12.05], + [33.57, 33.21, 33.01], + [61.65, 59.21, 60.16], + [13.69, 13.74, 13.77], + [14.48, 14.19, 14.28], + [25.59, 26.22, 26.35], + [30.75, 31.32, 31.37], + [27.70, 28.49, 28.18], + [20.28, 20.50, 20.28], + [27.80, 27.13, 29.00], + [23.66, 14.42, 24.30], + [59.58, 58.06, 58.91], + 
[12.86, 13.18, 13.26], + [36.04, 32.46, 32.59], + [45.28, 34.80, 34.56], + [97.71, 34.57, 33.62], + [215.97, 121.61, 120.47], + [24.44, 13.65, 13.62], + [17.15, 17.01, 17.12], + [23.84, 13.32, 13.46], + [40.83, 39.39, 38.71], + [155.70, 155.18, 158.97], + [451.18, 448.88, 449.55], + [27.37, 25.11, 25.06], + [56.58, 42.46, 43.33], + [29.77, 29.24, 29.11], + [36.94, 31.05, 29.40], + [104.88, 102.26, 101.88], + [41.06, 41.52, 41.15], + [6.91, 2.29, 2.37], + [5.35, 2.00, 2.02], + [6.49, 1.98, 1.88], + [12.55, 3.30, 3.44], + [6.18, 1.95, 2.06], + [6.12, 3.02, 3.43], + [5.21, 3.66, 3.58] + ] + } +] diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html new file mode 100644 index 00000000000..49b85e5b99c --- /dev/null +++ b/website/benchmark/hardware/index.html @@ -0,0 +1,65 @@ +{% extends 'templates/base.html' %} + +{% set title = 'Performance comparison of ClickHouse on various hardware' %} +{% set extra_js = ['queries.js', 'results.js', '../benchmark.js'] %} +{% set no_footer = True %} + +{% block content %} +
+ClickHouse
+
+{{ title }}
+
+Relative query processing time (lower is better)
+
+Full results
+
+Comments
+
+Submit your own results: https://clickhouse.tech/docs/en/operations/performance_test/
+
+Results for Lenovo B580 Laptop are from Ragıp Ünal. 16 GB RAM 1600 MHz, 240 GB SSD, Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz (2 Core / 4 HT)
+Results for Time4vps.eu are from Ragıp Ünal.
+Results for Dell PowerEdge R640, R641 (in Hetzner) are from Dmitry Titov.
+Results for Dell PowerEdge R730 are from Amos Bird.
+Results for Dell R530 are from Yuriy Zolkin.
+Results for Xeon 2176G are from Sergey Golod.
+Results for Azure DS3v2 are from Boris Granveaud.
+Results for AWS are from Wolf Kreuzerkrieg.
+Results for Huawei Taishan are from Peng Gao at sina.com.
+Results for Selectel and AMD EPYC 7402P are from Andrey Dudin.
+Results for ProLiant are from Denis Ustinov.
+Results for AMD EPYC 7502P 128GiB are from Kostiantyn Velychkovskyi.
+Results for AMD EPYC 7502P 512GiB are from Sergey Zakharov.
+Results for Pinebook Pro are from Aleksey R. @kITerE.
+Results for AMD Ryzen are from Alexey Milovidov. Firefox was running in the background.
+Results for Azure E32s are from Piotr Maśko.
+Results for MacBook Pro are from Denis Glazachev. macOS Catalina version 10.15.4 (19E266). For "drop caches", the "Free Up RAM" feature in CleanMyMac is used.
+Results for AMD EPYC 7702 are from Peng Gao at sina.com.
+Results for Intel NUC are from Alexander Zaitsev, Altinity.
+The Xeon Gold 6230 server uses 4x SAMSUNG datacenter-class SSDs in RAID-10.
+Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because the cache was not flushed before each query.

+
+
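The page labels its chart "Relative query processing time (lower is better)". As a rough sketch of how such a figure can be derived from the result files above (this is an assumption about the aggregation, not necessarily what ../benchmark.js actually does): take each system's best of three runs per query, divide by the fastest system's time for that query, and combine the per-query ratios with a geometric mean, skipping the [null, null, null] rows for queries a system did not run.

function relativeTimes(results) {
    // results: array of { system, result } objects as in the JSON files above;
    // result[q] holds three run times for query q, or nulls if it did not run.
    var num_queries = results[0].result.length;
    var ratios = results.map(function () { return []; });

    for (var q = 0; q < num_queries; ++q) {
        // Best (minimum) of the three runs for each system, null-safe.
        var best = results.map(function (r) {
            var runs = r.result[q];
            return runs[0] === null ? null : Math.min.apply(null, runs);
        });
        var fastest = Math.min.apply(null, best.filter(function (t) { return t !== null; }));

        best.forEach(function (t, i) {
            if (t !== null)
                ratios[i].push(t / fastest); /* 1.0 means fastest on this query */
        });
    }

    // Geometric mean of the per-query ratios; lower is better, 1.0 is ideal.
    return ratios.map(function (r) {
        var log_sum = r.reduce(function (acc, x) { return acc + Math.log(x); }, 0);
        return Math.exp(log_sum / r.length);
    });
}

A geometric mean is the natural choice in this sketch because the per-query times in the files above span several orders of magnitude (from roughly 0.002 s to hundreds of seconds), and an arithmetic mean would let a single slow query dominate the comparison.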
+{% endblock %} diff --git a/website/benchmark/hardware/queries.js b/website/benchmark/hardware/queries.js new file mode 100644 index 00000000000..9527da84abf --- /dev/null +++ b/website/benchmark/hardware/queries.js @@ -0,0 +1,182 @@ +var current_data_size = 0; + +var current_systems = [ +'Xeon Gold 6230, 2 sockets, 40 threads', +'Dell PowerEdge R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB RAID-1', +'E5-2650 v2 @ 2.60GHz, 2 sockets, 16 threads, 8xHDD RAID-5']; + +var queries = + [ + { + "query": "SELECT count() FROM hits", + "comment": "", + }, + { + "query": "SELECT count() FROM hits WHERE AdvEngineID != 0", + "comment": "", + }, + { + "query": "SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM hits", + "comment": "", + }, + { + "query": "SELECT sum(UserID) FROM hits", + "comment": "", + }, + { + "query": "SELECT uniq(UserID) FROM hits", + "comment": "", + }, + { + "query": "SELECT uniq(SearchPhrase) FROM hits", + "comment": "", + }, + { + "query": "SELECT min(EventDate), max(EventDate) FROM hits", + "comment": "", + }, + { + "query": "SELECT AdvEngineID, count() FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC", + "comment": "", + }, + { + "query": "SELECT RegionID, uniq(UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT MobilePhoneModel, uniq(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM hits WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, count() AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, uniq(UserID) AS u FROM hits WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchEngineID, SearchPhrase, count() AS c FROM hits WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, count() FROM hits GROUP BY UserID ORDER BY count() DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, SearchPhrase, count() FROM hits GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, SearchPhrase, count() FROM hits GROUP BY UserID, SearchPhrase LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT UserID FROM hits WHERE UserID = 12345678901234567890", + "comment": "", + }, + { + "query": "SELECT count() FROM hits WHERE URL LIKE '%metrika%'", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, any(URL), count() AS c FROM hits WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM hits WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase 
ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT * FROM hits WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10", + "comment": "", + }, + { + "query": "SELECT SearchPhrase FROM hits WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10", + "comment": "", + }, + { + "query": "SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM hits WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25", + "comment": "", + }, + { + "query": "SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM hits WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25", + "comment": "", + }, + { + "query": "SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits", + "comment": "", + }, + { + "query": "SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != 
'' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT URL, count() AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT 1, URL, count() AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT URL, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT Title, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10", + "comment": "", + }, + { + "query": "SELECT URL, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000", + "comment": "", + }, + { + "query": "SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? 
Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000", + "comment": "", + }, + { + "query": "SELECT URLHash, EventDate, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://yandex.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100", + "comment": "", + }, + { + "query": "SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-31') AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://yandex.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;", + "comment": "", + }, + { + "query": "SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM hits WHERE CounterID = 34 AND EventDate >= toDate('2013-07-01') AND EventDate <= toDate('2013-07-02') AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute;", + "comment": "", + } + ]; diff --git a/website/benchmark/hardware/results/001_xeon_gold_6230.json b/website/benchmark/hardware/results/001_xeon_gold_6230.json new file mode 100644 index 00000000000..d43b5707967 --- /dev/null +++ b/website/benchmark/hardware/results/001_xeon_gold_6230.json @@ -0,0 +1,52 @@ +[ + { + "system": "Xeon Gold 6230, 2 sockets, 40 threads", + "time": "2020-01-01 00:00:00", + "result": + [ + [0.009, 0.002, 0.001], + [0.028, 0.008, 0.009], + [0.055, 0.014, 0.014], + [0.122, 0.018, 0.018], + [0.157, 0.069, 0.068], + [0.217, 0.124, 0.121], + [0.020, 0.008, 0.008], + [0.013, 0.008, 0.007], + [0.284, 0.213, 0.217], + [0.312, 0.241, 0.239], + [0.164, 0.087, 0.090], + [0.168, 0.092, 0.092], + [0.318, 0.256, 0.251], + [0.436, 0.323, 0.325], + [0.385, 0.327, 0.327], + [0.419, 0.402, 0.398], + [1.061, 0.918, 0.935], + [0.523, 0.474, 0.598], + [1.706, 1.621, 1.657], + [0.124, 0.037, 0.024], + [1.192, 0.263, 0.258], + [1.362, 0.303, 0.302], + [2.473, 0.730, 0.735], + [2.857, 0.451, 0.382], + [0.374, 0.106, 0.103], + [0.204, 0.079, 0.078], + [0.381, 0.114, 0.109], + [1.185, 0.327, 0.318], + [1.011, 0.408, 0.415], + [0.895, 0.925, 0.880], + [0.406, 0.277, 0.274], + [0.846, 0.386, 0.389], + [3.174, 2.500, 2.533], + [1.758, 1.311, 1.315], + [1.766, 1.332, 1.355], + [0.469, 0.449, 0.462], + [0.201, 0.169, 0.170], + [0.069, 0.061, 0.061], + [0.064, 0.056, 0.057], + [0.362, 0.365, 0.369], + [0.035, 0.022, 0.024], + [0.030, 0.019, 0.019], + [0.009, 0.005, 0.005] + ] + } +] diff --git a/website/benchmark/hardware/results/002_yandex_cloud_cascade_lake_64_vcpu.json b/website/benchmark/hardware/results/002_yandex_cloud_cascade_lake_64_vcpu.json new file mode 100644 index 00000000000..cf51a1a3b6e --- /dev/null +++ b/website/benchmark/hardware/results/002_yandex_cloud_cascade_lake_64_vcpu.json @@ -0,0 +1,103 @@ +[ + { + "system": "Yandex Cloud Cascade Lake, 64 vCPU (32 threads), 128 GB RAM, 400 GB SSD", + "time": "2020-01-11 00:00:00", + "result": + [ + [0.037, 0.002, 0.002], + [0.113, 0.008, 0.014], + [0.275, 0.016, 0.016], + [0.448, 0.022, 0.022], + [0.472, 0.083, 0.082], + [0.808, 0.137, 0.136], + [0.063, 0.016, 0.010], + [0.055, 0.008, 0.008], + [0.753, 0.280, 0.327], + [0.850, 0.308, 0.305], + [0.540, 0.110, 0.105], + 
[0.533, 0.113, 0.112], + [0.930, 0.326, 0.314], + [1.463, 0.392, 0.374], + [0.994, 0.389, 0.386], + [0.763, 0.471, 0.499], + [1.890, 1.271, 1.280], + [1.335, 0.642, 1.776], + [3.886, 2.448, 2.370], + [0.452, 0.052, 0.029], + [5.834, 0.380, 0.374], + [6.645, 0.509, 0.385], + [12.401, 1.204, 1.146], + [12.727, 0.502, 0.513], + [1.628, 0.154, 0.128], + [0.739, 0.121, 0.144], + [1.632, 0.173, 0.127], + [5.856, 0.495, 0.568], + [4.960, 0.685, 0.784], + [1.619, 0.974, 1.638], + [1.420, 0.377, 0.361], + [3.379, 0.596, 0.580], + [5.797, 4.241, 4.280], + [6.864, 2.376, 2.224], + [6.834, 2.112, 2.118], + [0.996, 0.890, 0.947], + [0.286, 0.226, 0.218], + [0.110, 0.080, 0.077], + [0.146, 0.075, 0.068], + [0.531, 0.500, 0.438], + [0.076, 0.025, 0.027], + [0.054, 0.021, 0.022], + [0.033, 0.004, 0.004] + ] + }, + + { + "system": "Yandex Cloud Cascade Lake, 64 vCPU (32 threads), 128 GB RAM, 4 TB SSD", + "time": "2020-01-13 00:00:00", + "result": + [ + [0.054, 0.002, 0.002], + [0.140, 0.009, 0.015], + [0.139, 0.017, 0.020], + [0.430, 0.022, 0.022], + [0.453, 0.083, 0.082], + [0.839, 0.160, 0.159], + [0.058, 0.010, 0.010], + [0.048, 0.009, 0.008], + [0.706, 0.307, 0.288], + [0.821, 0.328, 0.301], + [0.509, 0.108, 0.106], + [0.534, 0.117, 0.116], + [0.905, 0.318, 0.313], + [1.573, 0.429, 0.413], + [0.960, 0.410, 0.403], + [0.769, 0.619, 0.521], + [1.914, 1.335, 1.272], + [1.279, 0.657, 1.215], + [3.839, 2.264, 2.481], + [0.425, 0.064, 0.027], + [5.605, 0.344, 0.367], + [6.389, 0.382, 0.403], + [11.794, 0.894, 0.878], + [11.730, 0.536, 0.436], + [1.540, 0.120, 0.109], + [0.715, 0.091, 0.106], + [1.553, 0.132, 0.132], + [5.580, 0.375, 0.350], + [4.720, 0.511, 0.480], + [1.025, 0.953, 1.008], + [1.475, 0.359, 0.357], + [3.457, 0.504, 0.495], + [4.688, 3.581, 3.673], + [6.325, 1.913, 1.865], + [6.338, 1.933, 2.030], + [0.961, 0.785, 0.847], + [0.267, 0.221, 0.215], + [0.095, 0.071, 0.078], + [0.148, 0.065, 0.071], + [0.516, 0.471, 0.432], + [0.076, 0.028, 0.025], + [0.053, 0.018, 0.021], + [0.034, 0.004, 0.004] + ] + } +] diff --git a/website/benchmark/hardware/results/003_yandex_cloud_cascade_lake_4_vcpu.json b/website/benchmark/hardware/results/003_yandex_cloud_cascade_lake_4_vcpu.json new file mode 100644 index 00000000000..e10d567d86b --- /dev/null +++ b/website/benchmark/hardware/results/003_yandex_cloud_cascade_lake_4_vcpu.json @@ -0,0 +1,52 @@ +[ + { + "system": "Yandex Cloud Cascade Lake, 4 vCPU (2 threads), 16 GB RAM, 30 GB SSD", + "time": "2020-01-13 00:00:00", + "result": + [ + [0.621, 0.002, 0.002], + [0.288, 0.035, 0.030], + [1.023, 0.126, 0.132], + [5.152, 0.219, 0.194], + [0.458, 0.427, 0.447], + [6.848, 1.223, 1.232], + [0.271, 0.077, 0.058], + [0.130, 0.044, 0.032], + [3.722, 2.145, 2.159], + [2.571, 2.459, 2.490], + [0.764, 0.679, 0.721], + [0.892, 0.816, 0.816], + [5.743, 3.467, 3.294], + [5.177, 4.540, 4.596], + [5.294, 4.565, 4.510], + [5.109, 3.902, 3.845], + [14.256, 12.943, 12.882], + [8.741, 8.056, 9.738], + [30.649, 26.987, 26.702], + [2.063, 0.183, 0.239], + [54.740, 3.602, 3.559], + [54.077, 6.038, 4.264], + [107.285, 11.156, 9.986], + [114.734, 4.735, 4.673], + [15.581, 1.257, 1.249], + [3.779, 1.002, 0.992], + [4.864, 1.305, 1.305], + [55.450, 3.348, 3.230], + [46.372, 5.424, 5.263], + [6.437, 6.404, 6.179], + [11.933, 3.524, 3.546], + [20.803, 5.352, 5.216], + [43.065, 41.106, 41.870], + [58.396, 16.545, 16.610], + [51.752, 16.329, 16.221], + [6.722, 6.256, 6.391], + [0.533, 0.241, 0.237], + [0.113, 0.085, 0.077], + [0.093, 0.083, 0.074], + [0.624, 0.497, 0.492], + [0.286, 0.036, 
0.028], + [0.088, 0.022, 0.021], + [0.099, 0.005, 0.005] + ] + } +] diff --git a/website/benchmark/hardware/results/004_yandex_cloud_broadwell_4_vcpu.json b/website/benchmark/hardware/results/004_yandex_cloud_broadwell_4_vcpu.json new file mode 100644 index 00000000000..c3b4f230aab --- /dev/null +++ b/website/benchmark/hardware/results/004_yandex_cloud_broadwell_4_vcpu.json @@ -0,0 +1,52 @@ +[ + { + "system": "Yandex Cloud Broadwell, 4 vCPU (2 threads), 16 GB RAM, 30 GB SSD", + "time": "2020-01-14 00:00:00", + "result": + [ + [0.507, 0.002, 0.002], + [0.267, 0.035, 0.034], + [0.970, 0.120, 0.121], + [4.600, 0.200, 0.194], + [0.554, 0.469, 0.462], + [7.314, 1.276, 1.251], + [0.164, 0.062, 0.063], + [0.159, 0.035, 0.036], + [5.551, 1.935, 1.946], + [2.291, 2.170, 2.188], + [0.718, 0.653, 0.686], + [0.841, 0.796, 0.776], + [7.636, 2.906, 2.849], + [6.644, 4.234, 3.796], + [3.847, 3.080, 3.029], + [4.308, 3.285, 3.214], + [9.768, 8.793, 8.694], + [6.103, 5.225, 6.072], + [20.421, 17.609, 17.372], + [2.141, 0.182, 0.189], + [55.415, 3.527, 3.553], + [55.961, 4.545, 4.011], + [106.069, 9.063, 8.975], + [116.871, 4.638, 4.542], + [16.100, 1.818, 1.186], + [2.543, 0.950, 0.933], + [5.086, 1.199, 1.192], + [55.720, 3.259, 3.240], + [46.784, 5.170, 5.190], + [6.505, 6.229, 6.191], + [11.382, 2.817, 2.863], + [22.205, 4.495, 4.348], + [34.430, 27.314, 27.662], + [58.643, 14.066, 14.196], + [50.675, 14.220, 13.868], + [5.674, 5.107, 5.219], + [0.577, 0.293, 0.272], + [0.151, 0.098, 0.094], + [0.107, 0.094, 0.089], + [0.692, 0.582, 0.610], + [0.302, 0.040, 0.036], + [0.101, 0.027, 0.026], + [0.094, 0.006, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/005_amd_epyc_7551.json b/website/benchmark/hardware/results/005_amd_epyc_7551.json new file mode 100644 index 00000000000..1af296d47e8 --- /dev/null +++ b/website/benchmark/hardware/results/005_amd_epyc_7551.json @@ -0,0 +1,52 @@ +[ + { + "system": "Dell PowerEdge R6415 DX180 AMD EPYC™ 7551P 32-Core Naples (Zen), 128 GB RAM, 2x SSD 960 GB RAID-1", + "time": "2020-01-13 00:00:00", + "result": + [ + [0.007, 0.002, 0.001], + [0.030, 0.016, 0.014], + [0.042, 0.026, 0.026], + [0.078, 0.043, 0.042], + [0.143, 0.120, 0.117], + [0.239, 0.198, 0.198], + [0.022, 0.014, 0.014], + [0.016, 0.013, 0.015], + [0.388, 0.380, 0.384], + [0.476, 0.429, 0.411], + [0.201, 0.192, 0.191], + [0.204, 0.207, 0.192], + [0.676, 0.654, 0.637], + [0.890, 0.932, 0.940], + [0.730, 0.789, 0.738], + [0.658, 0.641, 0.678], + [1.556, 1.430, 1.529], + [0.819, 1.096, 0.906], + [3.569, 3.626, 3.508], + [0.083, 0.047, 0.077], + [0.812, 1.010, 0.601], + [1.097, 0.847, 0.864], + [2.654, 3.146, 3.169], + [1.595, 0.922, 0.877], + [0.259, 0.227, 0.236], + [0.206, 0.187, 0.181], + [0.245, 0.235, 0.232], + [0.974, 1.018, 1.012], + [1.280, 1.398, 1.243], + [2.171, 2.270, 2.284], + [0.594, 0.592, 0.602], + [0.976, 0.946, 0.966], + [4.543, 4.471, 4.364], + [3.844, 4.052, 3.858], + [3.932, 3.961, 3.982], + [1.128, 1.117, 1.146], + [0.233, 0.216, 0.221], + [0.088, 0.082, 0.085], + [0.075, 0.070, 0.070], + [0.465, 0.445, 0.435], + [0.036, 0.026, 0.031], + [0.028, 0.024, 0.021], + [0.010, 0.006, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/006_xeon_sp_gold.json b/website/benchmark/hardware/results/006_xeon_sp_gold.json new file mode 100644 index 00000000000..a0fc33cb0b2 --- /dev/null +++ b/website/benchmark/hardware/results/006_xeon_sp_gold.json @@ -0,0 +1,52 @@ +[ + { + "system": "Dell PowerEdge R640 DX292 2x Xeon SP Gold 16-Core 2.10GHz, 196 GB RAM, 2x SSD 960 GB 
RAID-1", + "time": "2020-01-13 00:00:00", + "result": + [ + [0.005, 0.003, 0.003], + [0.035, 0.013, 0.016], + [0.043, 0.023, 0.023], + [0.076, 0.030, 0.027], + [0.109, 0.087, 0.098], + [0.184, 0.154, 0.151], + [0.030, 0.017, 0.016], + [0.018, 0.017, 0.016], + [0.346, 0.357, 0.375], + [0.467, 0.397, 0.410], + [0.165, 0.135, 0.137], + [0.166, 0.146, 0.143], + [0.452, 0.432, 0.415], + [0.543, 0.523, 0.527], + [0.508, 0.489, 0.472], + [0.638, 0.551, 0.549], + [1.280, 1.231, 1.272], + [0.680, 0.748, 0.611], + [2.380, 2.465, 2.351], + [0.073, 0.065, 0.040], + [0.724, 0.371, 0.376], + [0.805, 0.474, 0.450], + [1.547, 1.064, 1.117], + [1.798, 0.543, 0.507], + [0.217, 0.145, 0.142], + [0.139, 0.122, 0.133], + [0.221, 0.161, 0.159], + [0.730, 0.440, 0.449], + [0.875, 0.744, 0.721], + [1.307, 1.259, 1.318], + [0.457, 0.401, 0.404], + [0.716, 0.688, 0.617], + [4.147, 4.251, 3.844], + [2.082, 1.950, 2.187], + [2.109, 2.095, 1.930], + [0.875, 0.851, 0.848], + [0.233, 0.235, 0.221], + [0.103, 0.087, 0.086], + [0.087, 0.078, 0.078], + [0.452, 0.407, 0.403], + [0.047, 0.041, 0.054], + [0.036, 0.034, 0.035], + [0.013, 0.010, 0.010] + ] + } +] diff --git a/website/benchmark/hardware/results/007_xeon_e5_2650.json b/website/benchmark/hardware/results/007_xeon_e5_2650.json new file mode 100644 index 00000000000..5c2d79a34c2 --- /dev/null +++ b/website/benchmark/hardware/results/007_xeon_e5_2650.json @@ -0,0 +1,52 @@ +[ + { + "system": "E5-2650 v2 @ 2.60GHz, 2 sockets, 16 threads, 8xHDD RAID-5", + "time": "2020-01-12 00:00:00", + "result": + [ + [0.101, 0.002, 0.002], + [0.196, 0.019, 0.021], + [0.486, 0.035, 0.029], + [0.413, 0.045, 0.043], + [0.368, 0.134, 0.105], + [0.563, 0.282, 0.269], + [0.078, 0.030, 0.025], + [0.070, 0.019, 0.014], + [0.751, 0.522, 0.558], + [0.856, 0.549, 0.547], + [0.458, 0.155, 0.163], + [0.439, 0.169, 0.190], + [0.929, 0.699, 0.608], + [1.494, 0.863, 0.902], + [1.379, 0.778, 0.794], + [1.032, 0.832, 0.851], + [2.364, 1.974, 1.914], + [1.284, 1.140, 1.043], + [4.745, 4.279, 4.294], + [0.713, 0.085, 0.071], + [4.133, 0.775, 0.729], + [3.485, 0.924, 0.880], + [7.568, 1.808, 1.853], + [9.496, 1.115, 1.119], + [1.130, 0.209, 0.243], + [0.643, 0.225, 0.211], + [1.338, 0.293, 0.233], + [4.353, 0.803, 0.759], + [2.667, 1.158, 1.070], + [2.612, 1.753, 1.721], + [1.370, 0.641, 0.704], + [2.348, 0.977, 1.015], + [6.154, 5.822, 5.696], + [4.553, 3.076, 3.232], + [4.647, 2.960, 3.249], + [1.441, 1.424, 1.285], + [0.560, 0.303, 0.245], + [0.223, 0.082, 0.084], + [0.275, 0.078, 0.076], + [0.929, 0.487, 0.416], + [0.362, 0.033, 0.049], + [0.179, 0.035, 0.022], + [0.075, 0.013, 0.013] + ] + } +] diff --git a/website/benchmark/hardware/results/008_skylake_kvm.json b/website/benchmark/hardware/results/008_skylake_kvm.json new file mode 100644 index 00000000000..2ddff47a460 --- /dev/null +++ b/website/benchmark/hardware/results/008_skylake_kvm.json @@ -0,0 +1,52 @@ +[ + { + "system": "Time4vps.eu VPS (KVM) Linux Ubuntu 4 Core (Skylake) 16GB RAM 160GB Disk", + "time": "2020-01-13 00:00:00", + "result": + [ + [0.068, 0.002, 0.002], + [0.124, 0.021, 0.025], + [0.594, 0.089, 0.077], + [2.300, 0.133, 0.090], + [2.710, 0.205, 0.212], + [5.203, 0.603, 0.610], + [0.090, 0.029, 0.036], + [0.118, 0.021, 0.022], + [5.977, 1.295, 1.206], + [3.909, 1.415, 1.452], + [2.551, 0.336, 0.324], + [3.123, 0.446, 0.409], + [4.075, 1.743, 1.661], + [6.427, 2.499, 2.487], + [5.775, 2.156, 2.431], + [3.322, 2.288, 2.276], + [8.642, 6.463, 6.690], + [6.365, 3.852, 3.757], + [20.426, 13.849, 13.695], + [2.507, 0.105, 0.100], + 
[30.691, 1.747, 1.699], + [30.206, 2.010, 1.943], + [57.155, 4.699, 4.859], + [50.924, 2.173, 2.119], + [10.907, 0.660, 0.686], + [3.636, 0.505, 0.524], + [8.388, 0.683, 0.627], + [27.423, 1.650, 1.703], + [21.309, 2.824, 2.821], + [4.227, 4.053, 4.037], + [8.198, 1.797, 1.776], + [18.853, 2.927, 2.881], + [22.254, 21.156, 20.854], + [29.323, 8.728, 8.621], + [27.889, 8.759, 9.063], + [4.121, 3.837, 3.934], + [0.452, 0.292, 0.247], + [0.221, 0.093, 0.090], + [0.331, 0.069, 0.074], + [0.703, 0.469, 0.506], + [0.211, 0.026, 0.027], + [0.134, 0.021, 0.021], + [0.121, 0.007, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/009_core_i5_3210M_lenovo_b580.json b/website/benchmark/hardware/results/009_core_i5_3210M_lenovo_b580.json new file mode 100644 index 00000000000..dbbbf93b338 --- /dev/null +++ b/website/benchmark/hardware/results/009_core_i5_3210M_lenovo_b580.json @@ -0,0 +1,52 @@ +[ + { + "system": "Lenovo B580 Laptop (i5-3210M)", + "time": "2020-01-11 00:00:00", + "result": + [ + [0.035, 0.003, 0.005], + [0.093, 0.064, 0.060], + [0.265, 0.170, 0.167], + [0.880, 0.251, 0.266], + [0.954, 0.593, 0.561], + [2.140, 1.506, 1.525], + [0.148, 0.096, 0.105], + [0.064, 0.048, 0.044], + [2.727, 2.330, 2.280], + [3.386, 3.210, 2.951], + [1.218, 0.787, 0.749], + [1.293, 0.915, 0.904], + [3.713, 3.224, 3.190], + [4.943, 4.338, 4.310], + [4.503, 3.999, 3.918], + [4.001, 3.686, 4.144], + [10.714, 10.011, 10.035], + [7.456, 6.556, 6.675], + [20.201, 19.238, 19.135], + [0.888, 0.217, 0.209], + [9.685, 4.144, 4.023], + [11.201, 4.648, 4.636], + [21.037, 10.712, 10.571], + [18.186, 4.743, 4.743], + [2.844, 1.379, 1.358], + [1.623, 1.138, 1.130], + [2.861, 1.394, 1.417], + [9.691, 4.191, 4.129], + [10.285, 7.381, 7.379], + [6.879, 6.871, 6.829], + [4.131, 3.336, 3.240], + [7.157, 4.666, 4.616], + [29.371, 36.392, 29.946], + [17.929, 14.223, 14.127], + [17.058, 13.998, 14.055], + [5.667, 5.460, 5.408], + [0.325, 0.230, 0.217], + [0.115, 0.101, 0.094], + [0.148, 0.093, 0.084], + [0.585, 0.464, 0.459], + [0.078, 0.042, 0.035], + [0.057, 0.038, 0.032], + [0.024, 0.011, 0.010] + ] + } +] diff --git a/website/benchmark/hardware/results/010_xeon_e5_2640v4.json b/website/benchmark/hardware/results/010_xeon_e5_2640v4.json new file mode 100644 index 00000000000..d81c9a2a2a0 --- /dev/null +++ b/website/benchmark/hardware/results/010_xeon_e5_2640v4.json @@ -0,0 +1,52 @@ +[ + { + "system": "Dell PowerEdge R730xd, 2 socket 10 cores E5-2640 v4, HW RAID5 3TBx12 SATA", + "time": "2020-01-14 00:00:00", + "result": + [ + [0.225, 0.001, 0.002], + [0.534, 0.010, 0.010], + [0.229, 0.025, 0.026], + [0.530, 0.042, 0.040], + [0.265, 0.094, 0.090], + [0.685, 0.224, 0.219], + [0.172, 0.013, 0.013], + [0.181, 0.010, 0.011], + [0.908, 0.418, 0.424], + [0.725, 0.450, 0.462], + [0.517, 0.138, 0.139], + [0.445, 0.168, 0.161], + [1.065, 0.585, 0.584], + [1.325, 0.756, 0.747], + [1.184, 0.627, 0.637], + [0.905, 0.676, 0.699], + [2.101, 1.848, 1.775], + [1.275, 0.927, 0.988], + [5.285, 4.201, 4.088], + [0.465, 0.040, 0.071], + [7.380, 0.557, 0.538], + [7.636, 0.665, 0.718], + [13.905, 1.685, 1.645], + [18.739, 0.828, 0.790], + [1.950, 0.195, 0.185], + [0.549, 0.163, 0.156], + [1.384, 0.205, 0.190], + [7.199, 0.650, 0.611], + [6.514, 0.935, 0.885], + [2.154, 2.034, 2.031], + [1.538, 0.546, 0.525], + [3.711, 0.916, 0.936], + [5.993, 4.973, 5.183], + [8.215, 2.759, 2.741], + [8.162, 2.795, 2.772], + [1.347, 1.074, 1.051], + [0.478, 0.208, 0.204], + [0.147, 0.077, 0.074], + [0.197, 0.066, 0.066], + [0.694, 0.438, 0.453], + [0.217, 
0.024, 0.033], + [0.137, 0.032, 0.020], + [0.058, 0.006, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/011_yandex_managed_clickhouse_s3_3xlarge.json b/website/benchmark/hardware/results/011_yandex_managed_clickhouse_s3_3xlarge.json new file mode 100644 index 00000000000..5b0c38ec60d --- /dev/null +++ b/website/benchmark/hardware/results/011_yandex_managed_clickhouse_s3_3xlarge.json @@ -0,0 +1,103 @@ +[ + { + "system": "Yandex Managed ClickHouse, s3.3xlarge, Cascade Lake 32 vCPU, 128 GB RAM, 1 TB local SSD", + "time": "2020-01-14 00:00:00", + "result": + [ + [0.039, 0.041, 0.046], + [0.079, 0.084, 0.081], + [0.069, 0.068, 0.060], + [0.067, 0.065, 0.073], + [0.114, 0.116, 0.115], + [0.176, 0.171, 0.166], + [0.055, 0.058, 0.065], + [0.060, 0.061, 0.057], + [0.290, 0.290, 0.289], + [0.323, 0.315, 0.309], + [0.128, 0.124, 0.137], + [0.144, 0.136, 0.136], + [0.344, 0.326, 0.325], + [0.402, 0.392, 0.431], + [0.430, 0.436, 0.414], + [0.509, 0.485, 0.485], + [1.233, 1.151, 1.167], + [0.652, 0.651, 0.631], + [2.078, 2.076, 2.046], + [0.070, 0.071, 0.070], + [0.358, 0.327, 0.355], + [0.428, 0.403, 0.405], + [0.974, 0.986, 0.976], + [0.532, 0.549, 0.500], + [0.164, 0.169, 0.158], + [0.128, 0.130, 0.138], + [0.166, 0.169, 0.159], + [0.428, 0.449, 0.471], + [0.586, 0.598, 0.568], + [1.115, 1.115, 1.147], + [0.342, 0.344, 0.342], + [0.481, 0.473, 0.470], + [4.436, 3.273, 3.320], + [1.661, 1.542, 1.545], + [1.573, 1.875, 1.576], + [0.600, 0.566, 0.586], + [0.261, 0.255, 0.251], + [0.134, 0.136, 0.120], + [0.135, 0.133, 0.132], + [0.525, 0.531, 0.521], + [0.073, 0.071, 0.068], + [0.060, 0.071, 0.071], + [0.051, 0.057, 0.050] + ] + }, + + { + "system": "Yandex Managed ClickHouse, s3.3xlarge, Cascade Lake 32 vCPU, 128 GB RAM, 12.5 TB local HDD", + "time": "2020-01-14 00:00:00", + "result": + [ + [0.049, 0.049, 0.045], + [0.297, 0.079, 0.068], + [0.350, 0.059, 0.065], + [1.099, 0.068, 0.064], + [0.120, 0.123, 0.117], + [1.847, 0.202, 0.191], + [0.124, 0.056, 0.060], + [0.062, 0.058, 0.055], + [0.681, 0.321, 0.312], + [0.346, 0.349, 0.368], + [0.200, 0.140, 0.145], + [0.227, 0.142, 0.152], + [0.447, 0.410, 0.414], + [0.499, 0.570, 0.487], + [0.658, 0.467, 0.465], + [0.587, 0.576, 0.580], + [1.376, 1.340, 1.290], + [0.746, 0.748, 0.717], + [3.608, 2.427, 2.455], + [0.073, 0.106, 0.076], + [9.138, 0.422, 0.386], + [0.615, 0.463, 0.449], + [12.166, 1.067, 1.094], + [13.856, 0.735, 0.548], + [0.194, 0.187, 0.179], + [0.145, 0.140, 0.151], + [0.185, 0.180, 0.182], + [0.493, 0.507, 0.498], + [9.379, 0.618, 0.617], + [1.182, 1.187, 1.268], + [1.088, 0.402, 0.407], + [3.943, 0.646, 0.538], + [4.547, 3.858, 4.407], + [13.240, 1.776, 1.808], + [1.760, 1.823, 1.918], + [0.719, 0.693, 0.729], + [0.346, 0.309, 0.279], + [0.151, 0.143, 0.134], + [0.191, 0.141, 0.149], + [0.804, 0.550, 0.686], + [0.147, 0.093, 0.086], + [0.099, 0.085, 0.084], + [0.056, 0.059, 0.057] + ] + } +] diff --git a/website/benchmark/hardware/results/012_dell_r530.json b/website/benchmark/hardware/results/012_dell_r530.json new file mode 100644 index 00000000000..93d7234df55 --- /dev/null +++ b/website/benchmark/hardware/results/012_dell_r530.json @@ -0,0 +1,52 @@ +[ + { + "system": "Dell R530, 128GB DDR4, 2x480 GB SATA SSD, Perc H730 RAID-1", + "time": "2020-01-14 00:00:00", + "result": + [ + [0.027, 0.002, 0.002], + [0.147, 0.017, 0.016], + [0.328, 0.034, 0.033], + [1.059, 0.050, 0.044], + [1.334, 0.123, 0.118], + [2.579, 0.239, 0.264], + [0.057, 0.020, 0.019], + [0.036, 0.019, 0.018], + [2.079, 0.648, 0.569], + [2.012, 0.631, 
0.634], + [1.454, 0.158, 0.160], + [1.502, 0.178, 0.185], + [3.095, 0.722, 0.661], + [3.675, 0.816, 0.809], + [2.900, 0.903, 0.810], + [2.005, 0.861, 0.842], + [4.103, 1.983, 2.004], + [2.948, 1.200, 1.160], + [7.687, 4.411, 4.239], + [1.087, 0.054, 0.062], + [14.186, 0.651, 0.757], + [16.497, 0.739, 0.676], + [23.165, 1.703, 1.700], + [22.803, 0.898, 0.919], + [4.247, 0.317, 0.267], + [2.519, 0.214, 0.246], + [4.115, 0.316, 0.274], + [13.759, 0.805, 0.827], + [16.473, 1.215, 1.062], + [2.034, 1.870, 2.016], + [3.152, 0.677, 0.697], + [6.630, 1.216, 1.019], + [9.651, 6.131, 6.017], + [23.506, 3.416, 3.294], + [23.271, 3.547, 3.411], + [1.763, 1.344, 1.308], + [0.317, 0.215, 0.227], + [0.122, 0.090, 0.087], + [0.168, 0.074, 0.090], + [0.565, 0.419, 0.450], + [0.079, 0.037, 0.030], + [0.059, 0.032, 0.032], + [0.025, 0.015, 0.010] + ] + } +] diff --git a/website/benchmark/hardware/results/013_xeon_2176g.json b/website/benchmark/hardware/results/013_xeon_2176g.json new file mode 100644 index 00000000000..7df8050fc81 --- /dev/null +++ b/website/benchmark/hardware/results/013_xeon_2176g.json @@ -0,0 +1,52 @@ +[ + { + "system": "Xeon 2176G, 64GB RAM, 2xSSD 960GB (SAMSUNG MZQLB960HAJR-00007), ZFS RAID-1", + "time": "2020-01-14 00:00:00", + "result": + [ + [0.001, 0.001, 0.001], + [0.010, 0.011, 0.009], + [0.035, 0.031, 0.033], + [0.058, 0.056, 0.058], + [0.113, 0.126, 0.121], + [0.296, 0.300, 0.301], + [0.017, 0.016, 0.016], + [0.009, 0.009, 0.011], + [0.660, 0.659, 0.655], + [0.775, 0.746, 0.737], + [0.185, 0.181, 0.184], + [0.219, 0.237, 0.243], + [0.943, 0.933, 0.952], + [1.228, 1.185, 1.201], + [0.975, 0.963, 0.971], + [1.068, 1.086, 1.077], + [2.704, 2.713, 2.725], + [1.596, 1.564, 1.562], + [5.653, 5.571, 5.581], + [0.072, 0.065, 0.062], + [1.209, 0.958, 0.951], + [1.383, 1.222, 1.224], + [3.261, 2.771, 2.776], + [1.586, 1.210, 1.196], + [0.417, 0.392, 0.325], + [0.271, 0.268, 0.267], + [0.340, 0.338, 0.337], + [1.376, 1.160, 1.134], + [1.928, 1.643, 1.697], + [3.167, 3.135, 3.149], + [0.947, 0.859, 0.858], + [1.566, 1.446, 1.467], + [8.005, 8.065, 7.980], + [4.640, 4.322, 4.277], + [4.410, 4.330, 4.300], + [1.811, 1.749, 1.767], + [0.138, 0.142, 0.144], + [0.052, 0.047, 0.048], + [0.042, 0.043, 0.041], + [0.271, 0.249, 0.245], + [0.030, 0.016, 0.016], + [0.014, 0.013, 0.013], + [0.004, 0.004, 0.004] + ] + } +] diff --git a/website/benchmark/hardware/results/014_azure_ds3v2.json b/website/benchmark/hardware/results/014_azure_ds3v2.json new file mode 100644 index 00000000000..ed8d21be847 --- /dev/null +++ b/website/benchmark/hardware/results/014_azure_ds3v2.json @@ -0,0 +1,102 @@ +[ + { + "system": "Azure DS3v2 4vcpu 14GB RAM 1TB Standard SSD", + "time": "2020-01-15 00:00:00", + "result": + [ + [0.709, 0.004, 0.004], + [1.052, 0.028, 0.025], + [2.075, 0.077, 0.080], + [2.700, 0.104, 0.101], + [2.858, 0.267, 0.259], + [4.058, 0.737, 0.718], + [0.597, 0.038, 0.049], + [0.598, 0.025, 0.024], + [3.786, 1.324, 1.313], + [3.982, 1.579, 1.562], + [2.995, 0.395, 0.395], + [3.279, 0.467, 0.470], + [4.301, 1.674, 1.690], + [6.499, 2.126, 2.132], + [4.774, 1.886, 1.927], + [3.484, 1.872, 1.818], + [7.813, 4.801, 5.006], + [6.032, 3.162, 3.106], + [13.991, 10.573, 10.665], + [2.750, 0.118, 0.101], + [25.608, 1.978, 1.960], + [29.117, 2.297, 2.303], + [53.220, 5.367, 5.325], + [51.767, 2.669, 2.465], + [7.509, 0.890, 0.865], + [3.827, 0.666, 0.653], + [7.574, 0.918, 0.899], + [25.753, 1.904, 1.898], + [21.624, 3.269, 3.192], + [5.454, 4.966, 4.975], + [6.569, 1.870, 1.912], + [14.536, 2.844, 2.863], + 
[18.908, 16.591, 16.820], + [27.527, 7.790, 7.738], + [27.556, 7.694, 7.695], + [4.168, 3.568, 3.426], + [1.185, 0.307, 0.252], + [0.483, 0.096, 0.093], + [0.519, 0.086, 0.088], + [1.274, 0.525, 0.496], + [1.048, 0.033, 0.034], + [0.379, 0.027, 0.036], + [0.599, 0.010, 0.009] + ] + }, + { + "system": "Azure DS3v2 4vcpu 14GB RAM 1TB Premium SSD", + "time": "2020-01-15 00:00:00", + "result": + [ + [0.047, 0.004, 0.003], + [0.078, 0.023, 0.023], + [0.312, 0.077, 0.077], + [1.202, 0.105, 0.103], + [1.216, 0.260, 0.264], + [1.896, 0.751, 0.726], + [0.122, 0.041, 0.038], + [0.095, 0.028, 0.025], + [1.848, 1.304, 1.375], + [2.104, 1.534, 1.535], + [1.298, 0.394, 0.397], + [1.363, 0.469, 0.479], + [2.296, 1.728, 1.650], + [3.540, 2.320, 2.177], + [2.542, 1.863, 1.847], + [2.047, 1.861, 1.873], + [5.203, 4.830, 4.882], + [3.466, 3.131, 3.197], + [10.795, 10.396, 10.516], + [1.244, 0.111, 0.105], + [13.163, 2.019, 1.932], + [14.969, 2.346, 2.340], + [27.664, 5.259, 5.309], + [26.819, 2.589, 2.464], + [3.795, 0.902, 0.866], + [1.867, 0.665, 0.672], + [3.822, 0.919, 0.903], + [13.173, 1.916, 1.886], + [11.168, 3.253, 3.214], + [5.126, 5.290, 4.982], + [3.465, 1.866, 1.875], + [7.902, 3.009, 2.803], + [17.132, 17.154, 17.387], + [15.132, 7.755, 7.678], + [15.054, 7.779, 8.068], + [3.598, 3.590, 3.501], + [0.483, 0.279, 0.263], + [0.183, 0.094, 0.095], + [0.174, 0.084, 0.096], + [0.693, 0.480, 0.503], + [0.237, 0.038, 0.031], + [0.108, 0.029, 0.028], + [0.096, 0.010, 0.009] + ] + } +] diff --git a/website/benchmark/hardware/results/015_aws_i3_8xlarge.json b/website/benchmark/hardware/results/015_aws_i3_8xlarge.json new file mode 100644 index 00000000000..ddb87c457a5 --- /dev/null +++ b/website/benchmark/hardware/results/015_aws_i3_8xlarge.json @@ -0,0 +1,52 @@ +[ + { + "system": "AWS i3.8xlarge 32vCPU 244GiB 4x1900 NVMe SSD", + "time": "2020-01-15 00:00:00", + "result": + [ + [0.009, 0.002, 0.002], + [0.053, 0.040, 0.021], + [0.043, 0.028, 0.027], + [0.109, 0.036, 0.035], + [0.147, 0.108, 0.100], + [0.296, 0.239, 0.239], + [0.017, 0.013, 0.015], + [0.013, 0.010, 0.011], + [0.524, 0.460, 0.445], + [0.589, 0.519, 0.510], + [0.186, 0.142, 0.140], + [0.210, 0.167, 0.164], + [0.659, 0.584, 0.529], + [0.781, 0.679, 0.665], + [0.709, 0.630, 0.613], + [0.642, 0.590, 0.588], + [1.723, 1.564, 1.557], + [1.027, 0.925, 0.909], + [3.618, 3.432, 3.411], + [0.123, 0.037, 0.049], + [1.318, 0.587, 0.570], + [1.368, 0.655, 0.646], + [2.847, 1.518, 1.495], + [2.431, 0.812, 0.764], + [0.366, 0.213, 0.193], + [0.237, 0.167, 0.158], + [0.374, 0.204, 0.211], + [1.310, 0.590, 0.597], + [1.260, 0.877, 0.870], + [1.966, 1.952, 1.967], + [0.692, 0.571, 0.566], + [1.080, 0.823, 0.827], + [5.017, 4.816, 4.843], + [3.072, 2.661, 2.726], + [3.006, 2.711, 2.688], + [1.071, 0.999, 1.024], + [0.231, 0.221, 0.221], + [0.094, 0.090, 0.086], + [0.093, 0.085, 0.075], + [0.488, 0.432, 0.451], + [0.046, 0.029, 0.030], + [0.030, 0.023, 0.022], + [0.012, 0.007, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/016_aws_m5d_24xlarge.json b/website/benchmark/hardware/results/016_aws_m5d_24xlarge.json new file mode 100644 index 00000000000..f1b80c6269b --- /dev/null +++ b/website/benchmark/hardware/results/016_aws_m5d_24xlarge.json @@ -0,0 +1,52 @@ +[ + { + "system": "AWS m5d.24xlarge 96vCPU 384GiB 4x900 NVMe SSD", + "time": "2020-01-15 00:00:00", + "result": + [ + [0.012, 0.002, 0.002], + [0.061, 0.017, 0.008], + [0.043, 0.014, 0.014], + [0.160, 0.017, 0.016], + [0.193, 0.074, 0.075], + [0.300, 0.120, 0.118], + [0.023, 0.009, 
0.009], + [0.015, 0.009, 0.009], + [0.321, 0.206, 0.203], + [0.351, 0.238, 0.244], + [0.205, 0.113, 0.112], + [0.211, 0.106, 0.091], + [0.394, 0.213, 0.211], + [0.519, 0.270, 0.259], + [0.439, 0.292, 0.286], + [0.394, 0.301, 0.296], + [1.195, 0.829, 0.806], + [0.561, 0.743, 0.418], + [1.841, 1.660, 1.650], + [0.163, 0.041, 0.026], + [1.632, 0.251, 0.269], + [1.885, 0.265, 0.265], + [3.425, 0.644, 0.620], + [3.839, 0.431, 0.367], + [0.486, 0.092, 0.086], + [0.256, 0.081, 0.091], + [0.493, 0.107, 0.106], + [1.646, 0.275, 0.255], + [1.445, 0.332, 0.332], + [0.768, 0.702, 0.721], + [0.509, 0.280, 0.268], + [1.071, 0.382, 0.374], + [2.800, 2.452, 2.389], + [2.159, 1.134, 1.181], + [2.153, 1.145, 1.200], + [0.516, 0.457, 0.493], + [0.256, 0.182, 0.188], + [0.091, 0.073, 0.070], + [0.121, 0.063, 0.064], + [0.506, 0.399, 0.421], + [0.055, 0.030, 0.027], + [0.041, 0.019, 0.023], + [0.016, 0.006, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/017_aws_i3en_24xlarge.json b/website/benchmark/hardware/results/017_aws_i3en_24xlarge.json new file mode 100644 index 00000000000..7f33b99b127 --- /dev/null +++ b/website/benchmark/hardware/results/017_aws_i3en_24xlarge.json @@ -0,0 +1,52 @@ +[ + { + "system": "AWS i3en.24xlarge 96vCPU 768GiB 8x7500 NVMe SSD", + "time": "2020-01-15 00:00:00", + "result": + [ + [0.010, 0.002, 0.002], + [0.067, 0.009, 0.009], + [0.040, 0.014, 0.013], + [0.120, 0.017, 0.017], + [0.159, 0.076, 0.077], + [0.240, 0.116, 0.119], + [0.020, 0.010, 0.009], + [0.015, 0.010, 0.009], + [0.279, 0.195, 0.197], + [0.299, 0.230, 0.258], + [0.199, 0.088, 0.111], + [0.185, 0.094, 0.094], + [0.327, 0.212, 0.206], + [0.439, 0.271, 0.267], + [0.370, 0.281, 0.280], + [0.367, 0.306, 0.312], + [1.092, 0.931, 1.022], + [0.533, 0.599, 0.413], + [1.629, 1.921, 1.572], + [0.130, 0.031, 0.026], + [1.451, 0.264, 0.269], + [1.714, 0.273, 0.261], + [3.668, 0.636, 0.669], + [3.837, 0.472, 0.402], + [0.378, 0.107, 0.079], + [0.199, 0.070, 0.088], + [0.381, 0.104, 0.086], + [1.426, 0.284, 0.272], + [1.246, 0.363, 0.360], + [0.737, 0.708, 0.741], + [0.426, 0.246, 0.284], + [0.877, 0.420, 0.384], + [2.698, 2.390, 2.375], + [1.918, 1.223, 1.122], + [1.909, 1.234, 1.217], + [0.486, 0.482, 0.473], + [0.235, 0.187, 0.200], + [0.083, 0.069, 0.072], + [0.111, 0.063, 0.062], + [0.473, 0.433, 0.406], + [0.050, 0.028, 0.027], + [0.038, 0.022, 0.021], + [0.012, 0.006, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/018_huawei_taishan_2280_v2.json b/website/benchmark/hardware/results/018_huawei_taishan_2280_v2.json new file mode 100644 index 00000000000..329ebe782b9 --- /dev/null +++ b/website/benchmark/hardware/results/018_huawei_taishan_2280_v2.json @@ -0,0 +1,52 @@ +[ + { + "system": "Huawei TaiShan 2280 v2 (AArch64) 64 core (2-die), one physical HDD", + "time": "2020-01-15 00:00:00", + "result": + [ + [0.356, 0.002, 0.002], + [0.333, 0.018, 0.017], + [0.608, 0.021, 0.021], + [1.885, 0.032, 0.032], + [0.598, 0.099, 0.097], + [2.884, 0.165, 0.167], + [0.356, 0.016, 0.014], + [0.349, 0.015, 0.015], + [0.981, 0.283, 0.296], + [0.783, 0.326, 0.328], + [0.580, 0.135, 0.136], + [0.511, 0.142, 0.142], + [1.060, 0.434, 0.438], + [1.069, 0.569, 0.566], + [1.116, 0.479, 0.479], + [0.825, 0.478, 0.486], + [1.899, 1.574, 1.590], + [1.260, 0.874, 0.849], + [5.456, 2.869, 2.903], + [0.418, 0.037, 0.034], + [19.336, 0.478, 0.494], + [22.442, 0.595, 0.595], + [45.958, 8.735, 1.363], + [41.321, 0.675, 0.706], + [6.074, 0.167, 0.159], + [0.925, 0.133, 0.133], + [1.151, 0.153, 0.152], + [19.627, 0.607, 
0.622], + [16.496, 0.792, 0.787], + [1.770, 2.045, 1.242], + [4.827, 0.471, 0.466], + [7.695, 0.701, 0.647], + [5.246, 4.741, 4.676], + [20.496, 2.676, 2.628], + [20.338, 2.559, 2.557], + [1.696, 0.701, 0.724], + [0.665, 0.294, 0.302], + [0.402, 0.140, 0.137], + [0.366, 0.082, 0.086], + [0.867, 0.575, 0.552], + [0.334, 0.025, 0.025], + [0.333, 0.023, 0.022], + [0.340, 0.007, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/019_aws_m5ad_24xlarge.json b/website/benchmark/hardware/results/019_aws_m5ad_24xlarge.json new file mode 100644 index 00000000000..2b691153822 --- /dev/null +++ b/website/benchmark/hardware/results/019_aws_m5ad_24xlarge.json @@ -0,0 +1,52 @@ +[ + { + "system": "AWS m5ad.24xlarge 96vCPU 384GiB 4x900 NVMe SSD, AMD EPYC 7000 series 2.5 GHz", + "time": "2020-01-17 00:00:00", + "result": + [ + [0.013, 0.002, 0.002], + [0.055, 0.020, 0.025], + [0.054, 0.027, 0.026], + [0.154, 0.035, 0.035], + [0.221, 0.117, 0.118], + [0.325, 0.171, 0.166], + [0.042, 0.021, 0.017], + [0.025, 0.017, 0.018], + [0.353, 0.253, 0.253], + [0.477, 0.610, 0.720], + [0.257, 0.154, 0.139], + [0.251, 0.130, 0.114], + [0.513, 0.293, 0.286], + [0.618, 0.360, 0.350], + [0.468, 0.336, 0.329], + [0.390, 0.333, 0.411], + [1.112, 0.936, 1.497], + [2.434, 1.350, 0.886], + [2.590, 2.069, 2.331], + [0.160, 0.048, 0.036], + [1.638, 0.334, 0.312], + [1.841, 0.423, 0.373], + [3.673, 1.122, 1.078], + [3.808, 0.912, 0.494], + [0.480, 0.112, 0.120], + [0.248, 0.107, 0.099], + [0.470, 0.118, 0.114], + [1.648, 0.544, 0.469], + [1.418, 0.583, 0.624], + [0.966, 1.231, 0.999], + [0.539, 0.311, 0.370], + [1.159, 0.712, 0.716], + [3.755, 2.772, 2.973], + [2.748, 2.033, 2.242], + [2.842, 2.150, 2.019], + [0.784, 0.616, 0.641], + [0.304, 0.273, 0.235], + [0.106, 0.086, 0.093], + [0.117, 0.073, 0.075], + [0.604, 0.453, 0.502], + [0.050, 0.036, 0.034], + [0.043, 0.023, 0.027], + [0.013, 0.008, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/020_core_i7_8550u_lenovo_x1.json b/website/benchmark/hardware/results/020_core_i7_8550u_lenovo_x1.json new file mode 100644 index 00000000000..57d1d8198ee --- /dev/null +++ b/website/benchmark/hardware/results/020_core_i7_8550u_lenovo_x1.json @@ -0,0 +1,52 @@ +[ + { + "system": "Lenovo Thinkpad X1 Carbon 6th Gen i7-8550U CPU @ 1.80GHz 4 threads, 16 GiB", + "time": "2020-01-18 00:00:00", + "result": + [ + [0.006, 0.002, 0.002], + [0.031, 0.019, 0.020], + [0.082, 0.078, 0.080], + [0.157, 0.093, 0.092], + [0.274, 0.214, 0.206], + [0.601, 0.513, 0.513], + [0.038, 0.045, 0.041], + [0.023, 0.018, 0.018], + [1.394, 1.378, 1.323], + [1.567, 1.496, 1.483], + [0.406, 0.328, 0.327], + [0.468, 0.414, 0.397], + [1.846, 1.753, 1.737], + [2.492, 2.423, 2.404], + [2.136, 2.064, 2.078], + [2.038, 1.971, 1.971], + [5.794, 5.679, 5.708], + [3.430, 3.498, 3.356], + [11.946, 11.738, 11.700], + [0.158, 0.105, 0.091], + [2.151, 1.551, 1.593], + [2.581, 1.990, 1.985], + [6.101, 5.390, 5.320], + [3.528, 2.341, 2.322], + [0.772, 0.699, 0.701], + [0.606, 0.583, 0.587], + [0.877, 0.723, 0.728], + [2.398, 1.916, 1.924], + [3.634, 3.272, 3.247], + [4.102, 4.082, 4.078], + [1.885, 1.784, 1.741], + [2.994, 2.691, 2.707], + [19.060, 18.852, 18.929], + [8.745, 8.476, 8.553], + [8.685, 8.406, 8.946], + [3.416, 3.426, 3.397], + [0.238, 0.234, 0.210], + [0.080, 0.071, 0.072], + [0.078, 0.066, 0.066], + [0.470, 0.407, 0.396], + [0.034, 0.030, 0.029], + [0.025, 0.021, 0.021], + [0.010, 0.007, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/021_xeon_e5645.json 
b/website/benchmark/hardware/results/021_xeon_e5645.json new file mode 100644 index 00000000000..2f3f96232de --- /dev/null +++ b/website/benchmark/hardware/results/021_xeon_e5645.json @@ -0,0 +1,52 @@ +[ + { + "system": "E5645 @ 2.40GHz, 2 sockets, 12 threads, 96 GiB, 14 x 2TB HDD RAID-10", + "time": "2020-01-18 00:00:00", + "result": + [ + [0.061, 0.003, 0.003], + [0.203, 0.026, 0.019], + [0.231, 0.056, 0.060], + [0.533, 0.080, 0.099], + [0.458, 0.202, 0.213], + [0.723, 0.468, 0.411], + [0.143, 0.034, 0.029], + [0.117, 0.025, 0.023], + [1.033, 0.810, 0.745], + [1.165, 0.916, 0.898], + [0.514, 0.249, 0.297], + [0.600, 0.343, 0.385], + [1.294, 1.156, 1.221], + [1.859, 1.459, 1.384], + [1.627, 1.349, 1.346], + [1.414, 1.269, 1.306], + [3.798, 3.774, 3.631], + [2.177, 2.054, 2.016], + [7.002, 6.187, 6.263], + [0.461, 0.081, 0.116], + [3.860, 1.296, 1.330], + [4.705, 1.587, 1.503], + [9.533, 3.887, 3.564], + [11.468, 1.932, 1.712], + [1.362, 0.451, 0.403], + [0.648, 0.374, 0.414], + [1.195, 0.437, 0.418], + [4.187, 1.686, 1.474], + [3.289, 2.146, 2.159], + [3.919, 4.242, 4.208], + [1.673, 1.084, 1.040], + [3.264, 1.496, 1.629], + [8.883, 8.965, 9.027], + [5.813, 5.225, 5.365], + [5.874, 5.376, 5.353], + [2.053, 1.910, 1.951], + [0.478, 0.324, 0.325], + [0.206, 0.132, 0.124], + [0.222, 0.105, 0.111], + [0.699, 0.599, 0.563], + [0.213, 0.041, 0.040], + [0.133, 0.032, 0.040], + [0.062, 0.010, 0.010] + ] + } +] diff --git a/website/benchmark/hardware/results/022_amd_epyc_7402p.json b/website/benchmark/hardware/results/022_amd_epyc_7402p.json new file mode 100644 index 00000000000..1f03baeb94e --- /dev/null +++ b/website/benchmark/hardware/results/022_amd_epyc_7402p.json @@ -0,0 +1,152 @@ +[ + { + "system": "AMD EPYC 7402P 2.8 GHz, 128 GB DDR4, SSD RAID1 2×1920 GB SSD (INTEL SSDSC2KB019T7)", + "time": "2020-01-26 00:00:00", + "result": + [ + [0.014, 0.002, 0.002], + [0.031, 0.014, 0.010], + [0.077, 0.015, 0.015], + [0.255, 0.020, 0.019], + [0.286, 0.075, 0.073], + [0.452, 0.136, 0.135], + [0.025, 0.012, 0.012], + [0.021, 0.011, 0.011], + [0.431, 0.188, 0.188], + [0.491, 0.213, 0.214], + [0.308, 0.099, 0.097], + [0.319, 0.102, 0.098], + [0.491, 0.247, 0.248], + [0.786, 0.323, 0.316], + [0.574, 0.291, 0.291], + [0.414, 0.266, 0.267], + [1.097, 0.847, 0.835], + [0.748, 0.507, 0.505], + [1.977, 1.467, 1.488], + [0.264, 0.018, 0.029], + [2.937, 0.281, 0.254], + [3.288, 0.301, 0.283], + [6.502, 0.698, 0.687], + [7.260, 0.358, 0.351], + [0.796, 0.096, 0.095], + [0.399, 0.084, 0.083], + [0.873, 0.099, 0.101], + [3.215, 0.318, 0.300], + [2.680, 0.394, 0.391], + [1.099, 1.058, 1.055], + [0.802, 0.250, 0.251], + [1.823, 0.340, 0.341], + [2.750, 2.168, 2.157], + [3.638, 1.301, 1.267], + [3.583, 1.289, 1.288], + [0.455, 0.392, 0.393], + [0.279, 0.170, 0.159], + [0.089, 0.068, 0.066], + [0.135, 0.063, 0.061], + [0.479, 0.329, 0.341], + [0.059, 0.021, 0.020], + [0.042, 0.018, 0.020], + [0.011, 0.006, 0.006] + ] + }, + { + "system": "AMD EPYC 7402P 2.8 GHz, 128 GB DDR4, HDD RAID1 2×8000 GB HDD (TOSHIBA MG06ACA800E)", + "time": "2020-01-26 00:00:00", + "result": + [ + [0.149, 0.002, 0.002], + [0.263, 0.012, 0.011], + [0.631, 0.017, 0.016], + [1.829, 0.023, 0.020], + [2.073, 0.078, 0.076], + [2.981, 0.176, 0.138], + [0.204, 0.022, 0.012], + [0.195, 0.011, 0.011], + [2.652, 0.195, 0.193], + [2.949, 0.226, 0.218], + [2.124, 0.101, 0.099], + [2.369, 0.106, 0.102], + [2.978, 0.254, 0.248], + [4.546, 0.328, 0.321], + [3.391, 0.298, 0.297], + [2.211, 0.269, 0.268], + [4.889, 0.850, 0.842], + [4.627, 0.514, 0.505], + 
[8.805, 1.506, 1.432], + [1.979, 0.034, 0.044], + [18.744, 0.315, 0.248], + [22.946, 0.301, 0.276], + [41.584, 0.703, 0.692], + [42.963, 0.392, 0.335], + [5.992, 0.130, 0.096], + [3.050, 0.096, 0.085], + [6.390, 0.115, 0.101], + [20.038, 0.319, 0.296], + [17.610, 0.408, 0.396], + [1.187, 1.056, 1.055], + [5.134, 0.254, 0.249], + [10.690, 0.348, 0.341], + [9.296, 2.190, 2.149], + [20.999, 1.258, 1.258], + [22.020, 1.256, 1.254], + [1.715, 0.400, 0.390], + [0.403, 0.169, 0.164], + [0.147, 0.069, 0.069], + [0.137, 0.063, 0.062], + [0.568, 0.344, 0.359], + [0.152, 0.027, 0.021], + [0.076, 0.018, 0.017], + [0.021, 0.006, 0.006] + ] + }, + { + "system": "AMD EPYC 7502P / 128G DDR4 / 2NVME SAMSUNG MZQLB960HAJR", + "time": "2020-03-05 00:00:00", + "result": + [ + [0.012, 0.019, 0.009], + [0.042, 0.026, 0.038], + [0.026, 0.032, 0.017], + [0.058, 0.025, 0.027], + [0.095, 0.080, 0.087], + [0.143, 0.125, 0.124], + [0.018, 0.010, 0.016], + [0.013, 0.012, 0.013], + [0.201, 0.182, 0.182], + [0.228, 0.204, 0.204], + [0.093, 0.078, 0.077], + [0.100, 0.080, 0.081], + [0.241, 0.222, 0.218], + [0.291, 0.265, 0.270], + [0.268, 0.254, 0.256], + [0.255, 0.241, 0.242], + [0.623, 0.593, 0.599], + [0.373, 0.343, 0.339], + [1.354, 1.318, 1.311], + [0.054, 0.020, 0.022], + [0.495, 0.247, 0.242], + [0.520, 0.258, 0.248], + [0.957, 0.646, 0.652], + [null, null, null], + [0.149, 0.105, 0.099], + [0.091, 0.070, 0.069], + [0.150, 0.096, 0.094], + [0.499, 0.315, 0.309], + [0.437, 0.354, 0.357], + [1.002, 0.996, 0.991], + [0.234, 0.205, 0.207], + [0.380, 0.305, 0.305], + [1.733, 1.651, 1.655], + [1.230, 1.134, 1.132], + [1.217, 1.130, 1.114], + [0.396, 0.385, 0.383], + [0.156, 0.148, 0.160], + [0.065, 0.062, 0.063], + [0.057, 0.052, 0.052], + [0.368, 0.342, 0.336], + [0.030, 0.025, 0.027], + [0.022, 0.017, 0.019], + [0.005, 0.004, 0.004] + ] + } +] diff --git a/website/benchmark/hardware/results/023_xeon_e5_1650v3.json b/website/benchmark/hardware/results/023_xeon_e5_1650v3.json new file mode 100644 index 00000000000..78c3e4061ca --- /dev/null +++ b/website/benchmark/hardware/results/023_xeon_e5_1650v3.json @@ -0,0 +1,52 @@ +[ + { + "system": "Intel Xeon E5-1650v3 3.5 GHz, 64 GB DDR4, RAID1 2×480 GB SSD (INTEL SSDSC2BB480G4)", + "time": "2020-01-26 00:00:00", + "result": + [ + [0.005, 0.001, 0.001], + [0.025, 0.015, 0.016], + [0.085, 0.039, 0.034], + [0.302, 0.052, 0.057], + [0.313, 0.137, 0.136], + [0.639, 0.426, 0.434], + [0.036, 0.027, 0.027], + [0.021, 0.014, 0.014], + [0.871, 0.707, 0.707], + [1.060, 0.812, 0.811], + [0.521, 0.285, 0.290], + [0.548, 0.331, 0.339], + [1.358, 0.987, 0.983], + [1.832, 1.317, 1.326], + [1.464, 1.165, 1.134], + [1.311, 1.115, 1.111], + [3.426, 3.060, 3.069], + [2.113, 1.832, 1.849], + [7.000, 6.235, 6.309], + [0.293, 0.067, 0.074], + [3.325, 1.062, 1.030], + [3.765, 1.296, 1.311], + [7.438, 2.870, 2.831], + [7.820, 1.318, 1.277], + [1.139, 0.412, 0.405], + [0.704, 0.372, 0.362], + [1.154, 0.428, 0.427], + [3.557, 1.103, 1.060], + [3.249, 1.680, 1.666], + [2.411, 2.364, 2.312], + [1.365, 0.993, 1.003], + [2.696, 1.535, 1.523], + [9.226, 8.948, 8.734], + [6.053, 4.337, 4.321], + [6.020, 4.341, 4.300], + [1.919, 1.790, 1.759], + [0.220, 0.160, 0.161], + [0.085, 0.067, 0.061], + [0.116, 0.061, 0.061], + [0.431, 0.335, 0.320], + [0.050, 0.037, 0.025], + [0.083, 0.016, 0.019], + [0.029, 0.006, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/024_selectel_cloud_16vcpu.json b/website/benchmark/hardware/results/024_selectel_cloud_16vcpu.json new file mode 100644 index 
00000000000..d41778e47b2 --- /dev/null +++ b/website/benchmark/hardware/results/024_selectel_cloud_16vcpu.json @@ -0,0 +1,152 @@ +[ + { + "system": "Selectel Cloud, 16 vCPU, 32 GB RAM, 'fast disk'", + "time": "2020-01-26 00:00:00", + "result": + [ + [0.017, 0.002, 0.002], + [0.047, 0.011, 0.012], + [0.070, 0.024, 0.024], + [0.350, 0.033, 0.031], + [0.353, 0.087, 0.091], + [0.623, 0.233, 0.226], + [0.040, 0.018, 0.019], + [0.021, 0.011, 0.011], + [0.628, 0.420, 0.415], + [0.725, 0.466, 0.468], + [0.389, 0.152, 0.151], + [0.416, 0.181, 0.172], + [0.819, 0.481, 0.495], + [1.332, 0.714, 0.719], + [1.044, 0.853, 0.796], + [0.865, 0.827, 0.846], + [2.010, 1.816, 1.798], + [1.097, 0.940, 0.927], + [4.414, 4.188, 4.310], + [0.336, 0.034, 0.030], + [4.807, 0.541, 0.527], + [5.494, 0.633, 0.608], + [10.233, 1.448, 1.469], + [9.897, 0.721, 0.676], + [1.322, 0.212, 0.213], + [0.594, 0.177, 0.175], + [1.319, 0.218, 0.216], + [4.804, 0.615, 0.567], + [4.093, 0.855, 0.801], + [1.428, 1.378, 1.300], + [1.299, 0.649, 0.646], + [2.921, 0.835, 0.809], + [5.717, 5.883, 6.368], + [5.655, 2.715, 2.666], + [5.943, 3.008, 2.795], + [1.091, 1.011, 1.089], + [0.264, 0.212, 0.221], + [0.097, 0.085, 0.081], + [0.083, 0.081, 0.075], + [0.498, 0.531, 0.446], + [0.063, 0.031, 0.033], + [0.029, 0.022, 0.022], + [0.006, 0.006, 0.005] + ] + }, + { + "system": "Selectel Cloud, 16 vCPU, 32 GB RAM, 'basic disk'", + "time": "2020-01-26 00:00:00", + "result": + [ + [0.142, 0.002, 0.002], + [0.090, 0.014, 0.013], + [0.478, 0.023, 0.023], + [2.305, 0.032, 0.032], + [2.371, 0.090, 0.087], + [3.377, 0.228, 0.224], + [0.080, 0.020, 0.017], + [0.034, 0.009, 0.009], + [3.210, 0.425, 0.414], + [3.389, 0.471, 0.459], + [2.446, 0.157, 0.152], + [2.331, 0.187, 0.177], + [3.826, 0.503, 0.505], + [5.749, 0.725, 0.701], + [4.521, 0.752, 0.705], + [2.345, 0.572, 0.588], + [6.360, 1.697, 1.512], + [5.928, 0.949, 0.908], + [11.325, 3.598, 3.829], + [6.485, 0.126, 0.033], + [24.494, 0.550, 0.519], + [27.711, 0.615, 0.594], + [52.599, 1.455, 1.435], + [50.820, 1.001, 0.648], + [6.933, 0.206, 0.203], + [3.278, 0.177, 0.172], + [7.082, 0.216, 0.206], + [24.399, 0.564, 0.541], + [20.916, 0.812, 0.801], + [1.574, 1.323, 1.358], + [6.717, 0.558, 0.550], + [13.772, 0.830, 0.801], + [12.547, 5.084, 4.637], + [25.178, 2.601, 2.337], + [25.118, 2.460, 2.432], + [1.626, 0.901, 0.895], + [0.286, 0.206, 0.209], + [0.130, 0.081, 0.083], + [0.220, 0.075, 0.076], + [6.502, 0.495, 0.520], + [0.223, 0.024, 0.024], + [0.045, 0.028, 0.030], + [0.006, 0.005, 0.004] + ] + }, + { + "system": "Selectel Cloud, 16 vCPU, 32 GB RAM, 'universal disk'", + "time": "2020-01-26 00:00:00", + "result": + [ + [0.016, 0.002, 0.002], + [0.055, 0.023, 0.009], + [0.252, 0.020, 0.019], + [1.337, 0.038, 0.031], + [1.342, 0.088, 0.086], + [2.192, 0.241, 0.222], + [0.047, 0.017, 0.016], + [0.017, 0.009, 0.009], + [1.946, 0.410, 0.412], + [2.262, 0.454, 0.455], + [1.427, 0.158, 0.150], + [1.487, 0.181, 0.172], + [2.327, 0.481, 0.464], + [3.794, 0.657, 0.633], + [2.582, 0.596, 0.602], + [1.708, 0.673, 0.670], + [4.431, 1.545, 1.622], + [3.661, 0.890, 0.871], + [7.950, 3.954, 3.844], + [1.327, 0.055, 0.031], + [16.200, 0.538, 0.523], + [18.455, 0.613, 0.599], + [34.059, 1.428, 1.436], + [31.534, 0.713, 0.653], + [4.576, 0.205, 0.201], + [2.155, 0.177, 0.172], + [4.574, 0.208, 0.206], + [16.201, 0.554, 0.534], + [13.787, 0.840, 0.809], + [1.443, 1.408, 1.331], + [3.815, 0.561, 0.536], + [8.831, 0.778, 0.761], + [9.031, 4.476, 4.428], + [16.938, 2.347, 2.287], + [17.093, 2.722, 2.257], + 
[1.133, 0.897, 0.888], + [0.240, 0.207, 0.202], + [0.101, 0.083, 0.082], + [0.086, 0.075, 0.074], + [0.467, 0.450, 0.440], + [0.060, 0.028, 0.024], + [0.028, 0.020, 0.020], + [0.006, 0.005, 0.005] + ] + } +] diff --git a/website/benchmark/hardware/results/035_aws_a1_4xlarge.json b/website/benchmark/hardware/results/035_aws_a1_4xlarge.json new file mode 100644 index 00000000000..9c3e32abd0e --- /dev/null +++ b/website/benchmark/hardware/results/035_aws_a1_4xlarge.json @@ -0,0 +1,52 @@ +[ + { + "system": "AWS a1.4xlarge (Graviton) 16 vCPU, 2.3 GHz, 32 GiB RAM, EBS", + "time": "2020-02-13 00:00:00", + "result": + [ + [0.012, 0.003, 0.003], + [0.073, 0.031, 0.031], + [0.098, 0.053, 0.053], + [0.209, 0.139, 0.141], + [0.251, 0.200, 0.202], + [0.662, 0.439, 0.436], + [0.062, 0.041, 0.041], + [0.040, 0.033, 0.032], + [3.379, 0.720, 0.722], + [0.934, 0.847, 0.845], + [0.436, 0.379, 0.377], + [0.500, 0.417, 0.430], + [1.536, 1.381, 1.373], + [1.956, 1.832, 1.855], + [1.527, 1.458, 1.466], + [1.613, 1.576, 1.581], + [3.644, 3.490, 3.530], + [2.143, 1.982, 1.965], + [7.808, 7.617, 7.764], + [0.390, 0.179, 0.168], + [8.797, 2.308, 2.257], + [10.138, 2.533, 2.517], + [19.626, 5.738, 5.707], + [20.183, 2.195, 2.156], + [1.841, 0.577, 0.578], + [0.535, 0.479, 0.476], + [1.830, 0.578, 0.577], + [8.786, 2.521, 2.524], + [7.364, 2.941, 2.926], + [3.373, 3.186, 3.203], + [1.641, 1.213, 1.209], + [4.890, 1.964, 1.913], + [10.442, 10.410, 10.427], + [11.183, 7.431, 7.402], + [11.175, 7.460, 7.487], + [2.317, 2.232, 2.221], + [0.473, 0.406, 0.418], + [0.201, 0.187, 0.183], + [0.193, 0.144, 0.160], + [0.901, 0.811, 0.836], + [0.090, 0.046, 0.041], + [0.053, 0.032, 0.033], + [0.015, 0.012, 0.012] + ] + } +] diff --git a/website/benchmark/hardware/results/036_xeon_x5675.json b/website/benchmark/hardware/results/036_xeon_x5675.json new file mode 100644 index 00000000000..56d16a46d08 --- /dev/null +++ b/website/benchmark/hardware/results/036_xeon_x5675.json @@ -0,0 +1,102 @@ +[ + { + "system": "ProLiant DL380 G7, 12Gb RAM, 2x Xeon X5675 3.07GHz, 8x300GB SAS soft RAID5", + "time": "2020-02-18 00:00:00", + "result": + [ + [0.041, 0.005, 0.005], + [0.084, 0.020, 0.019], + [0.403, 0.046, 0.043], + [0.190, 0.081, 0.082], + [0.192, 0.127, 0.131], + [0.388, 0.324, 0.309], + [0.078, 0.028, 0.038], + [0.055, 0.019, 0.019], + [0.677, 0.614, 0.604], + [0.808, 0.706, 0.727], + [0.282, 0.190, 0.181], + [0.312, 0.223, 0.229], + [0.997, 0.895, 0.891], + [1.167, 1.155, 1.115], + [1.155, 1.088, 1.143], + [1.119, 1.090, 1.109], + [3.451, 3.222, 3.153], + [1.743, 1.770, 1.655], + [9.346, 6.206, 6.436], + [0.352, 0.108, 0.105], + [2.985, 0.993, 0.976], + [3.594, 1.211, 1.195], + [6.626, 2.829, 2.800], + [10.086, 1.331, 1.318], + [1.072, 0.348, 0.332], + [0.535, 0.298, 0.269], + [1.046, 0.362, 0.334], + [3.487, 1.221, 1.165], + [2.718, 1.742, 1.719], + [3.200, 3.158, 3.116], + [1.346, 0.901, 0.917], + [2.336, 1.285, 1.285], + [8.876, 64.491, 123.728], + [10.200, 5.127, 4.743], + [5.196, 4.783, 4.659], + [1.628, 1.544, 1.527], + [0.476, 0.296, 0.285], + [0.172, 0.127, 0.097], + [0.170, 0.078, 0.083], + [0.670, 0.529, 0.511], + [0.181, 0.065, 0.039], + [0.123, 0.029, 0.033], + [0.045, 0.011, 0.011] + ] + }, + { + "system": "ProLiant DL380 G7, 12Gb RAM, 1x Xeon X5675 3.07GHz, 8x300GB SAS Soft RAID5", + "time": "2020-02-18 00:00:00", + "result": + [ + [0.048, 0.005, 0.005], + [0.092, 0.026, 0.026], + [0.167, 0.067, 0.073], + [0.200, 0.117, 0.116], + [0.263, 0.185, 0.203], + [0.587, 0.586, 0.586], + [0.094, 0.043, 0.043], + [0.067, 0.025, 
0.026], + [1.371, 1.299, 1.298], + [1.638, 1.546, 1.548], + [0.441, 0.341, 0.337], + [0.482, 0.405, 0.385], + [2.682, 2.680, 2.630], + [3.189, 3.207, 3.167], + [2.634, 2.525, 2.556], + [3.181, 3.200, 3.213], + [7.793, 7.714, 7.768], + [3.802, 3.819, 3.960], + [19.101, 16.177, 15.840], + [0.320, 0.153, 0.134], + [3.108, 2.188, 2.115], + [4.515, 3.139, 3.069], + [7.712, 6.856, 6.906], + [11.063, 2.630, 2.567], + [1.015, 0.739, 0.723], + [0.738, 0.644, 0.623], + [1.048, 0.717, 0.736], + [3.371, 2.905, 2.903], + [4.772, 4.539, 4.518], + [11.700, 11.656, 11.589], + [2.217, 2.083, 2.072], + [4.329, 4.153, 3.889], + [21.212, 21.887, 21.417], + [12.816, 12.501, 12.664], + [13.192, 12.624, 12.820], + [5.454, 5.447, 5.462], + [0.376, 0.280, 0.288], + [0.152, 0.097, 0.113], + [0.171, 0.093, 0.100], + [0.594, 0.484, 0.464], + [0.129, 0.043, 0.036], + [0.098, 0.027, 0.045], + [0.033, 0.025, 0.011] + ] + } +] diff --git a/website/benchmark/hardware/results/037_pinebook_pro.json b/website/benchmark/hardware/results/037_pinebook_pro.json new file mode 100644 index 00000000000..d82d6533d7a --- /dev/null +++ b/website/benchmark/hardware/results/037_pinebook_pro.json @@ -0,0 +1,52 @@ +[ + { + "system": "Pinebook Pro (AArch64, 4 GiB RAM)", + "time": "2020-03-08 00:00:00", + "result": + [ + [0.021, 0.009, 0.007], + [0.195, 0.135, 0.144], + [0.439, 0.264, 0.273], + [1.266, 0.672, 0.706], + [1.337, 0.795, 0.790], + [2.706, 1.989, 1.947], + [0.246, 0.198, 0.197], + [0.157, 0.142, 0.133], + [4.150, 3.769, 3.617], + [5.223, 4.405, 4.234], + [2.391, 1.815, 1.785], + [2.534, 2.158, 2.042], + [7.895, 6.890, 7.003], + [10.338, 9.311, 9.410], + [8.139, 7.441, 7.312], + [8.532, 8.035, 8.011], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null], + [null, null, null] + ] + } +] diff --git a/website/benchmark/hardware/results/038_amd_ryzen_9_3950x.json b/website/benchmark/hardware/results/038_amd_ryzen_9_3950x.json new file mode 100644 index 00000000000..3c2c042e538 --- /dev/null +++ b/website/benchmark/hardware/results/038_amd_ryzen_9_3950x.json @@ -0,0 +1,52 @@ +[ + { + "system": "AMD Ryzen 9 3950X 16-Core Processor, 64 GiB RAM, Intel Optane 900P 280 GB", + "time": "2020-03-14 00:00:00", + "result": + [ + [0.002, 0.001, 0.001], + [0.018, 0.013, 0.012], + [0.041, 0.027, 0.026], + [0.091, 0.040, 0.041], + [0.115, 0.075, 0.074], + [0.201, 0.157, 0.153], + [0.017, 0.015, 0.014], + [0.013, 0.013, 0.013], + [0.363, 0.321, 0.313], + [0.441, 0.390, 0.389], + [0.189, 0.164, 0.162], + [0.201, 0.172, 0.178], + [0.879, 0.809, 0.802], + [1.081, 1.030, 1.027], + [0.875, 0.832, 0.837], + [1.042, 1.013, 1.019], + [2.604, 2.544, 2.529], + [1.435, 1.396, 1.414], + [4.208, 4.148, 4.132], + [0.094, 0.052, 0.037], + [0.965, 0.451, 0.450], + [1.141, 0.762, 0.763], + [2.813, 2.378, 2.383], + [2.127, 0.749, 0.733], + [0.301, 0.228, 0.228], + [0.218, 0.180, 0.174], + [0.301, 0.228, 0.227], + [1.241, 0.965, 0.958], + [1.469, 1.271, 1.274], + [3.782, 3.780, 3.762], + [0.738, 0.656, 0.655], + [1.147, 1.015, 1.009], + [7.114, 6.989, 7.074], 
+ [4.277, 4.085, 4.069], + [4.256, 4.032, 4.073], + [1.811, 1.787, 1.785], + [0.138, 0.128, 0.129], + [0.057, 0.051, 0.053], + [0.058, 0.051, 0.047], + [0.283, 0.284, 0.278], + [0.023, 0.024, 0.027], + [0.024, 0.015, 0.021], + [0.007, 0.013, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/039_azure_e32s.json b/website/benchmark/hardware/results/039_azure_e32s.json new file mode 100644 index 00000000000..bf1c8c6af8b --- /dev/null +++ b/website/benchmark/hardware/results/039_azure_e32s.json @@ -0,0 +1,52 @@ +[ + { + "system": "Azure E32s v3 32 256 GiB 512 GiB", + "time": "2020-03-23 00:00:00", + "result": + [ + [0.003, 0.002, 0.003], + [0.114, 0.014, 0.013], + [0.230, 0.031, 0.029], + [0.893, 0.043, 0.042], + [0.915, 0.123, 0.143], + [1.475, 0.263, 0.264], + [0.055, 0.016, 0.017], + [0.056, 0.013, 0.013], + [1.467, 0.523, 0.523], + [1.661, 0.614, 0.608], + [0.999, 0.169, 0.176], + [1.058, 0.188, 0.190], + [1.839, 0.658, 0.697], + [2.753, 0.892, 0.881], + [2.034, 0.895, 0.895], + [1.425, 0.860, 0.879], + [3.401, 2.070, 2.091], + [2.573, 1.183, 1.208], + [6.376, 4.374, 4.442], + [0.922, 0.044, 0.043], + [10.137, 0.653, 0.691], + [11.589, 0.711, 0.805], + [21.234, 1.841, 1.827], + [22.035, 0.973, 0.940], + [2.983, 0.238, 0.237], + [1.493, 0.201, 0.186], + [3.016, 0.262, 0.259], + [10.139, 0.696, 0.676], + [8.723, 1.017, 1.008], + [1.561, 1.439, 1.563], + [2.688, 0.713, 0.728], + [5.942, 1.075, 1.063], + [7.803, 5.871, 5.826], + [11.131, 2.860, 2.798], + [11.089, 2.898, 2.847], + [1.550, 1.276, 1.256], + [0.851, 0.303, 0.280], + [0.376, 0.100, 0.108], + [0.571, 0.084, 0.082], + [1.654, 0.560, 0.533], + [0.356, 0.029, 0.027], + [0.232, 0.022, 0.024], + [0.032, 0.009, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/040_core_i7_macbook_pro_2018.json b/website/benchmark/hardware/results/040_core_i7_macbook_pro_2018.json new file mode 100644 index 00000000000..e0ad42f423b --- /dev/null +++ b/website/benchmark/hardware/results/040_core_i7_macbook_pro_2018.json @@ -0,0 +1,52 @@ +[ + { + "system": "MacBook Pro 2018, 2.7 GHz Quad-Core Intel Core i7, 16 GiB RAM, 1TB SSD", + "time": "2020-04-04 00:00:00", + "result": + [ + [0.002, 0.002, 0.002], + [0.028, 0.031, 0.025], + [0.060, 0.058, 0.047], + [0.125, 0.101, 0.070], + [0.164, 0.185, 0.168], + [0.672, 0.568, 0.557], + [0.072, 0.038, 0.037], + [0.031, 0.021, 0.021], + [0.849, 0.836, 0.820], + [0.941, 0.938, 0.942], + [0.423, 0.444, 0.457], + [0.617, 0.556, 0.555], + [1.761, 1.694, 1.641], + [2.190, 2.277, 2.226], + [1.964, 1.895, 1.934], + [1.956, 1.978, 1.884], + [6.029, 5.977, 5.975], + [3.372, 3.436, 3.439], + [12.883, 12.778, 12.572], + [0.116, 0.080, 0.076], + [1.874, 1.372, 1.467], + [2.321, 2.356, 2.238], + [5.304, 4.955, 4.912], + [2.474, 1.993, 2.033], + [0.744, 0.708, 0.719], + [0.562, 0.568, 0.602], + [0.737, 0.742, 0.719], + [1.547, 1.580, 1.583], + [3.074, 2.665, 2.697], + [5.466, 5.560, 5.693], + [1.658, 1.562, 1.543], + [2.935, 2.802, 2.743], + [19.141, 19.674, 19.212], + [8.738, 8.334, 8.302], + [8.268, 8.276, 8.364], + [3.311, 3.288, 3.243], + [0.182, 0.169, 0.169], + [0.075, 0.066, 0.066], + [0.066, 0.057, 0.053], + [0.353, 0.324, 0.327], + [0.030, 0.018, 0.018], + [0.018, 0.015, 0.015], + [0.011, 0.007, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/041_amd_epyc_7702.json b/website/benchmark/hardware/results/041_amd_epyc_7702.json new file mode 100644 index 00000000000..ae332605155 --- /dev/null +++ b/website/benchmark/hardware/results/041_amd_epyc_7702.json @@ -0,0 +1,52 @@ +[ + { 
+ "system": "AMD EPYC 7702, 256 cores, 512 GiB, NVMe SSD", + "time": "2020-04-09 00:00:00", + "result": + [ + [0.006, 0.002, 0.002], + [0.252, 0.072, 0.057], + [0.113, 0.066, 0.057], + [0.197, 0.055, 0.065], + [0.311, 0.199, 0.217], + [0.360, 0.200, 0.183], + [0.119, 0.050, 0.045], + [0.066, 0.061, 0.057], + [0.320, 0.150, 0.144], + [0.346, 0.170, 0.162], + [0.226, 0.117, 0.115], + [0.265, 0.112, 0.118], + [0.402, 0.249, 0.250], + [0.561, 0.327, 0.332], + [0.397, 0.267, 0.257], + [0.323, 0.221, 0.233], + [0.710, 0.527, 0.517], + [0.667, 0.437, 0.443], + [1.269, 0.936, 0.957], + [0.189, 0.043, 0.043], + [1.673, 0.206, 0.169], + [1.937, 0.214, 0.184], + [3.527, 0.755, 0.737], + [3.197, 0.551, 0.523], + [0.519, 0.076, 0.086], + [0.268, 0.060, 0.080], + [0.522, 0.075, 0.079], + [1.693, 0.345, 0.351], + [1.466, 0.330, 0.318], + [1.078, 0.974, 1.019], + [0.501, 0.196, 0.200], + [1.032, 0.266, 0.271], + [1.621, 1.156, 1.169], + [2.089, 0.998, 0.972], + [2.106, 0.974, 0.959], + [0.366, 0.305, 0.305], + [0.190, 0.187, 0.183], + [0.071, 0.066, 0.075], + [0.072, 0.068, 0.062], + [0.415, 0.353, 0.457], + [0.034, 0.032, 0.028], + [0.031, 0.027, 0.032], + [0.024, 0.007, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/042_core_i7_6770hq_intel_nuc.json b/website/benchmark/hardware/results/042_core_i7_6770hq_intel_nuc.json new file mode 100644 index 00000000000..94b0e25d7ba --- /dev/null +++ b/website/benchmark/hardware/results/042_core_i7_6770hq_intel_nuc.json @@ -0,0 +1,52 @@ +[ + { + "system": "Intel NUC, 4 cores (Intel i7-6770HQ), 32 GiB RAM, 1 TB NVMe SSD", + "time": "2020-04-15 00:00:00", + "result": + [ + [0.003, 0.002, 0.001], + [0.025, 0.016, 0.018], + [0.084, 0.058, 0.057], + [0.158, 0.092, 0.085], + [0.273, 0.211, 0.190], + [0.671, 0.555, 0.539], + [0.031, 0.033, 0.033], + [0.026, 0.019, 0.017], + [1.183, 1.110, 1.090], + [1.330, 1.246, 1.254], + [0.352, 0.297, 0.296], + [0.441, 0.375, 0.352], + [1.611, 1.491, 1.439], + [2.130, 2.022, 1.976], + [1.903, 1.795, 1.819], + [1.927, 1.851, 1.861], + [5.282, 5.155, 5.172], + [3.246, 3.313, 3.189], + [12.059, 11.378, 10.562], + [0.146, 0.092, 0.090], + [2.103, 1.496, 1.477], + [2.447, 1.777, 1.734], + [5.123, 3.999, 3.955], + [3.733, 1.808, 1.775], + [0.685, 0.530, 0.523], + [0.525, 0.446, 0.438], + [0.755, 0.545, 0.547], + [2.052, 1.416, 1.403], + [2.976, 2.441, 2.423], + [2.197, 2.189, 2.164], + [1.748, 1.596, 1.607], + [2.773, 2.481, 2.466], + [18.903, 19.166, 16.563], + [7.457, 7.116, 6.943], + [7.311, 6.957, 6.958], + [3.036, 3.005, 2.991], + [0.247, 0.186, 0.162], + [0.100, 0.063, 0.065], + [0.098, 0.061, 0.056], + [0.434, 0.344, 0.331], + [0.040, 0.025, 0.025], + [0.049, 0.026, 0.026], + [0.022, 0.008, 0.006] + ] + } +] diff --git a/website/benchmark/hardware/results/043_amd_epyc_7502p.json b/website/benchmark/hardware/results/043_amd_epyc_7502p.json new file mode 100644 index 00000000000..95ea5d2d568 --- /dev/null +++ b/website/benchmark/hardware/results/043_amd_epyc_7502p.json @@ -0,0 +1,52 @@ +[ + { + "system": "AMD EPYC 7502P 32-Core Processor with HT (64 thread) / 512 Gb RAM / mdadm RAID1 SAMSUNG MZQLB3T8HALS-00007 + LVM", + "time": "2020-04-16 00:00:00", + "result": + [ + [0.007, 0.002, 0.002], + [0.022, 0.011, 0.011], + [0.028, 0.017, 0.017], + [0.050, 0.022, 0.022], + [0.098, 0.079, 0.078], + [0.149, 0.125, 0.126], + [0.020, 0.014, 0.014], + [0.015, 0.013, 0.013], + [0.169, 0.148, 0.148], + [0.237, 0.171, 0.168], + [0.103, 0.084, 0.082], + [0.099, 0.085, 0.084], + [0.262, 0.221, 0.221], + [0.312, 0.281, 0.282], + [0.274, 
0.259, 0.255], + [0.255, 0.237, 0.237], + [0.616, 0.592, 0.589], + [0.398, 0.364, 0.359], + [1.358, 1.301, 1.292], + [0.056, 0.025, 0.022], + [0.485, 0.265, 0.263], + [0.510, 0.271, 0.255], + [0.938, 0.693, 0.674], + [1.262, 0.396, 0.367], + [0.144, 0.082, 0.081], + [0.088, 0.066, 0.065], + [0.141, 0.084, 0.082], + [0.488, 0.329, 0.325], + [0.441, 0.376, 0.374], + [1.054, 1.049, 1.068], + [0.222, 0.190, 0.189], + [0.386, 0.321, 0.319], + [1.703, 1.625, 1.660], + [1.272, 1.202, 1.207], + [1.276, 1.185, 1.204], + [0.398, 0.382, 0.382], + [0.171, 0.160, 0.160], + [0.071, 0.058, 0.059], + [0.059, 0.055, 0.053], + [0.364, 0.341, 0.354], + [0.028, 0.022, 0.027], + [0.027, 0.020, 0.018], + [0.010, 0.008, 0.005] + ] + } +] diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html deleted file mode 100644 index efb371c588f..00000000000 --- a/website/benchmark_hardware.html +++ /dev/null @@ -1,3032 +0,0 @@ - - - - - Performance Comparison Of ClickHouse On Various Hardware - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Performance Comparison Of ClickHouse On Various Hardware
-
-Relative query processing time (lower is better):
-
-Full results:
-
-Submit your own results: https://clickhouse.yandex/docs/en/operations/performance_test/
-
-Results for Lenovo B580 Laptop are from Ragıp Ünal. 16GB RAM 1600 MHz, 240GB SSD, Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz (2 cores / 4 HT)
-Results for Time4vps.eu are from Ragıp Ünal.
-Results for Dell PowerEdge R640, R641 (in Hetzner) are from Dmitry Titov.
-Results for Dell PowerEdge R730 are from Amos Bird.
-Results for Dell R530 are from Yuriy Zolkin.
-Results for Xeon 2176G are from Sergey Golod.
-Results for Azure DS3v2 are from Boris Granveaud.
-Results for AWS are from Wolf Kreuzerkrieg.
-Results for Huawei Taishan are from Peng Gao at sina.com.
-Results for Selectel and AMD EPYC 7402P are from Andrey Dudin.
-Results for ProLiant are from Denis Ustinov.
-Results for AMD EPYC 7502P 128GiB are from Kostiantyn Velychkovskyi.
-Results for AMD EPYC 7502P 512GiB are from Sergey Zakharov.
-Results for Pinebook Pro are from Aleksey R. @kITerE.
-Results for AMD Ryzen are from Alexey Milovidov. Firefox was running in the background.
-Results for Azure E32s are from Piotr Maśko.
-Results for MacBook Pro are from Denis Glazachev. macOS Catalina version 10.15.4 (19E266). To "drop caches", the "Free Up RAM" feature in CleanMyMac is used.
-Results for AMD EPYC 7702 are from Peng Gao at sina.com.
-Results for Intel NUC are from Alexander Zaitsev, Altinity.
-The Xeon Gold 6230 server uses 4 × SAMSUNG datacenter-class SSDs in RAID-10.
-Results for Yandex Managed ClickHouse with "cold cache" are biased and should not be compared, because the cache was not flushed before each query.
-
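(Aside, not part of the patch: the "relative query processing time" on the removed page is, as far as I understand it, a geometric mean of per-query ratios against a baseline system. The sketch below recomputes such a figure from result files like the ones added above; the file paths, the 0.01 s damping constant, and the "best of three runs" rule are assumptions for illustration, not the site's actual JavaScript.)

#!/usr/bin/env python3
# Hedged sketch: relative benchmark time of one result file vs. a baseline.
# Assumes the 43-query x 3-run JSON layout of the files above; null means
# the query did not run, so it is skipped in the comparison.
import json
import math

def best_times(path):
    """Best (minimum) of the three runs per query; None if all runs are null."""
    with open(path) as f:
        entry = json.load(f)[0]  # first system entry in the file
    return [min((t for t in runs if t is not None), default=None)
            for runs in entry["result"]]

def relative_time(path, baseline_path, damping=0.01):
    # damping (an assumed constant) keeps near-zero timings from dominating
    times, base = best_times(path), best_times(baseline_path)
    ratios = [(t + damping) / (b + damping)
              for t, b in zip(times, base)
              if t is not None and b is not None]
    # geometric mean of per-query ratios; 1.0 means "as fast as the baseline"
    return math.exp(sum(map(math.log, ratios)) / len(ratios))

print(relative_time("results/022_amd_epyc_7402p.json", "results/021_xeon_e5645.json"))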
- - - - - diff --git a/website/css/base.css b/website/css/base.css index 4926bd3af8a..e7fe6d4723d 100644 --- a/website/css/base.css +++ b/website/css/base.css @@ -11,6 +11,19 @@ a:hover, a:active { text-decoration: underline; } +.btn.disabled, .btn:disabled { + cursor: default; + opacity: 0.4; +} + +.fake-btn { + display: inline-block; + padding: 0.375rem 0.75rem; + text-align: center; + font-size: 1rem; + line-height: 1.5; +} + #logo-icon, #docs-logo-icon { width: 40px; } @@ -26,20 +39,16 @@ a:hover, a:active { line-height: 1.2; } -.bg-dark-alt { +.bg-dark-alt, .bg-dark-alt:focus { background: #36363F; } -.bg-dark-alt:focus { - background: #36363F; -} - -.bg-secondary-alt { +.bg-secondary-alt, .bg-secondary-alt:focus { background: #444451; } -.bg-secondary-alt:focus { - background: #444451; +.bg-orange, .bg-orange:focus { + background: #f14600; } .text-dark-alt { @@ -161,3 +170,27 @@ a.btn-outline-yellow { .dots-cc { background: #fff url('/images/dots.svg') repeat-y 50% -17.5%; } + +.benchmark-query-cell { + width: 20rem; + white-space: pre; + overflow-x: hidden; +} + +.benchmark-query-cell:hover { + width: auto; + background-color: #efefef; + position: absolute; + padding: 0.5rem; + margin: -0.5rem 0 0 -0.5rem; + overflow-x: auto; + white-space: normal; +} + +.benchmark-query-cell-wrapper { + width: 22rem; +} + +.w-15 { + width: 15% !important; +} diff --git a/website/js/base.js b/website/js/base.js index fa0e8431839..3e91a01092f 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -74,10 +74,13 @@ s.type = "text/javascript"; s.async = true; s.src = "/js/metrika.js"; - - if (w.opera == "[object Opera]") { - d.addEventListener("DOMContentLoaded", f, false); - } else { f(); } + if (window.location.hostname.endsWith('clickhouse.tech')) { + if (w.opera == "[object Opera]") { + d.addEventListener("DOMContentLoaded", f, false); + } else { + f(); + } + } })(document, window, "yandex_metrika_callbacks2"); var beforePrint = function() { diff --git a/website/nginx/default.conf b/website/nginx/default.conf deleted file mode 100644 index a5b54e75cb7..00000000000 --- a/website/nginx/default.conf +++ /dev/null @@ -1,10 +0,0 @@ -server { - listen 80 default; - listen [::]:80 default ipv6only=on; - server_name localhost clickhouse.yandex clickhouse.yandex.ru clickhouse.yandex.com; - return 301 https://clickhouse.tech$request_uri; -} -server { - server_name test.clickhouse.yandex; - return 301 https://test.clickhouse.tech$request_uri; -} diff --git a/website/nginx/nginx.conf b/website/nginx/nginx.conf deleted file mode 100644 index 9468004b061..00000000000 --- a/website/nginx/nginx.conf +++ /dev/null @@ -1,41 +0,0 @@ - -user nginx; -worker_processes auto; - -error_log /var/log/nginx/error.log warn; -pid /var/run/nginx.pid; - - -events { - worker_connections 4096; - multi_accept on; - use epoll; -} - - -http { - include /etc/nginx/mime.types; - default_type application/octet-stream; - - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - access_log /var/log/nginx/access.log main; - - sendfile on; - tcp_nopush on; - - keepalive_timeout 65; - - gzip on; - gzip_comp_level 5; - gzip_min_length 256; - - add_header X-Content-Type-Options nosniff always; - add_header X-Frame-Options DENY always; - add_header X-XSS-Protection "1; mode=block" always; - - include /etc/nginx/conf.d/*.conf; - include /etc/nginx/sites-enabled/*; -} diff --git a/website/sitemap_static.xml 
b/website/sitemap_static.xml index 33d258674f6..6d6b41e5827 100644 --- a/website/sitemap_static.xml +++ b/website/sitemap_static.xml @@ -5,11 +5,11 @@ daily - https://clickhouse.tech/benchmark.html + https://clickhouse.tech/benchmark/dbms/ weekly - https://clickhouse.tech/benchmark_hardware.html + https://clickhouse.tech/benchmark/hardware/ weekly diff --git a/website/templates/common_css.html b/website/templates/common_css.html index 4aa9114e06d..ac10b233f25 100644 --- a/website/templates/common_css.html +++ b/website/templates/common_css.html @@ -1 +1,5 @@ + +{% for src in extra_css %} + +{% endfor %} diff --git a/website/templates/common_js.html b/website/templates/common_js.html index b2bed146503..72421f00562 100644 --- a/website/templates/common_js.html +++ b/website/templates/common_js.html @@ -1,4 +1,8 @@ - + + +{% for src in extra_js %} + +{% endfor %}
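(Closing aside: every result file added in this patch has the same shape, a top-level array of system entries, each with "system", "time", and a "result" array of 43 queries with 3 timings apiece, where null marks a query that could not run. Below is a minimal validator sketch under those assumptions; the 43/3 counts are read off the files above, not from a published schema.)

import json
import sys

EXPECTED_QUERIES = 43  # query count observed in the files above (assumption)
RUNS_PER_QUERY = 3     # each query is timed three times

def validate(path):
    with open(path) as f:
        systems = json.load(f)
    assert isinstance(systems, list) and systems, f"{path}: top level must be a non-empty array"
    for entry in systems:
        for key in ("system", "time", "result"):
            assert key in entry, f"{path}: entry is missing {key!r}"
        result = entry["result"]
        assert len(result) == EXPECTED_QUERIES, \
            f"{path}: expected {EXPECTED_QUERIES} queries, got {len(result)}"
        for i, runs in enumerate(result):
            assert len(runs) == RUNS_PER_QUERY, f"{path}: query {i} has {len(runs)} runs"
            assert all(t is None or t >= 0 for t in runs), f"{path}: query {i} has a bad timing"

for path in sys.argv[1:]:
    validate(path)
    print(f"{path}: OK")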