From 924075621ea101e25226761690ac69101b0db1ba Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 13 Sep 2016 16:24:24 +0300 Subject: [PATCH 01/15] Added three experimental implementations for groupUniqArray() aggregate function. [#METR-22071] --- .../AggregateFunctionGroupUniqArray.h | 10 +- .../AggregateFunctions/IAggregateFunction.h | 20 + dbms/include/DB/Columns/ColumnString.h | 1 - dbms/include/DB/Common/HashTable/HashTable.h | 4 + dbms/include/DB/Interpreters/Aggregator.h | 2 +- .../AggregateFunctionGroupUniqArray.cpp | 373 ++++++++++++++++++ dbms/src/Client/Benchmark.cpp | 29 +- dbms/src/Interpreters/Aggregator.cpp | 44 ++- 8 files changed, 467 insertions(+), 16 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 55d49b19af5..394c60cb9f1 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -5,6 +5,7 @@ #include #include +#include #include @@ -23,12 +24,12 @@ template struct AggregateFunctionGroupUniqArrayData { /// При создании, хэш-таблица должна быть небольшой. 
- typedef HashSet< + using Set = HashSet< T, DefaultHash, HashTableGrower<4>, HashTableAllocatorWithStackMemory - > Set; + >; Set value; }; @@ -70,7 +71,7 @@ public: const typename State::Set & set = this->data(place).value; size_t size = set.size(); writeVarUInt(size, buf); - for (typename State::Set::const_iterator it = set.begin(); it != set.end(); ++it) + for (auto it = set.begin(); it != set.end(); ++it) writeIntBinary(*it, buf); } @@ -94,11 +95,12 @@ public: data_to.resize(old_size + size); size_t i = 0; - for (typename State::Set::const_iterator it = set.begin(); it != set.end(); ++it, ++i) + for (auto it = set.begin(); it != set.end(); ++it, ++i) data_to[old_size + i] = *it; } }; + #undef AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE } diff --git a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h index 63eece3dcb4..d05a3d8b25e 100644 --- a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -22,6 +23,14 @@ using AggregateDataPtr = char *; using ConstAggregateDataPtr = const char *; +struct IAggregateDataWithArena +{ + Arena * arena = nullptr; + + inline Arena * getArena() { return arena; } +}; + + /** Интерфейс для агрегатных функций. * Экземпляры классов с этим интерфейсом не содержат самих данных для агрегации, * а содержат лишь метаданные (описание) агрегатной функции, @@ -96,6 +105,12 @@ public: */ virtual bool isState() const { return false; } + /** Возвращает true если при агрегации необходимо использовать "кучу", представленной Arena. + * В этом случае структура данных для агррегации должна быть унаследована от IAggregateDataWithArena. + * Указатель на необходимую Arena можно будет получить с помощью IAggregateDataWithArena::getArena(). 
+ */ + virtual bool needArena() const { return false; } + /** Внутренний цикл, использующий указатель на функцию, получается лучше, чем использующий виртуальную функцию. * Причина в том, что в случае виртуальных функций, GCC 5.1.2 генерирует код, @@ -144,6 +159,11 @@ public: { return __alignof__(Data); } + + bool needArena() const override + { + return std::is_base_of(); + } }; diff --git a/dbms/include/DB/Columns/ColumnString.h b/dbms/include/DB/Columns/ColumnString.h index b7846f443d6..84b6d3f73b4 100644 --- a/dbms/include/DB/Columns/ColumnString.h +++ b/dbms/include/DB/Columns/ColumnString.h @@ -12,7 +12,6 @@ #include #include - namespace DB { diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index e5c41451555..b78053c3fb2 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -70,6 +70,10 @@ bool check(const T x) { return x == 0; } template void set(T & x) { x = 0; } +bool check(const std::string & x); + +void set(std::string & x); + }; diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 2951a4f179c..8eb30839e44 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -986,7 +986,7 @@ protected: /** Создать состояния агрегатных функций для одного ключа. */ - void createAggregateStates(AggregateDataPtr & aggregate_data) const; + void createAggregateStates(AggregateDataPtr & aggregate_data, Arena * arena) const; /** Вызвать методы destroy для состояний агрегатных функций. * Используется в обработчике исключений при агрегации, так как RAII в данном случае не применим. 
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index e6dc6136e5a..6a1bc01465a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -1,3 +1,14 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include #include #include #include @@ -8,6 +19,365 @@ namespace DB namespace { +template +class Function_DataNaive; + +template +class Function_Fair; + +template +class Function_Map128; + + +template +//using Function_Default = Function_Map128; +using Function_Default = Function_Fair; + +struct DataNaive : public IAggregateDataWithArena +{ + using Set = std::unordered_set; + Set value; +}; + +struct DataFair : public IAggregateDataWithArena +{ + using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory<16>>; + Set value; +}; + +struct DataMap128 : public IAggregateDataWithArena +{ + using Set = HashMap, HashTableAllocatorWithStackMemory<16>>; + Set value; + +#ifdef AVERAGE_STAT + mutable bool was_merged = false; +#endif +}; + + + +template +inline StringRef packToStringRef(const IColumn & column, size_t row_num, Arena & arena) +{ + const char * begin = nullptr; + return column.serializeValueIntoArena(row_num, arena, begin); +} + +template <> +inline StringRef packToStringRef(const IColumn & column, size_t row_num, Arena & arena) +{ + StringRef str = column.getDataAt(row_num); + str.data = arena.insert(str.data, str.size); + return str; +} + +template +inline void unpackFromStringRef(StringRef str, IColumn & data_to) +{ + data_to.deserializeAndInsertFromArena(str.data); +} + +template <> +inline void unpackFromStringRef(StringRef str, IColumn & data_to) +{ + data_to.insertData(str.data, str.size); +} + + +template +class IAggreagteFunctionGroupUniqArray : public IUnaryAggregateFunction +{ +protected: + + mutable DataTypePtr 
input_data_type; + +public: + + using State = Data; + + String getName() const override { return "groupUniqArray"; } + + void setArgument(const DataTypePtr & argument) + { + input_data_type = argument; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared(input_data_type->clone()); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED); + } + + void destroy(AggregateDataPtr place) const noexcept override + { + this->data(place).~Data(); + //LOG_DEBUG(&Logger::get("IAggreagteFunctionGroupUniqArray"), "destroyed."); + } +}; + +template +struct Function_DataNaive : public IAggreagteFunctionGroupUniqArray> +{ + using State = DataNaive; + + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + { + StringRef serialized_elem; + if (is_string_column) + serialized_elem = static_cast(column).getDataAt(row_num); + else + serialized_elem = packToStringRef(column, row_num, *this->data(place).arena); + + this->data(place).value.insert(serialized_elem.toString()); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + { + State::Set & cur_set = this->data(place).value; + const State::Set & rhs_set = this->data(rhs).value; + + for (auto it = begin(rhs_set); it != end(rhs_set); ++it) + { + cur_set.insert(*it); + } + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + ColumnArray & arr_to = static_cast(to); + ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets(); + IColumn & data_to = arr_to.getData(); + + const State::Set & set = this->data(place).value; + size_t size = set.size(); + offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + size); + + for (auto it = begin(set); it != end(set); ++it) + { + unpackFromStringRef(*it, data_to); + } + } +}; + +template +struct Function_Fair : public IAggreagteFunctionGroupUniqArray> +{ + using State = DataFair; + + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + { + if (!is_string_column) + { + StringRef serialized_elem = packToStringRef(column, row_num, *this->data(place).arena); + + bool inserted; + State::Set::iterator it; + this->data(place).value.emplace(serialized_elem, it, inserted); + + if (!likely(inserted)) + this->data(place).arena->rollback(serialized_elem.size); + } + else + { + StringRef str = column.getDataAt(row_num); + + bool inserted; + State::Set::iterator it; + this->data(place).value.emplace(str, it, inserted); + + if (unlikely(inserted)) + { + it->data = this->data(place).arena->insert(str.data, str.size); + } + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + { + State::Set & cur_set = this->data(place).value; + const State::Set & rhs_set = this->data(rhs).value; + cur_set.merge(rhs_set); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + ColumnArray & arr_to = static_cast(to); + ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets(); + IColumn & data_to = arr_to.getData(); + + const State::Set & set = this->data(place).value; + size_t size = set.size(); + offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + size); + + for (auto it = set.begin(); it != set.end(); ++it) + { + unpackFromStringRef(*it, data_to); + } + } +}; + +template +struct Function_Map128 : public IAggreagteFunctionGroupUniqArray> +{ + using State = DataMap128; + +#ifdef AVERAGE_STAT + mutable long sum_size_on_result = 0, sum_size_before_merge = 0; + mutable long cnt_on_result = 0, cnt_before_merge = 0; +#endif + + inline static UInt128 getUInt128Descriptor(StringRef elem_serialized) + { + UInt128 elem_desc; + SipHash hasher; + hasher.update(elem_serialized.data, elem_serialized.size); + hasher.get128(elem_desc.first, elem_desc.second); + return elem_desc; + } + + void addImpl_ownHashing(AggregateDataPtr place, const IColumn & column, size_t row_num) const + { + StringRef elem_serialized; + if (is_string_column) + elem_serialized = column.getDataAt(row_num); + else + elem_serialized = packToStringRef(column, row_num, *this->data(place).arena); + + auto elem_desc = getUInt128Descriptor(elem_serialized); + + bool inserted; + State::Set::iterator it; + this->data(place).value.emplace(elem_desc, it, inserted); + + if (likely(inserted)) + { + if (is_string_column) + elem_serialized.data = this->data(place).arena->insert(elem_serialized.data, elem_serialized.size); + it->second = elem_serialized; + } + else + { + /** The probability that two different elems has the same UInt128 descriptor is extremely slow. + * Don't handle this case. Free arena's memory, there are no need to store duplicates. 
+ */ + if (!is_string_column) + this->data(place).arena->rollback(elem_serialized.size); + } + } + + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + { + SipHash hasher; + column.updateHashWithValue(row_num, hasher); + + UInt128 elem_desc; + hasher.get128(elem_desc.first, elem_desc.second); + + bool inserted; + State::Set::iterator it; + this->data(place).value.emplace(elem_desc, it, inserted); + + if (likely(inserted)) + { + Arena & arena = *this->data(place).arena; + StringRef & elem_serialization = it->second; + + if (is_string_column) + { + elem_serialization = column.getDataAt(row_num); + elem_serialization.data = arena.insert(elem_serialization.data, elem_serialization.size); + } + else + { + const char * ptr = nullptr; + elem_serialization = column.serializeValueIntoArena(row_num, arena, ptr); + } + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + { + State::Set & cur_set = this->data(place).value; + const State::Set & rhs_set = this->data(rhs).value; + + bool inserted; + State::Set::iterator it_cur; + for (auto it_rhs = rhs_set.begin(); it_rhs != rhs_set.end(); ++it_rhs) + { + cur_set.emplace(it_rhs->first, it_cur, inserted); + if (inserted) + it_cur->second = it_rhs->second; + } + +#ifdef AVERAGE_STAT + if (!this->data(rhs).was_merged) + { + sum_size_before_merge += rhs_set.size(); + cnt_before_merge += 1; + } + + this->data(place).was_merged = true; + this->data(rhs).was_merged = true; +#endif + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + ColumnArray & arr_to = static_cast(to); + ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets(); + IColumn & data_to = arr_to.getData(); + + const State::Set & set = this->data(place).value; + size_t size = set.size(); + offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + size); + + for (auto && value : set) + { + unpackFromStringRef(value.second, data_to); + } + +#ifdef AVERAGE_STAT + sum_size_on_result += set.size(); + cnt_on_result += 1; + + LOG_DEBUG(&Logger::get("IAggreagteFunctionGroupUniqArray"), "sum_size_on_result=" << sum_size_on_result << ", cnt_on_result=" << cnt_on_result << ", average=" << sum_size_on_result / cnt_on_result); + LOG_DEBUG(&Logger::get("IAggreagteFunctionGroupUniqArray"), "sum_size_before_merge=" << sum_size_before_merge << ", cnt_before_merge=" << cnt_before_merge << ", average=" << sum_size_before_merge / cnt_before_merge); +#endif + } +}; + +} + + +namespace +{ + +static IAggregateFunction * createWithExtraTypes(const IDataType & argument_type) +{ + if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; + else if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; + else if (typeid_cast(&argument_type)) + { + return new Function_Default; + } + else + { + return new Function_Default<>; + } + + //return nullptr; +} + AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & name, const DataTypes & argument_types) { if (argument_types.size() != 1) @@ -16,6 +386,9 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & n AggregateFunctionPtr res(createWithNumericType(*argument_types[0])); + if (!res) + res = AggregateFunctionPtr(createWithExtraTypes(*argument_types[0])); + if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/Client/Benchmark.cpp b/dbms/src/Client/Benchmark.cpp index eb2f20b0256..8cb34839f94 100644 --- a/dbms/src/Client/Benchmark.cpp +++ b/dbms/src/Client/Benchmark.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -58,11 +59,11 @@ public: Benchmark(unsigned concurrency_, double delay_, const String & host_, UInt16 port_, 
const String & default_database_, const String & user_, const String & password_, const String & stage, - bool randomize_, + bool randomize_, size_t num_repetions_, double max_time_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), connections(concurrency, host_, port_, default_database_, user_, password_), - randomize(randomize_), + randomize(randomize_), num_repetions(num_repetions_), max_time(max_time_), settings(settings_), pool(concurrency) { std::cerr << std::fixed << std::setprecision(3); @@ -94,6 +95,8 @@ private: ConnectionPool connections; bool randomize; + size_t num_repetions; + double max_time; Settings settings; QueryProcessingStage::Enum query_processing_stage; @@ -183,7 +186,7 @@ private: Stopwatch watch; /// В цикле, кладём все запросы в очередь. - for (size_t i = 0; !interrupt_listener.check(); ++i) + for (size_t i = 0; !interrupt_listener.check() && (!(num_repetions > 0) || i < num_repetions); ++i) { if (i >= queries.size()) i = 0; @@ -194,7 +197,7 @@ private: queue.push(queries[query_index]); - if (watch.elapsedSeconds() > delay) + if (delay > 0 && watch.elapsedSeconds() > delay) { auto total_queries = 0; { @@ -206,6 +209,18 @@ private: report(info_per_interval); watch.restart(); } + + if (num_repetions > 0 && info_total.queries >= num_repetions) + { + std::cout << "The execution is broken since request number of loops is reached\n"; + break; + } + + if (max_time > 0 && info_total.watch.elapsedSeconds() >= max_time) + { + std::cout << "The execution is broken since requested time limit is reached\n"; + break; + } } /// Попросим потоки завершиться. 
@@ -346,13 +361,15 @@ int main(int argc, char ** argv) desc.add_options() ("help", "produce help message") ("concurrency,c", boost::program_options::value()->default_value(1), "number of parallel queries") - ("delay,d", boost::program_options::value()->default_value(1), "delay between reports in seconds") + ("delay,d", boost::program_options::value()->default_value(1), "delay between reports in seconds (set 0 to disable)") ("host,h", boost::program_options::value()->default_value("localhost"), "") ("port", boost::program_options::value()->default_value(9000), "") ("user", boost::program_options::value()->default_value("default"), "") ("password", boost::program_options::value()->default_value(""), "") ("database", boost::program_options::value()->default_value("default"), "") ("stage", boost::program_options::value()->default_value("complete"), "request query processing up to specified stage") + ("loops,l", boost::program_options::value()->default_value(0), "number of tests repetions") + ("timelimit,t", boost::program_options::value()->default_value(0.), "stop repeating after specified time limit") ("randomize,r", boost::program_options::value()->default_value(false), "randomize order of execution") #define DECLARE_SETTING(TYPE, NAME, DEFAULT) (#NAME, boost::program_options::value (), "Settings.h") #define DECLARE_LIMIT(TYPE, NAME, DEFAULT) (#NAME, boost::program_options::value (), "Limits.h") @@ -392,6 +409,8 @@ int main(int argc, char ** argv) options["password"].as(), options["stage"].as(), options["randomize"].as(), + options["loops"].as(), + options["timelimit"].as(), settings); } catch (const Exception & e) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 09763b5acba..4124a065102 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -431,7 +431,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu } -void 
Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const +void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data, Arena * arena) const { for (size_t j = 0; j < params.aggregates_size; ++j) { @@ -441,7 +441,15 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const * Для того, чтобы потом всё правильно уничтожилось, "откатываем" часть созданных состояний. * Код не очень удобный. */ + char * data_cur = aggregate_data + offsets_of_aggregate_states[j]; aggregate_functions[j]->create(aggregate_data + offsets_of_aggregate_states[j]); + + /// Прописываем указатель на Arena после создания, до этого она не валидна. + if (aggregate_functions[j]->needArena()) + { + //LOG_DEBUG(&Logger::get("Aggregator"), "set arena=" << arena << " for func " << aggregate_functions[j]->getName()); + reinterpret_cast(data_cur)->arena = arena; + } } catch (...) { @@ -557,7 +565,7 @@ void NO_INLINE Aggregator::executeImplCase( method.onNewKey(*it, params.keys_size, i, keys, *aggregates_pool); AggregateDataPtr place = aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place); + createAggregateStates(place, aggregates_pool); aggregate_data = place; } else @@ -677,7 +685,7 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, if ((params.overflow_row || result.type == AggregatedDataVariants::Type::without_key) && !result.without_key) { AggregateDataPtr place = result.aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place); + createAggregateStates(place, result.aggregates_pool); result.without_key = place; } @@ -1003,6 +1011,7 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( ColumnPlainPtrs & final_aggregate_columns, const Sizes & key_sizes) const { + //LOG_DEBUG(log, "convertToBlockImplFinal start"); for (const auto & value : data) { method.insertKeyIntoColumns(value, key_columns, params.keys_size, key_sizes); @@ -1014,6 +1023,7 @@ void 
NO_INLINE Aggregator::convertToBlockImplFinal( } destroyImpl(method, data); /// NOTE Можно сделать лучше. + //LOG_DEBUG(log, "convertToBlockImplFinal exit"); } template @@ -1024,6 +1034,8 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal( AggregateColumnsData & aggregate_columns, const Sizes & key_sizes) const { + //LOG_DEBUG(log, "convertToBlockImplFinal start"); + for (auto & value : data) { method.insertKeyIntoColumns(value, key_columns, params.keys_size, key_sizes); @@ -1034,6 +1046,7 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal( Method::getAggregateData(value.second) = nullptr; } + //LOG_DEBUG(log, "convertToBlockImplFinal exit"); } @@ -1158,6 +1171,7 @@ BlocksList Aggregator::prepareBlocksAndFillSingleLevel(AggregatedDataVariants & const Sizes & key_sizes, bool final) { + //LOG_DEBUG(log, "prepareBlocksAndFillSingleLevel start"); #define M(NAME) \ else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, \ @@ -1168,6 +1182,7 @@ BlocksList Aggregator::prepareBlocksAndFillSingleLevel(AggregatedDataVariants & #undef M else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + //LOG_DEBUG(log, "prepareBlocksAndFillSingleLevel exit"); }; BlocksList blocks; @@ -1337,6 +1352,8 @@ void NO_INLINE Aggregator::mergeDataImpl( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy( Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + + //LOG_DEBUG(log, "mergeDataImpl"); } else { @@ -1373,6 +1390,8 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( aggregate_functions[i]->destroy( Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + //LOG_DEBUG(log, "mergeDataNoMoreKeysImpl"); + Method::getAggregateData(it->second) = nullptr; } @@ -1402,6 +1421,8 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( aggregate_functions[i]->destroy( 
Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + //LOG_DEBUG(log, "mergeDataOnlyExistingKeysImpl"); + Method::getAggregateData(it->second) = nullptr; } @@ -1426,6 +1447,8 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy(current_data + offsets_of_aggregate_states[i]); + //LOG_DEBUG(log, "mergeWithoutKeyDataImpl"); + current_data = nullptr; } } @@ -1441,6 +1464,8 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( /// Все результаты агрегации соединяем с первым. for (size_t i = 1, size = non_empty_data.size(); i < size; ++i) { + //LOG_DEBUG(log, "mergeSingleLevelDataImpl for_begin " << i << "/" << size-1); + if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) break; @@ -1462,7 +1487,10 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( /// current не будет уничтожать состояния агрегатных функций в деструкторе current.aggregator = nullptr; + + //LOG_DEBUG(log, "mergeSingleLevelDataImpl for_end " << i << "/" << size-1); } + //LOG_DEBUG(log, "mergeSingleLevelDataImpl exit"); } @@ -1795,7 +1823,7 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( method.onNewKey(*it, params.keys_size, i, keys, *aggregates_pool); AggregateDataPtr place = aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place); + createAggregateStates(place, aggregates_pool); aggregate_data = place; } else @@ -1808,6 +1836,8 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( aggregate_functions[j]->merge( value + offsets_of_aggregate_states[j], (*aggregate_columns[j])[i]); + + LOG_DEBUG(log, "mergeStreamsImplCase"); } /// Пораньше освобождаем память. 
@@ -1845,7 +1875,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( if (!res) { AggregateDataPtr place = result.aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place); + createAggregateStates(place, result.aggregates_pool); res = place; } @@ -2284,6 +2314,8 @@ void NO_INLINE Aggregator::destroyImpl( Method & method, Table & table) const { + //LOG_DEBUG(log, "destroyImpl start"); + for (auto elem : table) { AggregateDataPtr & data = Method::getAggregateData(elem.second); @@ -2301,6 +2333,8 @@ void NO_INLINE Aggregator::destroyImpl( data = nullptr; } + + //LOG_DEBUG(log, "destroyImpl start"); } From 0a93de07273626ab98d3f4a9d62805b475f4b38f Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Mon, 19 Sep 2016 16:31:42 +0300 Subject: [PATCH 02/15] Leave only HashSet implementation. [#METR-22071] --- dbms/include/DB/Interpreters/Aggregator.h | 96 +++--- .../AggregateFunctionGroupUniqArray.cpp | 316 ++++-------------- dbms/src/Interpreters/Aggregator.cpp | 3 +- 3 files changed, 108 insertions(+), 307 deletions(-) diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 8eb30839e44..9c1129bc843 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -39,20 +39,20 @@ namespace ErrorCodes /** Разные структуры данных, которые могут использоваться для агрегации * Для эффективности, сами данные для агрегации кладутся в пул. * Владение данными (состояний агрегатных функций) и пулом - * захватывается позднее - в функции convertToBlocks, объектом ColumnAggregateFunction. + * захватывается позднее - в функции convertToBlocks, объектом ColumnAggregateFunction. * * Большинство структур данных существует в двух вариантах: обычном и двухуровневом (TwoLevel). 
* Двухуровневая хэш-таблица работает чуть медленнее при маленьком количестве различных ключей, - * но при большом количестве различных ключей лучше масштабируется, так как позволяет - * распараллелить некоторые операции (слияние, пост-обработку) естественным образом. + * но при большом количестве различных ключей лучше масштабируется, так как позволяет + * распараллелить некоторые операции (слияние, пост-обработку) естественным образом. * * Чтобы обеспечить эффективную работу в большом диапазоне условий, - * сначала используются одноуровневые хэш-таблицы, - * а при достижении количеством различных ключей достаточно большого размера, - * они конвертируются в двухуровневые. + * сначала используются одноуровневые хэш-таблицы, + * а при достижении количеством различных ключей достаточно большого размера, + * они конвертируются в двухуровневые. * * PS. Существует много различных подходов к эффективной реализации параллельной и распределённой агрегации, - * лучшим образом подходящих для разных случаев, и этот подход - всего лишь один из них, выбранный по совокупности причин. + * лучшим образом подходящих для разных случаев, и этот подход - всего лишь один из них, выбранный по совокупности причин. */ using AggregatedDataWithoutKey = AggregateDataPtr; @@ -116,8 +116,8 @@ struct AggregationMethodOneNumber }; /// Из значения в хэш-таблице получить AggregateDataPtr. - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } + static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } /** Разместить дополнительные данные, если это необходимо, в случае, когда в хэш-таблицу был вставлен новый ключ. 
*/ @@ -186,8 +186,8 @@ struct AggregationMethodString } }; - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } + static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } static void onNewKey(typename Data::value_type & value, size_t keys_size, size_t i, StringRefs & keys, Arena & pool) { @@ -247,8 +247,8 @@ struct AggregationMethodFixedString } }; - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } + static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } static void onNewKey(typename Data::value_type & value, size_t keys_size, size_t i, StringRefs & keys, Arena & pool) { @@ -301,8 +301,8 @@ struct AggregationMethodKeysFixed } }; - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } + static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } static void onNewKey(typename Data::value_type & value, size_t keys_size, size_t i, StringRefs & keys, Arena & pool) { @@ -360,8 +360,8 @@ struct AggregationMethodConcat } }; - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } + static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } static void onNewKey(typename Data::value_type 
& value, size_t keys_size, size_t i, StringRefs & keys, Arena & pool) { @@ -384,7 +384,7 @@ struct AggregationMethodConcat { /** Исправление, если все ключи - пустые массивы. Для них в хэш-таблицу записывается StringRef нулевой длины, но с ненулевым указателем. * Но при вставке в хэш-таблицу, такой StringRef оказывается равен другому ключу нулевой длины, - * у которого указатель на данные может быть любым мусором и использовать его нельзя. + * у которого указатель на данные может быть любым мусором и использовать его нельзя. */ for (size_t i = 0; i < keys_size; ++i) key_columns[i]->insertDefault(); @@ -438,8 +438,8 @@ struct AggregationMethodSerialized } }; - static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } + static AggregateDataPtr & getAggregateData(Mapped & value) { return value; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value; } static void onNewKey(typename Data::value_type & value, size_t keys_size, size_t i, StringRefs & keys, Arena & pool) { @@ -497,8 +497,8 @@ struct AggregationMethodHashed } }; - static AggregateDataPtr & getAggregateData(Mapped & value) { return value.second; } - static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value.second; } + static AggregateDataPtr & getAggregateData(Mapped & value) { return value.second; } + static const AggregateDataPtr & getAggregateData(const Mapped & value) { return value.second; } static void onNewKey(typename Data::value_type & value, size_t keys_size, size_t i, StringRefs & keys, Arena & pool) { @@ -525,14 +525,14 @@ struct AggregatedDataVariants : private boost::noncopyable * - при агрегации, состояния создаются в пуле с помощью функции IAggregateFunction::create (внутри - placement new произвольной структуры); * - они должны быть затем уничтожены с помощью IAggregateFunction::destroy (внутри - вызов деструктора 
произвольной структуры); * - если агрегация завершена, то, в функции Aggregator::convertToBlocks, указатели на состояния агрегатных функций - * записываются в ColumnAggregateFunction; ColumnAggregateFunction "захватывает владение" ими, то есть - вызывает destroy в своём деструкторе. + * записываются в ColumnAggregateFunction; ColumnAggregateFunction "захватывает владение" ими, то есть - вызывает destroy в своём деструкторе. * - если при агрегации, до вызова Aggregator::convertToBlocks вылетело исключение, - * то состояния агрегатных функций всё-равно должны быть уничтожены, - * иначе для сложных состояний (наприемер, AggregateFunctionUniq), будут утечки памяти; + * то состояния агрегатных функций всё-равно должны быть уничтожены, + * иначе для сложных состояний (наприемер, AggregateFunctionUniq), будут утечки памяти; * - чтобы, в этом случае, уничтожить состояния, в деструкторе вызывается метод Aggregator::destroyAggregateStates, - * но только если переменная aggregator (см. ниже) не nullptr; + * но только если переменная aggregator (см. ниже) не nullptr; * - то есть, пока вы не передали владение состояниями агрегатных функций в ColumnAggregateFunction, установите переменную aggregator, - * чтобы при возникновении исключения, состояния были корректно уничтожены. + * чтобы при возникновении исключения, состояния были корректно уничтожены. * * PS. Это можно исправить, сделав пул, который знает о том, какие состояния агрегатных функций и в каком порядке в него уложены, и умеет сам их уничтожать. * Но это вряд ли можно просто сделать, так как в этот же пул планируется класть строки переменной длины. 
@@ -556,23 +556,23 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> key32; std::unique_ptr> key64; - std::unique_ptr> key_string; - std::unique_ptr> key_fixed_string; - std::unique_ptr> keys128; - std::unique_ptr> keys256; - std::unique_ptr> hashed; - std::unique_ptr> concat; - std::unique_ptr> serialized; + std::unique_ptr> key_string; + std::unique_ptr> key_fixed_string; + std::unique_ptr> keys128; + std::unique_ptr> keys256; + std::unique_ptr> hashed; + std::unique_ptr> concat; + std::unique_ptr> serialized; std::unique_ptr> key32_two_level; std::unique_ptr> key64_two_level; std::unique_ptr> key_string_two_level; - std::unique_ptr> key_fixed_string_two_level; - std::unique_ptr> keys128_two_level; - std::unique_ptr> keys256_two_level; - std::unique_ptr> hashed_two_level; - std::unique_ptr> concat_two_level; - std::unique_ptr> serialized_two_level; + std::unique_ptr> key_fixed_string_two_level; + std::unique_ptr> keys128_two_level; + std::unique_ptr> keys256_two_level; + std::unique_ptr> hashed_two_level; + std::unique_ptr> concat_two_level; + std::unique_ptr> serialized_two_level; /// В этом и подобных макросах, вариант without_key не учитывается. #define APPLY_FOR_AGGREGATED_VARIANTS(M) \ @@ -763,7 +763,7 @@ using ManyAggregatedDataVariants = std::vector; * В случае наличия group_by_overflow_mode = 'any', данные агрегируются как обычно, кроме ключей, не поместившихся в max_rows_to_group_by. * Для этих ключей, данные агрегируются в одну дополнительную строку - далее см. под названиями overflow_row, overflows... * Позже, состояния агрегатных функций для всех строк (прошедших через HAVING) мерджатся в одну, - * а также к ним прибавляется или не прибавляется (в зависимости от настройки totals_mode) также overflow_row - это и будет TOTALS. + * а также к ним прибавляется или не прибавляется (в зависимости от настройки totals_mode) также overflow_row - это и будет TOTALS. 
*/ @@ -792,7 +792,7 @@ public: /// Настройки двухуровневой агрегации (используется для большого количества ключей). /** При каком количестве ключей или размере состояния агрегации в байтах, - * начинает использоваться двухуровневая агрегация. Достаточно срабатывания хотя бы одного из порогов. + * начинает использоваться двухуровневая агрегация. Достаточно срабатывания хотя бы одного из порогов. * 0 - соответствующий порог не задан. */ const size_t group_by_two_level_threshold; @@ -850,7 +850,7 @@ public: * Если overflow_row = true, то агрегаты для строк, не попавших в max_rows_to_group_by, кладутся в первый блок. * * Если final = false, то в качестве столбцов-агрегатов создаются ColumnAggregateFunction с состоянием вычислений, - * которые могут быть затем объединены с другими состояниями (для распределённой обработки запроса). + * которые могут быть затем объединены с другими состояниями (для распределённой обработки запроса). * Если final = true, то в качестве столбцов-агрегатов создаются столбцы с готовыми значениями. */ BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; @@ -917,8 +917,8 @@ protected: * Внутренний цикл (для случая without_key) получается почти в два раза компактнее; прирост производительности около 30%. * * 2. Вызов по указателю на функцию лучше, чем виртуальный вызов, потому что в случае виртуального вызова, - * GCC 5.1.2 генерирует код, который на каждой итерации цикла заново грузит из памяти в регистр адрес функции - * (значение по смещению в таблице виртуальных функций). + * GCC 5.1.2 генерирует код, который на каждой итерации цикла заново грузит из памяти в регистр адрес функции + * (значение по смещению в таблице виртуальных функций). */ struct AggregateFunctionInstruction { @@ -947,9 +947,9 @@ protected: /** Динамически скомпилированная библиотека для агрегации, если есть. * Смысл динамической компиляции в том, чтобы специализировать код - * под конкретный список агрегатных функций. 
+ * под конкретный список агрегатных функций. * Это позволяет развернуть цикл по созданию и обновлению состояний агрегатных функций, - * а также использовать вместо виртуальных вызовов inline-код. + * а также использовать вместо виртуальных вызовов inline-код. */ struct CompiledData { @@ -977,7 +977,7 @@ protected: void initialize(const Block & block); /** Установить блок - пример результата, - * только если он ещё не был установлен. + * только если он ещё не был установлен. */ void setSampleBlock(const Block & block); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 6a1bc01465a..c9d6f635b65 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -19,82 +19,47 @@ namespace DB namespace { -template -class Function_DataNaive; - -template -class Function_Fair; - -template -class Function_Map128; - - -template -//using Function_Default = Function_Map128; -using Function_Default = Function_Fair; - -struct DataNaive : public IAggregateDataWithArena -{ - using Set = std::unordered_set; - Set value; -}; - -struct DataFair : public IAggregateDataWithArena +struct DataGroupUniqArray : public IAggregateDataWithArena { using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory<16>>; Set value; }; -struct DataMap128 : public IAggregateDataWithArena -{ - using Set = HashMap, HashTableAllocatorWithStackMemory<16>>; - Set value; -#ifdef AVERAGE_STAT - mutable bool was_merged = false; -#endif -}; - - - -template -inline StringRef packToStringRef(const IColumn & column, size_t row_num, Arena & arena) +template +inline StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena) { const char * begin = nullptr; return column.serializeValueIntoArena(row_num, arena, begin); } template <> -inline StringRef packToStringRef(const IColumn & column, size_t row_num, Arena 
& arena) +inline StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena) { - StringRef str = column.getDataAt(row_num); - str.data = arena.insert(str.data, str.size); - return str; + return column.getDataAt(row_num); } -template -inline void unpackFromStringRef(StringRef str, IColumn & data_to) +template +inline void deserializeAndInsert(StringRef str, IColumn & data_to) { data_to.deserializeAndInsertFromArena(str.data); } template <> -inline void unpackFromStringRef(StringRef str, IColumn & data_to) +inline void deserializeAndInsert(StringRef str, IColumn & data_to) { data_to.insertData(str.data, str.size); } -template -class IAggreagteFunctionGroupUniqArray : public IUnaryAggregateFunction +template +class AggreagteFunctionGroupUniqArrayGeneric : public IUnaryAggregateFunction> { -protected: - mutable DataTypePtr input_data_type; -public: + using State = DataGroupUniqArray; - using State = Data; +public: String getName() const override { return "groupUniqArray"; } @@ -108,96 +73,57 @@ public: return std::make_shared(input_data_type->clone()); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED); + + auto & set = this->data(place).value; + writeVarUInt(set.size(), buf); + + for (auto & elem: set) + { + writeStringBinary(elem, buf); + } } void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED); - } - void destroy(AggregateDataPtr place) const noexcept override - { - this->data(place).~Data(); - //LOG_DEBUG(&Logger::get("IAggreagteFunctionGroupUniqArray"), "destroyed."); - } -}; + State::Set & set = this->data(place).value; + size_t size; + readVarUInt(size, buf); + //TODO: set.reserve(size); -template -struct Function_DataNaive : public IAggreagteFunctionGroupUniqArray> -{ - using State = DataNaive; - - void 
addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const - { - StringRef serialized_elem; - if (is_string_column) - serialized_elem = static_cast(column).getDataAt(row_num); - else - serialized_elem = packToStringRef(column, row_num, *this->data(place).arena); - - this->data(place).value.insert(serialized_elem.toString()); - } - - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override - { - State::Set & cur_set = this->data(place).value; - const State::Set & rhs_set = this->data(rhs).value; - - for (auto it = begin(rhs_set); it != end(rhs_set); ++it) + std::string str_buf; + for (size_t i = 0; i < size; i++) { - cur_set.insert(*it); + readStringBinary(str_buf, buf); + set.insert(StringRef(str_buf)); } } - void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override - { - ColumnArray & arr_to = static_cast(to); - ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets(); - IColumn & data_to = arr_to.getData(); - - const State::Set & set = this->data(place).value; - size_t size = set.size(); - offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + size); - - for (auto it = begin(set); it != end(set); ++it) - { - unpackFromStringRef(*it, data_to); - } - } -}; - -template -struct Function_Fair : public IAggreagteFunctionGroupUniqArray> -{ - using State = DataFair; - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const { - if (!is_string_column) + auto arena = this->data(place).arena; + auto & set = this->data(place).value; + + bool inserted; + State::Set::iterator it; + + StringRef str_serialized = getSerialization(column, row_num, *arena); + set.emplace(str_serialized, it, inserted); + + if (!is_plain_column) { - StringRef serialized_elem = packToStringRef(column, row_num, *this->data(place).arena); - - bool inserted; - State::Set::iterator it; - this->data(place).value.emplace(serialized_elem, it, inserted); - if (!likely(inserted)) - this->data(place).arena->rollback(serialized_elem.size); + arena->rollback(str_serialized.size); } else { - StringRef str = column.getDataAt(row_num); - - bool inserted; - State::Set::iterator it; - this->data(place).value.emplace(str, it, inserted); - if (unlikely(inserted)) { - it->data = this->data(place).arena->insert(str.data, str.size); + it->data = arena->insert(str_serialized.data, str_serialized.size); } } } @@ -215,147 +141,16 @@ struct Function_Fair : public IAggreagteFunctionGroupUniqArraydata(place).value; - size_t size = set.size(); - offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + size); + auto & set = this->data(place).value; + offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + set.size()); - for (auto it = set.begin(); it != set.end(); ++it) + for (auto & elem : set) { - unpackFromStringRef(*it, data_to); + deserializeAndInsert(elem, data_to); } } }; -template -struct Function_Map128 : public IAggreagteFunctionGroupUniqArray> -{ - using State = DataMap128; - -#ifdef AVERAGE_STAT - mutable long sum_size_on_result = 0, sum_size_before_merge = 0; - mutable long cnt_on_result = 0, cnt_before_merge = 0; -#endif - - inline static UInt128 getUInt128Descriptor(StringRef elem_serialized) - { - UInt128 elem_desc; - SipHash hasher; - hasher.update(elem_serialized.data, elem_serialized.size); - hasher.get128(elem_desc.first, elem_desc.second); - return elem_desc; - } - - void addImpl_ownHashing(AggregateDataPtr place, const IColumn & column, size_t row_num) const - { - StringRef elem_serialized; - if (is_string_column) - elem_serialized = column.getDataAt(row_num); - else - elem_serialized = packToStringRef(column, row_num, *this->data(place).arena); - - auto elem_desc = getUInt128Descriptor(elem_serialized); - - bool inserted; - State::Set::iterator it; - this->data(place).value.emplace(elem_desc, it, inserted); - - if (likely(inserted)) - { - if (is_string_column) - elem_serialized.data = this->data(place).arena->insert(elem_serialized.data, elem_serialized.size); - it->second = elem_serialized; - } - else - { - /** The probability that two different elems has the same UInt128 descriptor is extremely slow. - * Don't handle this case. Free arena's memory, there are no need to store duplicates. 
- */ - if (!is_string_column) - this->data(place).arena->rollback(elem_serialized.size); - } - } - - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const - { - SipHash hasher; - column.updateHashWithValue(row_num, hasher); - - UInt128 elem_desc; - hasher.get128(elem_desc.first, elem_desc.second); - - bool inserted; - State::Set::iterator it; - this->data(place).value.emplace(elem_desc, it, inserted); - - if (likely(inserted)) - { - Arena & arena = *this->data(place).arena; - StringRef & elem_serialization = it->second; - - if (is_string_column) - { - elem_serialization = column.getDataAt(row_num); - elem_serialization.data = arena.insert(elem_serialization.data, elem_serialization.size); - } - else - { - const char * ptr = nullptr; - elem_serialization = column.serializeValueIntoArena(row_num, arena, ptr); - } - } - } - - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override - { - State::Set & cur_set = this->data(place).value; - const State::Set & rhs_set = this->data(rhs).value; - - bool inserted; - State::Set::iterator it_cur; - for (auto it_rhs = rhs_set.begin(); it_rhs != rhs_set.end(); ++it_rhs) - { - cur_set.emplace(it_rhs->first, it_cur, inserted); - if (inserted) - it_cur->second = it_rhs->second; - } - -#ifdef AVERAGE_STAT - if (!this->data(rhs).was_merged) - { - sum_size_before_merge += rhs_set.size(); - cnt_before_merge += 1; - } - - this->data(place).was_merged = true; - this->data(rhs).was_merged = true; -#endif - } - - void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override - { - ColumnArray & arr_to = static_cast(to); - ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets(); - IColumn & data_to = arr_to.getData(); - - const State::Set & set = this->data(place).value; - size_t size = set.size(); - offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + size); - - for (auto && value : set) - { - unpackFromStringRef(value.second, data_to); - } - -#ifdef AVERAGE_STAT - sum_size_on_result += set.size(); - cnt_on_result += 1; - - LOG_DEBUG(&Logger::get("IAggreagteFunctionGroupUniqArray"), "sum_size_on_result=" << sum_size_on_result << ", cnt_on_result=" << cnt_on_result << ", average=" << sum_size_on_result / cnt_on_result); - LOG_DEBUG(&Logger::get("IAggreagteFunctionGroupUniqArray"), "sum_size_before_merge=" << sum_size_before_merge << ", cnt_before_merge=" << cnt_before_merge << ", average=" << sum_size_before_merge / cnt_before_merge); -#endif - } -}; - } @@ -366,16 +161,23 @@ static IAggregateFunction * createWithExtraTypes(const IDataType & argument_type { if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; else if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; - else if (typeid_cast(&argument_type)) - { - return new Function_Default; - } else { - return new Function_Default<>; - } + /// Check that we can use plain version of AggreagteFunctionGroupUniqArrayGeneric - //return nullptr; + if (typeid_cast(&argument_type)) + return new AggreagteFunctionGroupUniqArrayGeneric; + + auto * array_type = typeid_cast(&argument_type); + if (array_type) + { + auto nested_type = array_type->getNestedType(); + if (nested_type->isNumeric() || typeid_cast(nested_type.get())) + return new AggreagteFunctionGroupUniqArrayGeneric; + } + + return new AggreagteFunctionGroupUniqArrayGeneric; + } } AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & name, const DataTypes & argument_types) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 4124a065102..532539f6236 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -447,7 +447,6 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data, Arena /// Прописываем указатель на Arena после 
создания, до этого она не валидна. if (aggregate_functions[j]->needArena()) { - //LOG_DEBUG(&Logger::get("Aggregator"), "set arena=" << arena << " for func " << aggregate_functions[j]->getName()); reinterpret_cast(data_cur)->arena = arena; } } @@ -1055,7 +1054,7 @@ Block Aggregator::prepareBlockAndFill( AggregatedDataVariants & data_variants, bool final, size_t rows, - Filler && filler) const + Filler && filler) const { Block res = sample.cloneEmpty(); From 451a4b1d6784cd8561d56bd4102aa7407a6c9f96 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 20 Sep 2016 01:30:40 +0300 Subject: [PATCH 03/15] Added new Arena parameter to add() method of IAggregateFunction. [#METR-22071] --- .../AggregateFunctionArray.h | 8 +++---- .../AggregateFunctions/AggregateFunctionAvg.h | 2 +- .../AggregateFunctionGroupArray.h | 4 ++-- .../AggregateFunctionGroupUniqArray.h | 2 +- .../AggregateFunctions/AggregateFunctionIf.h | 8 +++---- .../AggregateFunctionMerge.h | 6 ++--- .../AggregateFunctionQuantile.h | 4 ++-- .../AggregateFunctionQuantileDeterministic.h | 4 ++-- .../AggregateFunctionQuantileExact.h | 4 ++-- .../AggregateFunctionQuantileExactWeighted.h | 4 ++-- .../AggregateFunctionQuantileTDigest.h | 8 +++---- .../AggregateFunctionQuantileTiming.h | 8 +++---- .../AggregateFunctionSequenceMatch.h | 6 ++--- .../AggregateFunctionState.h | 8 +++---- .../AggregateFunctions/AggregateFunctionSum.h | 2 +- .../AggregateFunctionUniq.h | 8 +++---- .../AggregateFunctionUniqUpTo.h | 8 +++---- .../AggregateFunctionsArgMinMax.h | 2 +- .../AggregateFunctionsMinMaxAny.h | 2 +- .../AggregateFunctionsStatistics.h | 4 ++-- .../AggregateFunctions/IAggregateFunction.h | 14 +++++------ .../IBinaryAggregateFunction.h | 8 +++---- .../INullaryAggregateFunction.h | 4 ++-- .../IUnaryAggregateFunction.h | 21 +++++++++++++---- dbms/include/DB/Functions/FunctionsArray.h | 2 +- dbms/include/DB/Interpreters/Aggregator.h | 3 ++- .../DB/Interpreters/SpecializedAggregator.h | 14 ++++++----- 
.../AggregateFunctionGroupUniqArray.cpp | 6 ++--- .../GraphiteRollupSortedBlockInputStream.cpp | 2 +- dbms/src/Interpreters/Aggregator.cpp | 23 ++++++++++--------- 30 files changed, 107 insertions(+), 92 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h index 9f776924611..58fc8ebd7a0 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h @@ -83,7 +83,7 @@ public: return nested_func->alignOfData(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { const IColumn * nested[num_agruments]; @@ -97,7 +97,7 @@ public: size_t end = offsets[row_num]; for (size_t i = begin; i < end; ++i) - nested_func->add(place, nested, i); + nested_func->add(place, nested, i, nullptr); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override @@ -120,9 +120,9 @@ public: nested_func->insertResultInto(place, to); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).add(place, columns, row_num); + static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h index 87e32867f40..8d03211c003 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h @@ -42,7 +42,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn 
& column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).sum += static_cast &>(column).getData()[row_num]; ++this->data(place).count; diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h index f73b61a31f5..8bff6d57553 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h @@ -51,7 +51,7 @@ public: { } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).value.push_back(static_cast &>(column).getData()[row_num]); } @@ -128,7 +128,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { data(place).value.push_back(Array::value_type()); column.get(row_num, data(place).value.back()); diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 394c60cb9f1..d9919067ce4 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -56,7 +56,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).value.insert(static_cast &>(column).getData()[row_num]); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h index ee3ef7b59c3..c699cbaee3f 100644 --- 
a/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h @@ -77,10 +77,10 @@ public: return nested_func->alignOfData(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { if (static_cast(*columns[num_agruments - 1]).getData()[row_num]) - nested_func->add(place, columns, row_num); + nested_func->add(place, columns, row_num, nullptr); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override @@ -103,9 +103,9 @@ public: nested_func->insertResultInto(place, to); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).add(place, columns, row_num); + static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h index 26019025e9b..caa7c46d222 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h @@ -79,7 +79,7 @@ public: return nested_func->alignOfData(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { nested_func->merge(place, static_cast(*columns[0]).getData()[row_num]); } @@ -104,9 +104,9 @@ public: nested_func->insertResultInto(place, to); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) 
+ static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).add(place, columns, row_num); + static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h index 07d8efc0134..b157f474013 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h @@ -68,7 +68,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).sample.insert(static_cast &>(column).getData()[row_num]); } @@ -145,7 +145,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).sample.insert(static_cast &>(column).getData()[row_num]); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h index 8041a623f33..6c1096f9d2a 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h @@ -74,7 +74,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, const IColumn & determinator, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, const IColumn & determinator, size_t row_num, Arena *) const { this->data(place).sample.insert(static_cast &>(column).getData()[row_num], determinator.get64(row_num)); @@ -158,7 
+158,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, const IColumn & determinator, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, const IColumn & determinator, size_t row_num, Arena *) const { this->data(place).sample.insert(static_cast &>(column).getData()[row_num], determinator.get64(row_num)); diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h index 589556d2f2b..17f23620c14 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h @@ -68,7 +68,7 @@ public: level = apply_visitor(FieldVisitorConvertToNumber(), params[0]); } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).array.push_back(static_cast &>(column).getData()[row_num]); } @@ -150,7 +150,7 @@ public: levels.set(params); } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).array.push_back(static_cast &>(column).getData()[row_num]); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h index a970f06753f..72394daae17 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h @@ -74,7 +74,7 @@ public: level = apply_visitor(FieldVisitorConvertToNumber(), params[0]); } - void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num) const + void addImpl(AggregateDataPtr place, const 
IColumn & column_value, const IColumn & column_weight, size_t row_num, Arena *) const { this->data(place) .map[static_cast &>(column_value).getData()[row_num]] @@ -189,7 +189,7 @@ public: levels.set(params); } - void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num, Arena *) const { this->data(place) .map[static_cast &>(column_value).getData()[row_num]] diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h index 3c24b9679a7..a5ead2ebc2e 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h @@ -382,7 +382,7 @@ public: level = apply_visitor(FieldVisitorConvertToNumber(), params[0]); } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).digest.add(params, static_cast &>(column).getData()[row_num]); } @@ -449,7 +449,7 @@ public: level = apply_visitor(FieldVisitorConvertToNumber(), params[0]); } - void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num, Arena *) const { this->data(place).digest.add(params, static_cast &>(column_value).getData()[row_num], @@ -513,7 +513,7 @@ public: levels.set(params); } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).digest.add(params, static_cast &>(column).getData()[row_num]); } @@ 
-593,7 +593,7 @@ public: levels.set(params); } - void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num, Arena *) const { this->data(place).digest.add(params, static_cast &>(column_value).getData()[row_num], diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h index 2ff71affd8b..83f99bb3bd0 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h @@ -815,7 +815,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).insert(static_cast &>(column).getData()[row_num]); } @@ -873,7 +873,7 @@ public: level = apply_visitor(FieldVisitorConvertToNumber(), params[0]); } - void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num, Arena *) const { this->data(place).insertWeighted( static_cast &>(column_value).getData()[row_num], @@ -930,7 +930,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).insert(static_cast &>(column).getData()[row_num]); } @@ -991,7 +991,7 @@ public: levels.set(params); } - void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, 
size_t row_num, Arena *) const { this->data(place).insertWeighted( static_cast &>(column_value).getData()[row_num], diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h index d75a9fd9854..f39826418d4 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -192,7 +192,7 @@ public: parsePattern(); } - void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num) const override + void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override { const auto timestamp = static_cast(columns[0])->getData()[row_num]; @@ -234,9 +234,9 @@ public: static_cast(to).getData().push_back(match(events_it, events_end)); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).add(place, columns, row_num); + static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h index b8ed12c229e..781a8027db0 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h @@ -72,9 +72,9 @@ public: return nested_func->alignOfData(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - nested_func->add(place, columns, row_num); + nested_func->add(place, columns, 
row_num, arena); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override @@ -102,9 +102,9 @@ public: AggregateFunctionPtr getNestedFunction() const { return nested_func_owner; } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).add(place, columns, row_num); + return static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h index 3342a85fd39..182b9fbc123 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h @@ -40,7 +40,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).sum += static_cast &>(column).getData()[row_num]; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 1b0e40b340f..6abb9334447 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -340,7 +340,7 @@ public: { } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { detail::OneAdder::addImpl(this->data(place), column, row_num); } @@ -395,7 +395,7 @@ public: num_args = arguments.size(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override + void 
add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { this->data(place).set.insert(UniqVariadicHash::apply(num_args, columns, row_num)); } @@ -420,9 +420,9 @@ public: static_cast(to).getData().push_back(this->data(place).set.size()); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).add(place, columns, row_num); + static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h index 0bf345c723e..a0875c7356e 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -151,7 +151,7 @@ public: threshold = threshold_param; } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).addImpl(column, row_num, threshold); } @@ -224,7 +224,7 @@ public: threshold = threshold_param; } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { this->data(place).insert(UniqVariadicHash::apply(num_args, columns, row_num), threshold); } @@ -249,9 +249,9 @@ public: static_cast(to).getData().push_back(this->data(place).size()); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr 
place, const IColumn ** columns, size_t row_num, Arena *arena) { - return static_cast(*that).add(place, columns, row_num); + static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h index b6ac1216e73..fe84ff9eb11 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h @@ -41,7 +41,7 @@ public: type_val = arguments[1]; } - void addImpl(AggregateDataPtr place, const IColumn & column_arg, const IColumn & column_max, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column_arg, const IColumn & column_max, size_t row_num, Arena *) const { if (this->data(place).value.changeIfBetter(column_max, row_num)) this->data(place).result.change(column_arg, row_num); diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h index 1f459e1195f..05780dc12ac 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h @@ -649,7 +649,7 @@ public: } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).changeIfBetter(column, row_num); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h index 9d25e8c4317..dde98f84157 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h @@ -129,7 +129,7 @@ public: 
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const { this->data(place).update(column, row_num); } @@ -397,7 +397,7 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - void addImpl(AggregateDataPtr place, const IColumn & column_left, const IColumn & column_right, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column_left, const IColumn & column_right, size_t row_num, Arena *) const { this->data(place).update(column_left, column_right, row_num); } diff --git a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h index d05a3d8b25e..f9f2e06d8cc 100644 --- a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h @@ -86,7 +86,7 @@ public: virtual size_t alignOfData() const = 0; /// Добавить значение. columns - столбцы, содержащие аргументы, row_num - номер строки в столбцах. - virtual void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const = 0; + virtual void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const = 0; /// Объединить состояние с другим состоянием. virtual void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const = 0; @@ -109,7 +109,7 @@ public: * В этом случае структура данных для агррегации должна быть унаследована от IAggregateDataWithArena. * Указатель на необходимую Arena можно будет получить с помощью IAggregateDataWithArena::getArena(). */ - virtual bool needArena() const { return false; } + //virtual bool needArena() const { return false; } /** Внутренний цикл, использующий указатель на функцию, получается лучше, чем использующий виртуальную функцию. @@ -118,7 +118,7 @@ public: * Это даёт падение производительности на простых запросах в районе 12%. 
* После появления более хороших компиляторов, код можно будет убрать. */ - using AddFunc = void (*)(const IAggregateFunction *, AggregateDataPtr, const IColumn **, size_t); + using AddFunc = void (*)(const IAggregateFunction *, AggregateDataPtr, const IColumn **, size_t, Arena *); virtual AddFunc getAddressOfAddFunction() const = 0; }; @@ -160,10 +160,10 @@ public: return __alignof__(Data); } - bool needArena() const override - { - return std::is_base_of(); - } +// bool needArena() const override +// { +// return std::is_base_of(); +// } }; diff --git a/dbms/include/DB/AggregateFunctions/IBinaryAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IBinaryAggregateFunction.h index c93c47eb2b9..a74f7e6a523 100644 --- a/dbms/include/DB/AggregateFunctions/IBinaryAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IBinaryAggregateFunction.h @@ -25,14 +25,14 @@ public: getDerived().setArgumentsImpl(arguments); } - void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num) const override final + void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena * arena) const override final { - getDerived().addImpl(place, *columns[0], *columns[1], row_num); + getDerived().addImpl(place, *columns[0], *columns[1], row_num, arena); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).addImpl(place, *columns[0], *columns[1], row_num); + static_cast(*that).addImpl(place, *columns[0], *columns[1], row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/INullaryAggregateFunction.h b/dbms/include/DB/AggregateFunctions/INullaryAggregateFunction.h index 08452500253..4af5bd87d9e 100644 --- 
a/dbms/include/DB/AggregateFunctions/INullaryAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/INullaryAggregateFunction.h @@ -26,12 +26,12 @@ public: } /// Добавить значение. - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override final + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override final { getDerived().addImpl(place); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) { return static_cast(*that).addImpl(place); } diff --git a/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h index f3d517d651e..62ee3a5ebe5 100644 --- a/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h @@ -27,14 +27,27 @@ public: } /// Добавить значение. 
- void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const override final + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override final { - getDerived().addImpl(place, *columns[0], row_num); + getDerived().addImpl(place, *columns[0], row_num, arena); } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num) +// template +// static void addFreeWrapper(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) +// { +// static_cast(*that).addImpl(place, *columns[0], row_num); +// } +// +// template <> +// static void addFreeWrapper(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) +// { +// static_cast(*that).addImpl(place, *columns[0], row_num, arena); +// } + + static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).addImpl(place, *columns[0], row_num); + static_cast(*that).addImpl(place, *columns[0], row_num, arena); + //addFreeWrapper(that, place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override { return &addFree; } diff --git a/dbms/include/DB/Functions/FunctionsArray.h b/dbms/include/DB/Functions/FunctionsArray.h index bc5087ce2b1..2fd4aaa443f 100644 --- a/dbms/include/DB/Functions/FunctionsArray.h +++ b/dbms/include/DB/Functions/FunctionsArray.h @@ -2714,7 +2714,7 @@ public: try { for (size_t j = current_offset; j < next_offset; ++j) - agg_func.add(place, aggregate_arguments, j); + agg_func.add(place, aggregate_arguments, j, nullptr); agg_func.insertResultInto(place, res_col); } diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 9c1129bc843..3a0cb150517 100644 --- 
a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -1024,7 +1024,8 @@ protected: void executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t rows, - AggregateFunctionInstruction * aggregate_instructions) const; + AggregateFunctionInstruction * aggregate_instructions, + Arena * arena) const; template void writeToTemporaryFileImpl( diff --git a/dbms/include/DB/Interpreters/SpecializedAggregator.h b/dbms/include/DB/Interpreters/SpecializedAggregator.h index 3d236363dea..d75d95c8e5f 100644 --- a/dbms/include/DB/Interpreters/SpecializedAggregator.h +++ b/dbms/include/DB/Interpreters/SpecializedAggregator.h @@ -74,11 +74,12 @@ struct AggregateFunctionsUpdater const Sizes & offsets_of_aggregate_states_, Aggregator::AggregateColumns & aggregate_columns_, AggregateDataPtr & value_, - size_t row_num_) + size_t row_num_ + Arena * arena_ = nullptr) : aggregate_functions(aggregate_functions_), offsets_of_aggregate_states(offsets_of_aggregate_states_), aggregate_columns(aggregate_columns_), - value(value_), row_num(row_num_) + value(value_), row_num(row_num_), arena(arena_) { } @@ -90,6 +91,7 @@ struct AggregateFunctionsUpdater Aggregator::AggregateColumns & aggregate_columns; AggregateDataPtr & value; size_t row_num; + Arena * arena; }; template @@ -98,7 +100,7 @@ void AggregateFunctionsUpdater::operator()() static_cast(aggregate_functions[column_num])->add( value + offsets_of_aggregate_states[column_num], &aggregate_columns[column_num][0], - row_num); + row_num, arena); } struct AggregateFunctionsCreator @@ -205,7 +207,7 @@ void NO_INLINE Aggregator::executeSpecializedCase( /// Добавляем значения в агрегатные функции. 
AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, value, i)); + aggregate_functions, offsets_of_aggregate_states, aggregate_columns, value, i, aggregates_pool)); method.onExistingKey(key, keys, *aggregates_pool); continue; @@ -254,7 +256,7 @@ void NO_INLINE Aggregator::executeSpecializedCase( /// Добавляем значения в агрегатные функции. AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, value, i)); + aggregate_functions, offsets_of_aggregate_states, aggregate_columns, value, i, aggregates_pool)); } } @@ -278,7 +280,7 @@ void NO_INLINE Aggregator::executeSpecializedWithoutKey( for (size_t i = 0; i < rows; ++i) { AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, res, i)); + aggregate_functions, offsets_of_aggregate_states, aggregate_columns, res, i, nullptr)); } } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index c9d6f635b65..0b35dd7cffc 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -73,7 +73,7 @@ public: return std::make_shared(input_data_type->clone()); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED); @@ -103,9 +103,8 @@ public: } } - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num) const + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena * arena) const { - auto arena = this->data(place).arena; auto & set = this->data(place).value; bool inserted; @@ -164,7 +163,6 @@ static IAggregateFunction * 
createWithExtraTypes(const IDataType & argument_type else { /// Check that we can use plain version of AggreagteFunctionGroupUniqArrayGeneric - if (typeid_cast(&argument_type)) return new AggreagteFunctionGroupUniqArrayGeneric; diff --git a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp index 9cf3af82694..5d54da90e6b 100644 --- a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp @@ -223,7 +223,7 @@ template void GraphiteRollupSortedBlockInputStream::accumulateRow(TSortCursor & cursor) { if (current_pattern) - current_pattern->function->add(place_for_aggregate_state.data(), &cursor->all_columns[value_column_num], cursor->pos); + current_pattern->function->add(place_for_aggregate_state.data(), &cursor->all_columns[value_column_num], cursor->pos, nullptr); } } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 532539f6236..7e8d87b6304 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -441,14 +441,14 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data, Arena * Для того, чтобы потом всё правильно уничтожилось, "откатываем" часть созданных состояний. * Код не очень удобный. */ - char * data_cur = aggregate_data + offsets_of_aggregate_states[j]; aggregate_functions[j]->create(aggregate_data + offsets_of_aggregate_states[j]); - /// Прописываем указатель на Arena после создания, до этого она не валидна. - if (aggregate_functions[j]->needArena()) - { - reinterpret_cast(data_cur)->arena = arena; - } +// /// Прописываем указатель на Arena после создания, до этого она не валидна. +// char * data_cur = aggregate_data + offsets_of_aggregate_states[j]; +// if (aggregate_functions[j]->needArena()) +// { +// reinterpret_cast(data_cur)->arena = arena; +// } } catch (...) 
{ @@ -526,7 +526,7 @@ void NO_INLINE Aggregator::executeImplCase( /// Добавляем значения в агрегатные функции. AggregateDataPtr value = Method::getAggregateData(it->second); for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) - (*inst->func)(inst->that, value + inst->state_offset, inst->arguments, i); + (*inst->func)(inst->that, value + inst->state_offset, inst->arguments, i, aggregates_pool); method.onExistingKey(key, keys, *aggregates_pool); continue; @@ -574,7 +574,7 @@ void NO_INLINE Aggregator::executeImplCase( /// Добавляем значения в агрегатные функции. for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) - (*inst->func)(inst->that, value + inst->state_offset, inst->arguments, i); + (*inst->func)(inst->that, value + inst->state_offset, inst->arguments, i, aggregates_pool); } } @@ -585,7 +585,8 @@ void NO_INLINE Aggregator::executeImplCase( void NO_INLINE Aggregator::executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t rows, - AggregateFunctionInstruction * aggregate_instructions) const + AggregateFunctionInstruction * aggregate_instructions, + Arena * arena) const { /// Оптимизация в случае единственной агрегатной функции count. 
AggregateFunctionCount * agg_count = params.aggregates_size == 1 @@ -600,7 +601,7 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( { /// Добавляем значения for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) - (*inst->func)(inst->that, res + inst->state_offset, inst->arguments, i); + (*inst->func)(inst->that, res + inst->state_offset, inst->arguments, i, arena); } } } @@ -701,7 +702,7 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, (compiled_data->compiled_method_ptr)(*this, result.without_key, rows, aggregate_columns); } else - executeWithoutKeyImpl(result.without_key, rows, &aggregate_functions_instructions[0]); + executeWithoutKeyImpl(result.without_key, rows, &aggregate_functions_instructions[0], result.aggregates_pool); } else { From 37ed6a303876bccd3621f04e450c8419ce692398 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 21 Sep 2016 19:39:44 +0300 Subject: [PATCH 04/15] Fixed dynamic complation errors. 
[#METR-22071] --- .../AggregateFunctionGroupUniqArray.h | 130 +++++++++++++++ .../AggregateFunctionState.h | 2 +- .../AggregateFunctions/IAggregateFunction.h | 14 -- .../IUnaryAggregateFunction.h | 13 -- dbms/include/DB/Interpreters/Aggregator.h | 45 +++--- .../DB/Interpreters/SpecializedAggregator.h | 9 +- .../AggregateFunctionGroupUniqArray.cpp | 149 +----------------- dbms/src/Interpreters/Aggregator.cpp | 18 +-- 8 files changed, 166 insertions(+), 214 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h index d9919067ce4..cc526a48900 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -101,6 +101,136 @@ public: }; +/// Generic implementation +struct AggreagteFunctionGroupUniqArrayGenericData +{ + using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory<16>>; + Set value; +}; + +template +class AggreagteFunctionGroupUniqArrayGeneric : public IUnaryAggregateFunction> +{ + mutable DataTypePtr input_data_type; + + using State = AggreagteFunctionGroupUniqArrayGenericData; + + static StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena); + + static void deserializeAndInsert(StringRef str, IColumn & data_to); + +public: + + String getName() const override { return "groupUniqArray"; } + + void setArgument(const DataTypePtr & argument) + { + input_data_type = argument; + } + + DataTypePtr getReturnType() const override + { + return std::make_shared(input_data_type->clone()); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + auto & set = this->data(place).value; + writeVarUInt(set.size(), buf); + + for (auto & elem: set) + { + writeStringBinary(elem, buf); + } + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + { + State::Set & set = 
this->data(place).value; + size_t size; + readVarUInt(size, buf); + //TODO: set.reserve(size); + + std::string str_buf; + for (size_t i = 0; i < size; i++) + { + readStringBinary(str_buf, buf); + set.insert(StringRef(str_buf)); + } + } + + void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena * arena) const + { + auto & set = this->data(place).value; + + bool inserted; + State::Set::iterator it; + + StringRef str_serialized = getSerialization(column, row_num, *arena); + set.emplace(str_serialized, it, inserted); + + if (!is_plain_column) + { + if (!likely(inserted)) + arena->rollback(str_serialized.size); + } + else + { + if (likely(inserted)) + it->data = arena->insert(str_serialized.data, str_serialized.size); + } + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + { + auto & cur_set = this->data(place).value; + auto & rhs_set = this->data(rhs).value; + cur_set.merge(rhs_set); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { + ColumnArray & arr_to = static_cast(to); + ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets(); + IColumn & data_to = arr_to.getData(); + + auto & set = this->data(place).value; + offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + set.size()); + + for (auto & elem : set) + { + deserializeAndInsert(elem, data_to); + } + } +}; + + +template <> +inline StringRef AggreagteFunctionGroupUniqArrayGeneric::getSerialization(const IColumn & column, size_t row_num, Arena & arena) +{ + const char * begin = nullptr; + return column.serializeValueIntoArena(row_num, arena, begin); +} + +template <> +inline StringRef AggreagteFunctionGroupUniqArrayGeneric::getSerialization(const IColumn & column, size_t row_num, Arena &) +{ + return column.getDataAt(row_num); +} + +template <> +inline void AggreagteFunctionGroupUniqArrayGeneric::deserializeAndInsert(StringRef str, IColumn & data_to) +{ + data_to.deserializeAndInsertFromArena(str.data); +} + +template <> +inline void AggreagteFunctionGroupUniqArrayGeneric::deserializeAndInsert(StringRef str, IColumn & data_to) +{ + data_to.insertData(str.data, str.size); +} + + #undef AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h index 781a8027db0..0005f251dec 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h @@ -104,7 +104,7 @@ public: static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { - return static_cast(*that).add(place, columns, row_num, arena); + static_cast(*that).add(place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override final { return &addFree; } diff --git a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h index f9f2e06d8cc..64269194750 100644 --- a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h @@ -23,14 +23,6 @@ using AggregateDataPtr = char *; using 
ConstAggregateDataPtr = const char *; -struct IAggregateDataWithArena -{ - Arena * arena = nullptr; - - inline Arena * getArena() { return arena; } -}; - - /** Интерфейс для агрегатных функций. * Экземпляры классов с этим интерфейсом не содержат самих данных для агрегации, * а содержат лишь метаданные (описание) агрегатной функции, @@ -105,12 +97,6 @@ public: */ virtual bool isState() const { return false; } - /** Возвращает true если при агрегации необходимо использовать "кучу", представленной Arena. - * В этом случае структура данных для агррегации должна быть унаследована от IAggregateDataWithArena. - * Указатель на необходимую Arena можно будет получить с помощью IAggregateDataWithArena::getArena(). - */ - //virtual bool needArena() const { return false; } - /** Внутренний цикл, использующий указатель на функцию, получается лучше, чем использующий виртуальную функцию. * Причина в том, что в случае виртуальных функций, GCC 5.1.2 генерирует код, diff --git a/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h index 62ee3a5ebe5..eef9f855a80 100644 --- a/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IUnaryAggregateFunction.h @@ -32,22 +32,9 @@ public: getDerived().addImpl(place, *columns[0], row_num, arena); } -// template -// static void addFreeWrapper(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) -// { -// static_cast(*that).addImpl(place, *columns[0], row_num); -// } -// -// template <> -// static void addFreeWrapper(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) -// { -// static_cast(*that).addImpl(place, *columns[0], row_num, arena); -// } - static void addFree(const IAggregateFunction * that, AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) { static_cast(*that).addImpl(place, 
*columns[0], row_num, arena); - //addFreeWrapper(that, place, columns, row_num, arena); } IAggregateFunction::AddFunc getAddressOfAddFunction() const override { return &addFree; } diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 3a0cb150517..15e0e05787f 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -39,20 +39,20 @@ namespace ErrorCodes /** Разные структуры данных, которые могут использоваться для агрегации * Для эффективности, сами данные для агрегации кладутся в пул. * Владение данными (состояний агрегатных функций) и пулом - * захватывается позднее - в функции convertToBlocks, объектом ColumnAggregateFunction. + * захватывается позднее - в функции convertToBlocks, объектом ColumnAggregateFunction. * * Большинство структур данных существует в двух вариантах: обычном и двухуровневом (TwoLevel). * Двухуровневая хэш-таблица работает чуть медленнее при маленьком количестве различных ключей, - * но при большом количестве различных ключей лучше масштабируется, так как позволяет - * распараллелить некоторые операции (слияние, пост-обработку) естественным образом. + * но при большом количестве различных ключей лучше масштабируется, так как позволяет + * распараллелить некоторые операции (слияние, пост-обработку) естественным образом. * * Чтобы обеспечить эффективную работу в большом диапазоне условий, - * сначала используются одноуровневые хэш-таблицы, - * а при достижении количеством различных ключей достаточно большого размера, - * они конвертируются в двухуровневые. + * сначала используются одноуровневые хэш-таблицы, + * а при достижении количеством различных ключей достаточно большого размера, + * они конвертируются в двухуровневые. * * PS. 
Существует много различных подходов к эффективной реализации параллельной и распределённой агрегации, - * лучшим образом подходящих для разных случаев, и этот подход - всего лишь один из них, выбранный по совокупности причин. + * лучшим образом подходящих для разных случаев, и этот подход - всего лишь один из них, выбранный по совокупности причин. */ using AggregatedDataWithoutKey = AggregateDataPtr; @@ -384,7 +384,7 @@ struct AggregationMethodConcat { /** Исправление, если все ключи - пустые массивы. Для них в хэш-таблицу записывается StringRef нулевой длины, но с ненулевым указателем. * Но при вставке в хэш-таблицу, такой StringRef оказывается равен другому ключу нулевой длины, - * у которого указатель на данные может быть любым мусором и использовать его нельзя. + * у которого указатель на данные может быть любым мусором и использовать его нельзя. */ for (size_t i = 0; i < keys_size; ++i) key_columns[i]->insertDefault(); @@ -525,14 +525,14 @@ struct AggregatedDataVariants : private boost::noncopyable * - при агрегации, состояния создаются в пуле с помощью функции IAggregateFunction::create (внутри - placement new произвольной структуры); * - они должны быть затем уничтожены с помощью IAggregateFunction::destroy (внутри - вызов деструктора произвольной структуры); * - если агрегация завершена, то, в функции Aggregator::convertToBlocks, указатели на состояния агрегатных функций - * записываются в ColumnAggregateFunction; ColumnAggregateFunction "захватывает владение" ими, то есть - вызывает destroy в своём деструкторе. + * записываются в ColumnAggregateFunction; ColumnAggregateFunction "захватывает владение" ими, то есть - вызывает destroy в своём деструкторе. 
* - если при агрегации, до вызова Aggregator::convertToBlocks вылетело исключение, - * то состояния агрегатных функций всё-равно должны быть уничтожены, - * иначе для сложных состояний (наприемер, AggregateFunctionUniq), будут утечки памяти; + * то состояния агрегатных функций всё-равно должны быть уничтожены, + * иначе для сложных состояний (наприемер, AggregateFunctionUniq), будут утечки памяти; * - чтобы, в этом случае, уничтожить состояния, в деструкторе вызывается метод Aggregator::destroyAggregateStates, - * но только если переменная aggregator (см. ниже) не nullptr; + * но только если переменная aggregator (см. ниже) не nullptr; * - то есть, пока вы не передали владение состояниями агрегатных функций в ColumnAggregateFunction, установите переменную aggregator, - * чтобы при возникновении исключения, состояния были корректно уничтожены. + * чтобы при возникновении исключения, состояния были корректно уничтожены. * * PS. Это можно исправить, сделав пул, который знает о том, какие состояния агрегатных функций и в каком порядке в него уложены, и умеет сам их уничтожать. * Но это вряд ли можно просто сделать, так как в этот же пул планируется класть строки переменной длины. @@ -763,7 +763,7 @@ using ManyAggregatedDataVariants = std::vector; * В случае наличия group_by_overflow_mode = 'any', данные агрегируются как обычно, кроме ключей, не поместившихся в max_rows_to_group_by. * Для этих ключей, данные агрегируются в одну дополнительную строку - далее см. под названиями overflow_row, overflows... * Позже, состояния агрегатных функций для всех строк (прошедших через HAVING) мерджатся в одну, - * а также к ним прибавляется или не прибавляется (в зависимости от настройки totals_mode) также overflow_row - это и будет TOTALS. + * а также к ним прибавляется или не прибавляется (в зависимости от настройки totals_mode) также overflow_row - это и будет TOTALS. 
*/ @@ -792,7 +792,7 @@ public: /// Настройки двухуровневой агрегации (используется для большого количества ключей). /** При каком количестве ключей или размере состояния агрегации в байтах, - * начинает использоваться двухуровневая агрегация. Достаточно срабатывания хотя бы одного из порогов. + * начинает использоваться двухуровневая агрегация. Достаточно срабатывания хотя бы одного из порогов. * 0 - соответствующий порог не задан. */ const size_t group_by_two_level_threshold; @@ -850,7 +850,7 @@ public: * Если overflow_row = true, то агрегаты для строк, не попавших в max_rows_to_group_by, кладутся в первый блок. * * Если final = false, то в качестве столбцов-агрегатов создаются ColumnAggregateFunction с состоянием вычислений, - * которые могут быть затем объединены с другими состояниями (для распределённой обработки запроса). + * которые могут быть затем объединены с другими состояниями (для распределённой обработки запроса). * Если final = true, то в качестве столбцов-агрегатов создаются столбцы с готовыми значениями. */ BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; @@ -917,8 +917,8 @@ protected: * Внутренний цикл (для случая without_key) получается почти в два раза компактнее; прирост производительности около 30%. * * 2. Вызов по указателю на функцию лучше, чем виртуальный вызов, потому что в случае виртуального вызова, - * GCC 5.1.2 генерирует код, который на каждой итерации цикла заново грузит из памяти в регистр адрес функции - * (значение по смещению в таблице виртуальных функций). + * GCC 5.1.2 генерирует код, который на каждой итерации цикла заново грузит из памяти в регистр адрес функции + * (значение по смещению в таблице виртуальных функций). */ struct AggregateFunctionInstruction { @@ -947,9 +947,9 @@ protected: /** Динамически скомпилированная библиотека для агрегации, если есть. * Смысл динамической компиляции в том, чтобы специализировать код - * под конкретный список агрегатных функций. 
+ * под конкретный список агрегатных функций. * Это позволяет развернуть цикл по созданию и обновлению состояний агрегатных функций, - * а также использовать вместо виртуальных вызовов inline-код. + * а также использовать вместо виртуальных вызовов inline-код. */ struct CompiledData { @@ -977,7 +977,7 @@ protected: void initialize(const Block & block); /** Установить блок - пример результата, - * только если он ещё не был установлен. + * только если он ещё не был установлен. */ void setSampleBlock(const Block & block); @@ -1065,7 +1065,8 @@ public: void executeSpecializedWithoutKey( AggregatedDataWithoutKey & res, size_t rows, - AggregateColumns & aggregate_columns) const; + AggregateColumns & aggregate_columns, + Arena * arena) const; protected: /// Слить данные из хэш-таблицы src в dst. diff --git a/dbms/include/DB/Interpreters/SpecializedAggregator.h b/dbms/include/DB/Interpreters/SpecializedAggregator.h index d75d95c8e5f..6a684178460 100644 --- a/dbms/include/DB/Interpreters/SpecializedAggregator.h +++ b/dbms/include/DB/Interpreters/SpecializedAggregator.h @@ -74,8 +74,8 @@ struct AggregateFunctionsUpdater const Sizes & offsets_of_aggregate_states_, Aggregator::AggregateColumns & aggregate_columns_, AggregateDataPtr & value_, - size_t row_num_ - Arena * arena_ = nullptr) + size_t row_num_, + Arena * arena_) : aggregate_functions(aggregate_functions_), offsets_of_aggregate_states(offsets_of_aggregate_states_), aggregate_columns(aggregate_columns_), @@ -266,7 +266,8 @@ template void NO_INLINE Aggregator::executeSpecializedWithoutKey( AggregatedDataWithoutKey & res, size_t rows, - AggregateColumns & aggregate_columns) const + AggregateColumns & aggregate_columns, + Arena * arena) const { /// Оптимизация в случае единственной агрегатной функции count. 
AggregateFunctionCount * agg_count = params.aggregates_size == 1 @@ -280,7 +281,7 @@ void NO_INLINE Aggregator::executeSpecializedWithoutKey( for (size_t i = 0; i < rows; ++i) { AggregateFunctionsList::forEach(AggregateFunctionsUpdater( - aggregate_functions, offsets_of_aggregate_states, aggregate_columns, res, i, nullptr)); + aggregate_functions, offsets_of_aggregate_states, aggregate_columns, res, i, arena)); } } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 0b35dd7cffc..efa89ede5af 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -1,158 +1,11 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include #include #include #include +#include namespace DB { -namespace -{ - -struct DataGroupUniqArray : public IAggregateDataWithArena -{ - using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory<16>>; - Set value; -}; - - -template -inline StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena) -{ - const char * begin = nullptr; - return column.serializeValueIntoArena(row_num, arena, begin); -} - -template <> -inline StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena) -{ - return column.getDataAt(row_num); -} - -template -inline void deserializeAndInsert(StringRef str, IColumn & data_to) -{ - data_to.deserializeAndInsertFromArena(str.data); -} - -template <> -inline void deserializeAndInsert(StringRef str, IColumn & data_to) -{ - data_to.insertData(str.data, str.size); -} - - -template -class AggreagteFunctionGroupUniqArrayGeneric : public IUnaryAggregateFunction> -{ - mutable DataTypePtr input_data_type; - - using State = DataGroupUniqArray; - -public: - - String getName() const override { return "groupUniqArray"; } - - void setArgument(const DataTypePtr & 
argument) - { - input_data_type = argument; - } - - DataTypePtr getReturnType() const override - { - return std::make_shared(input_data_type->clone()); - } - - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED); - - auto & set = this->data(place).value; - writeVarUInt(set.size(), buf); - - for (auto & elem: set) - { - writeStringBinary(elem, buf); - } - } - - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED); - - State::Set & set = this->data(place).value; - size_t size; - readVarUInt(size, buf); - //TODO: set.reserve(size); - - std::string str_buf; - for (size_t i = 0; i < size; i++) - { - readStringBinary(str_buf, buf); - set.insert(StringRef(str_buf)); - } - } - - void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena * arena) const - { - auto & set = this->data(place).value; - - bool inserted; - State::Set::iterator it; - - StringRef str_serialized = getSerialization(column, row_num, *arena); - set.emplace(str_serialized, it, inserted); - - if (!is_plain_column) - { - if (!likely(inserted)) - arena->rollback(str_serialized.size); - } - else - { - if (unlikely(inserted)) - { - it->data = arena->insert(str_serialized.data, str_serialized.size); - } - } - } - - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override - { - State::Set & cur_set = this->data(place).value; - const State::Set & rhs_set = this->data(rhs).value; - cur_set.merge(rhs_set); - } - - void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override - { - ColumnArray & arr_to = static_cast(to); - ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets(); - IColumn & data_to = arr_to.getData(); - - auto & set = this->data(place).value; - offsets_to.push_back((offsets_to.size() == 0 ? 
0 : offsets_to.back()) + set.size()); - - for (auto & elem : set) - { - deserializeAndInsert(elem, data_to); - } - } -}; - -} - - namespace { diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 7e8d87b6304..a591a3e214c 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -283,17 +283,18 @@ void Aggregator::compileIfPossible(AggregatedDataVariants::Type type) code << "template void Aggregator::executeSpecializedWithoutKey<\n" "\t" << "TypeList<" << aggregate_functions_typenames << ">>(\n" - "\tAggregatedDataWithoutKey &, size_t, AggregateColumns &) const;\n" + "\tAggregatedDataWithoutKey &, size_t, AggregateColumns &, Arena *) const;\n" "\n" "static void wrapper(\n" "\tconst Aggregator & aggregator,\n" "\tAggregatedDataWithoutKey & method,\n" "\tsize_t rows,\n" - "\tAggregator::AggregateColumns & aggregate_columns)\n" + "\tAggregator::AggregateColumns & aggregate_columns,\n" + "\tArena * arena)\n" "{\n" "\taggregator.executeSpecializedWithoutKey<\n" "\t\tTypeList<" << aggregate_functions_typenames << ">>(\n" - "\t\tmethod, rows, aggregate_columns);\n" + "\t\tmethod, rows, aggregate_columns, arena);\n" "}\n" "\n" "void * getPtr() __attribute__((__visibility__(\"default\")));\n" @@ -442,13 +443,6 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data, Arena * Код не очень удобный. */ aggregate_functions[j]->create(aggregate_data + offsets_of_aggregate_states[j]); - -// /// Прописываем указатель на Arena после создания, до этого она не валидна. -// char * data_cur = aggregate_data + offsets_of_aggregate_states[j]; -// if (aggregate_functions[j]->needArena()) -// { -// reinterpret_cast(data_cur)->arena = arena; -// } } catch (...) 
{ @@ -698,8 +692,8 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, if (compiled_data->compiled_method_ptr) { reinterpret_cast< - void (*)(const Aggregator &, AggregatedDataWithoutKey &, size_t, AggregateColumns &)> - (compiled_data->compiled_method_ptr)(*this, result.without_key, rows, aggregate_columns); + void (*)(const Aggregator &, AggregatedDataWithoutKey &, size_t, AggregateColumns &, Arena *)> + (compiled_data->compiled_method_ptr)(*this, result.without_key, rows, aggregate_columns, result.aggregates_pool); } else executeWithoutKeyImpl(result.without_key, rows, &aggregate_functions_instructions[0], result.aggregates_pool); From 946a037f71e5bf773c6231f92b9b9e77bde7f701 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 23 Sep 2016 02:26:08 +0300 Subject: [PATCH 05/15] Added requested changes. [#METR-22071] --- .../AggregateFunctionArray.h | 4 +-- .../AggregateFunctions/AggregateFunctionAvg.h | 2 +- .../AggregateFunctionCount.h | 2 +- .../AggregateFunctionGroupArray.h | 4 +-- .../AggregateFunctionGroupUniqArray.h | 31 ++++++++-------- .../AggregateFunctions/AggregateFunctionIf.h | 4 +-- .../AggregateFunctionMerge.h | 4 +-- .../AggregateFunctionQuantile.h | 4 +-- .../AggregateFunctionQuantileDeterministic.h | 4 +-- .../AggregateFunctionQuantileExact.h | 4 +-- .../AggregateFunctionQuantileExactWeighted.h | 4 +-- .../AggregateFunctionQuantileTDigest.h | 8 ++--- .../AggregateFunctionQuantileTiming.h | 8 ++--- .../AggregateFunctionSequenceMatch.h | 2 +- .../AggregateFunctionState.h | 4 +-- .../AggregateFunctions/AggregateFunctionSum.h | 2 +- .../AggregateFunctionUniq.h | 4 +-- .../AggregateFunctionUniqUpTo.h | 4 +-- .../AggregateFunctionsArgMinMax.h | 2 +- .../AggregateFunctionsMinMaxAny.h | 2 +- .../AggregateFunctionsStatistics.h | 4 +-- .../AggregateFunctions/IAggregateFunction.h | 21 ++++++----- .../DB/Columns/ColumnAggregateFunction.h | 2 +- dbms/include/DB/Common/HashTable/HashTable.h | 4 --- 
dbms/include/DB/IO/ReadHelpers.h | 14 ++++++++ dbms/include/DB/Interpreters/Aggregator.h | 6 ++-- .../AggregateFunctionDebug.cpp | 2 +- .../AggregateFunctionGroupUniqArray.cpp | 7 ++-- .../DataTypes/DataTypeAggregateFunction.cpp | 12 ++++--- dbms/src/Interpreters/Aggregator.cpp | 35 +++---------------- 30 files changed, 99 insertions(+), 111 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h index 58fc8ebd7a0..8e1d0b808b2 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h @@ -110,9 +110,9 @@ public: nested_func->serialize(place, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { - nested_func->deserialize(place, buf); + nested_func->deserialize(place, buf, arena); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h index 8d03211c003..9bfd7730f59 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h @@ -60,7 +60,7 @@ public: writeVarUInt(this->data(place).count, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { readBinary(this->data(place).sum, buf); readVarUInt(this->data(place).count, buf); diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h index 805ea48f071..ccf9584ebb4 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h @@ -46,7 
+46,7 @@ public: writeVarUInt(data(place).count, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { readVarUInt(data(place).count, buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h index 8bff6d57553..23a801d2d7e 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h @@ -69,7 +69,7 @@ public: buf.write(reinterpret_cast(&value[0]), size * sizeof(value[0])); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { size_t size = 0; readVarUInt(size, buf); @@ -148,7 +148,7 @@ public: type->serializeBinary(value[i], buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { size_t size = 0; readVarUInt(size, buf); diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h index cc526a48900..a4b2329f311 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -68,14 +68,14 @@ public: void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - const typename State::Set & set = this->data(place).value; + auto & set = this->data(place).value; size_t size = set.size(); writeVarUInt(size, buf); - for (auto it = set.begin(); it != set.end(); ++it) - writeIntBinary(*it, buf); + for (auto & elem : set) + writeIntBinary(elem, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void 
deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).value.read(buf); } @@ -108,10 +108,13 @@ struct AggreagteFunctionGroupUniqArrayGenericData Set value; }; -template +/** Template parameter with true value should be used for columns that store their elements in memory continuously. + * For such columns groupUniqArray() can be implemented more efficently (especially for small numeric arrays). + */ +template class AggreagteFunctionGroupUniqArrayGeneric : public IUnaryAggregateFunction> { - mutable DataTypePtr input_data_type; + DataTypePtr input_data_type; using State = AggreagteFunctionGroupUniqArrayGenericData; @@ -138,24 +141,24 @@ public: auto & set = this->data(place).value; writeVarUInt(set.size(), buf); - for (auto & elem: set) + for (const auto & elem : set) { writeStringBinary(elem, buf); } } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { - State::Set & set = this->data(place).value; + auto & set = this->data(place).value; size_t size; readVarUInt(size, buf); //TODO: set.reserve(size); - std::string str_buf; + arena = new Arena(size * 10); + for (size_t i = 0; i < size; i++) { - readStringBinary(str_buf, buf); - set.insert(StringRef(str_buf)); + set.insert(readStringBinaryInto(*arena, buf)); } } @@ -171,12 +174,12 @@ public: if (!is_plain_column) { - if (!likely(inserted)) + if (!inserted) arena->rollback(str_serialized.size); } else { - if (likely(inserted)) + if (inserted) it->data = arena->insert(str_serialized.data, str_serialized.size); } } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h index c699cbaee3f..f119b4b4e5d 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h @@ -93,9 +93,9 @@ public: nested_func->serialize(place, 
buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { - nested_func->deserialize(place, buf); + nested_func->deserialize(place, buf, arena); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h index caa7c46d222..70b460f7eb9 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h @@ -94,9 +94,9 @@ public: nested_func->serialize(place, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { - nested_func->deserialize(place, buf); + nested_func->deserialize(place, buf, arena); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h index b157f474013..c460ceef46d 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h @@ -83,7 +83,7 @@ public: this->data(place).sample.write(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).sample.read(buf); } @@ -160,7 +160,7 @@ public: this->data(place).sample.write(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).sample.read(buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h 
b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h index 6c1096f9d2a..f05fdbef37a 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h @@ -90,7 +90,7 @@ public: this->data(place).sample.write(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).sample.read(buf); } @@ -174,7 +174,7 @@ public: this->data(place).sample.write(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).sample.read(buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h index 17f23620c14..9f614e1fe2e 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h @@ -87,7 +87,7 @@ public: buf.write(reinterpret_cast(&array[0]), size * sizeof(array[0])); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { auto & array = this->data(place).array; @@ -169,7 +169,7 @@ public: buf.write(reinterpret_cast(&array[0]), size * sizeof(array[0])); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { auto & array = this->data(place).array; diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h index 72394daae17..aee687801c7 100644 --- 
a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h @@ -95,7 +95,7 @@ public: this->data(place).map.write(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { typename AggregateFunctionQuantileExactWeightedData::Map::Reader reader(buf); @@ -210,7 +210,7 @@ public: this->data(place).map.write(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { typename AggregateFunctionQuantileExactWeightedData::Map::Reader reader(buf); diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h index a5ead2ebc2e..0d64d9724f9 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h @@ -397,7 +397,7 @@ public: this->data(const_cast(place)).digest.write(params, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).digest.read(params, buf); } @@ -466,7 +466,7 @@ public: this->data(const_cast(place)).digest.write(params, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).digest.read(params, buf); } @@ -528,7 +528,7 @@ public: this->data(const_cast(place)).digest.write(params, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).digest.read(params, buf); 
} @@ -610,7 +610,7 @@ public: this->data(const_cast(place)).digest.write(params, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).digest.read(params, buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h index 83f99bb3bd0..be4cedd4e1e 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h @@ -830,7 +830,7 @@ public: this->data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } @@ -890,7 +890,7 @@ public: this->data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } @@ -945,7 +945,7 @@ public: this->data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } @@ -1008,7 +1008,7 @@ public: this->data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h index f39826418d4..ff0eb7ce7a0 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ 
b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -216,7 +216,7 @@ public: data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { data(place).deserialize(buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h index 0005f251dec..ce5818dd7e8 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h @@ -87,9 +87,9 @@ public: nested_func->serialize(place, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { - nested_func->deserialize(place, buf); + nested_func->deserialize(place, buf, arena); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h index 182b9fbc123..f9dcb058f72 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h @@ -55,7 +55,7 @@ public: writeBinary(this->data(place).sum, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { readBinary(this->data(place).sum, buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 6abb9334447..25e7c4ce275 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -355,7 +355,7 @@ public: this->data(place).set.write(buf); } - void 
deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).set.read(buf); } @@ -410,7 +410,7 @@ public: this->data(place).set.write(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).set.read(buf); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h index a0875c7356e..57234169e9c 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -166,7 +166,7 @@ public: this->data(place).write(buf, threshold); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).read(buf, threshold); } @@ -239,7 +239,7 @@ public: this->data(place).write(buf, threshold); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).read(buf, threshold); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h index fe84ff9eb11..7f8589b9b7f 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h @@ -59,7 +59,7 @@ public: this->data(place).value.write(buf, *type_val.get()); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).result.read(buf, *type_res.get()); this->data(place).value.read(buf, *type_val.get()); 
diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h index 05780dc12ac..5a29b993aa5 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h @@ -664,7 +664,7 @@ public: this->data(place).write(buf, *type.get()); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).read(buf, *type.get()); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h index dde98f84157..d7aef519b6f 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h @@ -144,7 +144,7 @@ public: this->data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } @@ -412,7 +412,7 @@ public: this->data(place).serialize(buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } diff --git a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h index 64269194750..3ca2a43cad7 100644 --- a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h @@ -77,19 +77,23 @@ public: /// Как должна быть выровнена структура с данными. NOTE: Сейчас не используется (структуры с состоянием агрегации кладутся без выравнивания). virtual size_t alignOfData() const = 0; - /// Добавить значение. 
columns - столбцы, содержащие аргументы, row_num - номер строки в столбцах. + /** Adds a value into aggregation data on which place points to. + * columns points to columns containing arguments of aggregation function. + * row_num is number of row which should be added. + * Additional parameter arena should be used instead of standard memory allocator if the addition requires memory allocation. + */ virtual void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const = 0; - /// Объединить состояние с другим состоянием. + /// Merges state (on which place points to) with other state of current aggregation function. virtual void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const = 0; - /// Сериализовать состояние (например, для передачи по сети). + /// Serializes state (to transmit it over the network, for example). virtual void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const = 0; - /// Десериализовать состояние. Вызывается для пустого (только что созданного) состояния. - virtual void deserialize(AggregateDataPtr place, ReadBuffer & buf) const = 0; + /// Deserializes state. This function is called only for empty (just created) states. + virtual void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const = 0; - /// Вставить результат в столбец. + /// Inserts results into a column. virtual void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const = 0; /** Возвращает true для агрегатных функций типа -State. 
@@ -145,11 +149,6 @@ public: { return __alignof__(Data); } - -// bool needArena() const override -// { -// return std::is_base_of(); -// } }; diff --git a/dbms/include/DB/Columns/ColumnAggregateFunction.h b/dbms/include/DB/Columns/ColumnAggregateFunction.h index 4897348831f..fe2874ef4fd 100644 --- a/dbms/include/DB/Columns/ColumnAggregateFunction.h +++ b/dbms/include/DB/Columns/ColumnAggregateFunction.h @@ -178,7 +178,7 @@ public: getData().push_back(arena.alloc(function->sizeOfData())); function->create(getData().back()); ReadBufferFromString read_buffer(x.get()); - function->deserialize(getData().back(), read_buffer); + function->deserialize(getData().back(), read_buffer, &arena); } void insertDefault() override diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index b78053c3fb2..e5c41451555 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -70,10 +70,6 @@ bool check(const T x) { return x == 0; } template void set(T & x) { x = 0; } -bool check(const std::string & x); - -void set(std::string & x); - }; diff --git a/dbms/include/DB/IO/ReadHelpers.h b/dbms/include/DB/IO/ReadHelpers.h index 03cfbb6b9cd..d11983452cc 100644 --- a/dbms/include/DB/IO/ReadHelpers.h +++ b/dbms/include/DB/IO/ReadHelpers.h @@ -14,8 +14,10 @@ #include #include +#include #include #include +#include #include #include @@ -126,6 +128,18 @@ inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t MAX_STRIN } +inline StringRef readStringBinaryInto(Arena & arena, ReadBuffer & buf) +{ + size_t size = 0; + readVarUInt(size, buf); + + char * data = arena.alloc(size); + buf.readStrict(data, size); + + return StringRef(data, size); +} + + template void readVectorBinary(std::vector & v, ReadBuffer & buf, size_t MAX_VECTOR_SIZE = DEFAULT_MAX_STRING_SIZE) { diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 
15e0e05787f..45918235d2c 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -44,7 +44,7 @@ namespace ErrorCodes * Большинство структур данных существует в двух вариантах: обычном и двухуровневом (TwoLevel). * Двухуровневая хэш-таблица работает чуть медленнее при маленьком количестве различных ключей, * но при большом количестве различных ключей лучше масштабируется, так как позволяет - * распараллелить некоторые операции (слияние, пост-обработку) естественным образом. + * распараллелить некоторые операции (слияние, пост-обработку) естественным образом. * * Чтобы обеспечить эффективную работу в большом диапазоне условий, * сначала используются одноуровневые хэш-таблицы, @@ -977,7 +977,7 @@ protected: void initialize(const Block & block); /** Установить блок - пример результата, - * только если он ещё не был установлен. + * только если он ещё не был установлен. */ void setSampleBlock(const Block & block); @@ -986,7 +986,7 @@ protected: /** Создать состояния агрегатных функций для одного ключа. */ - void createAggregateStates(AggregateDataPtr & aggregate_data, Arena * arena) const; + void createAggregateStates(AggregateDataPtr & aggregate_data) const; /** Вызвать методы destroy для состояний агрегатных функций. * Используется в обработчике исключений при агрегации, так как RAII в данном случае не применим. 
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp b/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp index ff1e69a4956..ed00c6001dc 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp @@ -63,7 +63,7 @@ public: writeBinary(UInt8(0), buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { UInt8 tmp; readBinary(tmp, buf); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index efa89ede5af..2ef49afe4c4 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB { @@ -11,12 +10,12 @@ namespace static IAggregateFunction * createWithExtraTypes(const IDataType & argument_type) { - if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; - else if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; + if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; + else if (typeid_cast(&argument_type)) return new AggregateFunctionGroupUniqArray; else { /// Check that we can use plain version of AggreagteFunctionGroupUniqArrayGeneric - if (typeid_cast(&argument_type)) + if (typeid_cast(&argument_type) || typeid_cast(&argument_type)) return new AggreagteFunctionGroupUniqArrayGeneric; auto * array_type = typeid_cast(&argument_type); diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 1d09e77389a..1bb410ade44 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -62,13 +62,14 @@ void DataTypeAggregateFunction::deserializeBinary(IColumn & 
column, ReadBuffer & { ColumnAggregateFunction & column_concrete = static_cast(column); + Arena & arena = column_concrete.createOrGetArena(); size_t size_of_state = function->sizeOfData(); - AggregateDataPtr place = column_concrete.createOrGetArena().alloc(size_of_state); + AggregateDataPtr place = arena.alloc(size_of_state); function->create(place); try { - function->deserialize(place, istr); + function->deserialize(place, istr, &arena); } catch (...) { @@ -116,7 +117,7 @@ void DataTypeAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & try { - function->deserialize(place, istr); + function->deserialize(place, istr, &arena); } catch (...) { @@ -140,15 +141,16 @@ static void deserializeFromString(const AggregateFunctionPtr & function, IColumn { ColumnAggregateFunction & column_concrete = static_cast(column); + Arena & arena = column_concrete.createOrGetArena(); size_t size_of_state = function->sizeOfData(); - AggregateDataPtr place = column_concrete.createOrGetArena().alloc(size_of_state); + AggregateDataPtr place = arena.alloc(size_of_state); function->create(place); try { ReadBufferFromString istr(s); - function->deserialize(place, istr); + function->deserialize(place, istr, &arena); } catch (...) 
{ diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index a591a3e214c..6f40160962f 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -432,7 +432,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColu } -void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data, Arena * arena) const +void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const { for (size_t j = 0; j < params.aggregates_size; ++j) { @@ -558,7 +558,7 @@ void NO_INLINE Aggregator::executeImplCase( method.onNewKey(*it, params.keys_size, i, keys, *aggregates_pool); AggregateDataPtr place = aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place, aggregates_pool); + createAggregateStates(place); aggregate_data = place; } else @@ -679,7 +679,7 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, if ((params.overflow_row || result.type == AggregatedDataVariants::Type::without_key) && !result.without_key) { AggregateDataPtr place = result.aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place, result.aggregates_pool); + createAggregateStates(place); result.without_key = place; } @@ -1005,7 +1005,6 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( ColumnPlainPtrs & final_aggregate_columns, const Sizes & key_sizes) const { - //LOG_DEBUG(log, "convertToBlockImplFinal start"); for (const auto & value : data) { method.insertKeyIntoColumns(value, key_columns, params.keys_size, key_sizes); @@ -1017,7 +1016,6 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( } destroyImpl(method, data); /// NOTE Можно сделать лучше. 
- //LOG_DEBUG(log, "convertToBlockImplFinal exit"); } template @@ -1028,7 +1026,6 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal( AggregateColumnsData & aggregate_columns, const Sizes & key_sizes) const { - //LOG_DEBUG(log, "convertToBlockImplFinal start"); for (auto & value : data) { @@ -1040,7 +1037,6 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal( Method::getAggregateData(value.second) = nullptr; } - //LOG_DEBUG(log, "convertToBlockImplFinal exit"); } @@ -1165,7 +1161,6 @@ BlocksList Aggregator::prepareBlocksAndFillSingleLevel(AggregatedDataVariants & const Sizes & key_sizes, bool final) { - //LOG_DEBUG(log, "prepareBlocksAndFillSingleLevel start"); #define M(NAME) \ else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, \ @@ -1176,7 +1171,6 @@ BlocksList Aggregator::prepareBlocksAndFillSingleLevel(AggregatedDataVariants & #undef M else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - //LOG_DEBUG(log, "prepareBlocksAndFillSingleLevel exit"); }; BlocksList blocks; @@ -1346,8 +1340,6 @@ void NO_INLINE Aggregator::mergeDataImpl( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy( Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); - - //LOG_DEBUG(log, "mergeDataImpl"); } else { @@ -1384,8 +1376,6 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( aggregate_functions[i]->destroy( Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); - //LOG_DEBUG(log, "mergeDataNoMoreKeysImpl"); - Method::getAggregateData(it->second) = nullptr; } @@ -1415,8 +1405,6 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( aggregate_functions[i]->destroy( Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); - //LOG_DEBUG(log, "mergeDataOnlyExistingKeysImpl"); - Method::getAggregateData(it->second) = nullptr; } @@ -1441,8 +1429,6 @@ void 
NO_INLINE Aggregator::mergeWithoutKeyDataImpl( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy(current_data + offsets_of_aggregate_states[i]); - //LOG_DEBUG(log, "mergeWithoutKeyDataImpl"); - current_data = nullptr; } } @@ -1458,8 +1444,6 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( /// Все результаты агрегации соединяем с первым. for (size_t i = 1, size = non_empty_data.size(); i < size; ++i) { - //LOG_DEBUG(log, "mergeSingleLevelDataImpl for_begin " << i << "/" << size-1); - if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) break; @@ -1481,10 +1465,7 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( /// current не будет уничтожать состояния агрегатных функций в деструкторе current.aggregator = nullptr; - - //LOG_DEBUG(log, "mergeSingleLevelDataImpl for_end " << i << "/" << size-1); } - //LOG_DEBUG(log, "mergeSingleLevelDataImpl exit"); } @@ -1817,7 +1798,7 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( method.onNewKey(*it, params.keys_size, i, keys, *aggregates_pool); AggregateDataPtr place = aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place, aggregates_pool); + createAggregateStates(place); aggregate_data = place; } else @@ -1830,8 +1811,6 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( aggregate_functions[j]->merge( value + offsets_of_aggregate_states[j], (*aggregate_columns[j])[i]); - - LOG_DEBUG(log, "mergeStreamsImplCase"); } /// Пораньше освобождаем память. 
@@ -1869,7 +1848,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( if (!res) { AggregateDataPtr place = result.aggregates_pool->alloc(total_size_of_aggregate_states); - createAggregateStates(place, result.aggregates_pool); + createAggregateStates(place); res = place; } @@ -2308,8 +2287,6 @@ void NO_INLINE Aggregator::destroyImpl( Method & method, Table & table) const { - //LOG_DEBUG(log, "destroyImpl start"); - for (auto elem : table) { AggregateDataPtr & data = Method::getAggregateData(elem.second); @@ -2327,8 +2304,6 @@ void NO_INLINE Aggregator::destroyImpl( data = nullptr; } - - //LOG_DEBUG(log, "destroyImpl start"); } From 80f037b12e5dfca144491edd5e2fe5fbf45eeaf8 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Sat, 24 Sep 2016 02:33:17 +0300 Subject: [PATCH 06/15] Added arena parameter into merge() method of aggregation function states. [#METR-22071] --- .../AggregateFunctionArray.h | 4 +- .../AggregateFunctions/AggregateFunctionAvg.h | 2 +- .../AggregateFunctionCount.h | 2 +- .../AggregateFunctionGroupArray.h | 4 +- .../AggregateFunctionGroupUniqArray.h | 18 ++++++--- .../AggregateFunctions/AggregateFunctionIf.h | 4 +- .../AggregateFunctionMerge.h | 8 ++-- .../AggregateFunctionQuantile.h | 4 +- .../AggregateFunctionQuantileDeterministic.h | 4 +- .../AggregateFunctionQuantileExact.h | 4 +- .../AggregateFunctionQuantileExactWeighted.h | 4 +- .../AggregateFunctionQuantileTDigest.h | 8 ++-- .../AggregateFunctionQuantileTiming.h | 8 ++-- .../AggregateFunctionSequenceMatch.h | 2 +- .../AggregateFunctionState.h | 4 +- .../AggregateFunctions/AggregateFunctionSum.h | 2 +- .../AggregateFunctionUniq.h | 4 +- .../AggregateFunctionUniqUpTo.h | 4 +- .../AggregateFunctionsArgMinMax.h | 2 +- .../AggregateFunctionsMinMaxAny.h | 2 +- .../AggregateFunctionsStatistics.h | 4 +- .../AggregateFunctions/IAggregateFunction.h | 2 +- .../DB/Columns/ColumnAggregateFunction.h | 2 +- .../DB/Functions/FunctionsMiscellaneous.h | 2 +- 
dbms/include/DB/Interpreters/Aggregator.h | 9 +++-- .../AggregateFunctionDebug.cpp | 2 +- .../TotalsHavingBlockInputStream.cpp | 4 +- dbms/src/Interpreters/Aggregator.cpp | 40 +++++++++++++------ 28 files changed, 91 insertions(+), 68 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h index 8e1d0b808b2..c632d4481ee 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionArray.h @@ -100,9 +100,9 @@ public: nested_func->add(place, nested, i, nullptr); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { - nested_func->merge(place, rhs); + nested_func->merge(place, rhs, arena); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h index 9bfd7730f59..77fd0ad15ec 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h @@ -48,7 +48,7 @@ public: ++this->data(place).count; } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).sum += this->data(rhs).sum; this->data(place).count += this->data(rhs).count; diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h index ccf9584ebb4..d3f3392ef8f 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionCount.h @@ -36,7 +36,7 @@ public: ++data(place).count; } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) 
const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { data(place).count += data(rhs).count; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h index 23a801d2d7e..b8336f9159c 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupArray.h @@ -56,7 +56,7 @@ public: this->data(place).value.push_back(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).value.insert(this->data(rhs).value.begin(), this->data(rhs).value.end()); } @@ -134,7 +134,7 @@ public: column.get(row_num, data(place).value.back()); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { data(place).value.insert(data(place).value.end(), data(rhs).value.begin(), data(rhs).value.end()); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h index a4b2329f311..acb904d784b 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -61,7 +61,7 @@ public: this->data(place).value.insert(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).value.merge(this->data(rhs).value); } @@ -101,7 +101,7 @@ public: }; -/// Generic implementation +/// Generic implementation, it uses serialized 
representation as object descriptor. struct AggreagteFunctionGroupUniqArrayGenericData { using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory<16>>; @@ -154,8 +154,6 @@ public: readVarUInt(size, buf); //TODO: set.reserve(size); - arena = new Arena(size * 10); - for (size_t i = 0; i < size; i++) { set.insert(readStringBinaryInto(*arena, buf)); @@ -184,11 +182,19 @@ public: } } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { auto & cur_set = this->data(place).value; auto & rhs_set = this->data(rhs).value; - cur_set.merge(rhs_set); + + bool inserted; + State::Set::iterator it; + for (auto & rhs_elem : rhs_set) + { + cur_set.emplace(rhs_elem, it, inserted); + if (inserted) + it->data = arena->insert(it->data, it->size); + } } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h index f119b4b4e5d..f8c5e671e2c 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionIf.h @@ -83,9 +83,9 @@ public: nested_func->add(place, columns, row_num, nullptr); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { - nested_func->merge(place, rhs); + nested_func->merge(place, rhs, arena); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h index 70b460f7eb9..9698c50e817 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionMerge.h @@ -79,14 +79,14 @@ public: return 
nested_func->alignOfData(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - nested_func->merge(place, static_cast(*columns[0]).getData()[row_num]); + nested_func->merge(place, static_cast(*columns[0]).getData()[row_num], arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { - nested_func->merge(place, rhs); + nested_func->merge(place, rhs, arena); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h index c460ceef46d..e6d88de086a 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h @@ -73,7 +73,7 @@ public: this->data(place).sample.insert(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).sample.merge(this->data(rhs).sample); } @@ -150,7 +150,7 @@ public: this->data(place).sample.insert(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).sample.merge(this->data(rhs).sample); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h index f05fdbef37a..6fa4ac0b5f1 100644 --- 
a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h @@ -80,7 +80,7 @@ public: determinator.get64(row_num)); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).sample.merge(this->data(rhs).sample); } @@ -164,7 +164,7 @@ public: determinator.get64(row_num)); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).sample.merge(this->data(rhs).sample); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h index 9f614e1fe2e..da02b4473a4 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExact.h @@ -73,7 +73,7 @@ public: this->data(place).array.push_back(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).array.insert(this->data(rhs).array.begin(), this->data(rhs).array.end()); } @@ -155,7 +155,7 @@ public: this->data(place).array.push_back(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).array.insert(this->data(rhs).array.begin(), this->data(rhs).array.end()); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h 
index aee687801c7..0f4a963b1c4 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileExactWeighted.h @@ -81,7 +81,7 @@ public: += static_cast &>(column_weight).getData()[row_num]; } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { auto & map = this->data(place).map; const auto & rhs_map = this->data(rhs).map; @@ -196,7 +196,7 @@ public: += static_cast &>(column_weight).getData()[row_num]; } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { auto & map = this->data(place).map; const auto & rhs_map = this->data(rhs).map; diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h index 0d64d9724f9..7f82de744ec 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTDigest.h @@ -387,7 +387,7 @@ public: this->data(place).digest.add(params, static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).digest.merge(params, this->data(rhs).digest); } @@ -456,7 +456,7 @@ public: static_cast &>(column_weight).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).digest.merge(params, this->data(rhs).digest); } @@ -518,7 +518,7 @@ public: this->data(place).digest.add(params, static_cast 
&>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).digest.merge(params, this->data(rhs).digest); } @@ -600,7 +600,7 @@ public: static_cast &>(column_weight).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).digest.merge(params, this->data(rhs).digest); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h index be4cedd4e1e..b659d4b7282 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileTiming.h @@ -820,7 +820,7 @@ public: this->data(place).insert(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).merge(this->data(rhs)); } @@ -880,7 +880,7 @@ public: static_cast &>(column_weight).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).merge(this->data(rhs)); } @@ -935,7 +935,7 @@ public: this->data(place).insert(static_cast &>(column).getData()[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).merge(this->data(rhs)); } @@ -998,7 +998,7 @@ public: static_cast &>(column_weight).getData()[row_num]); } - void merge(AggregateDataPtr place, 
ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).merge(this->data(rhs)); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h index ff0eb7ce7a0..b59f1621de2 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -206,7 +206,7 @@ public: data(place).add(timestamp, events); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { data(place).merge(data(rhs)); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h index ce5818dd7e8..dae17053301 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionState.h @@ -77,9 +77,9 @@ public: nested_func->add(place, columns, row_num, arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { - nested_func->merge(place, rhs); + nested_func->merge(place, rhs, arena); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h index f9dcb058f72..983ebd7eaad 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h @@ -45,7 +45,7 @@ public: this->data(place).sum += static_cast &>(column).getData()[row_num]; } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void 
merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).sum += this->data(rhs).sum; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 25e7c4ce275..ecc6c0f4978 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -345,7 +345,7 @@ public: detail::OneAdder::addImpl(this->data(place), column, row_num); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).set.merge(this->data(rhs).set); } @@ -400,7 +400,7 @@ public: this->data(place).set.insert(UniqVariadicHash::apply(num_args, columns, row_num)); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).set.merge(this->data(rhs).set); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h index 57234169e9c..b8dabf972db 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -156,7 +156,7 @@ public: this->data(place).addImpl(column, row_num, threshold); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).merge(this->data(rhs), threshold); } @@ -229,7 +229,7 @@ public: this->data(place).insert(UniqVariadicHash::apply(num_args, columns, row_num), threshold); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena 
* arena) const override { this->data(place).merge(this->data(rhs), threshold); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h index 7f8589b9b7f..60a2343b895 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h @@ -47,7 +47,7 @@ public: this->data(place).result.change(column_arg, row_num); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { if (this->data(place).value.changeIfBetter(this->data(rhs).value)) this->data(place).result.change(this->data(rhs).result); diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h index 5a29b993aa5..3a133fac183 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h @@ -654,7 +654,7 @@ public: this->data(place).changeIfBetter(column, row_num); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).changeIfBetter(this->data(rhs)); } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h index d7aef519b6f..e2ce2e8a755 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsStatistics.h @@ -134,7 +134,7 @@ public: this->data(place).update(column, row_num); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { 
this->data(place).mergeWith(this->data(rhs)); } @@ -402,7 +402,7 @@ public: this->data(place).update(column_left, column_right, row_num); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { this->data(place).mergeWith(this->data(rhs)); } diff --git a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h index 3ca2a43cad7..34d5a4daf30 100644 --- a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h @@ -85,7 +85,7 @@ public: virtual void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const = 0; /// Merges state (on which place points to) with other state of current aggregation function. - virtual void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const = 0; + virtual void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const = 0; /// Serializes state (to transmit it over the network, for example). 
virtual void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const = 0; diff --git a/dbms/include/DB/Columns/ColumnAggregateFunction.h b/dbms/include/DB/Columns/ColumnAggregateFunction.h index fe2874ef4fd..f087a3e980d 100644 --- a/dbms/include/DB/Columns/ColumnAggregateFunction.h +++ b/dbms/include/DB/Columns/ColumnAggregateFunction.h @@ -159,7 +159,7 @@ public: /// Объединить состояние в последней строке с заданным void insertMergeFrom(const IColumn & src, size_t n) { - func->merge(getData().back(), static_cast(src).getData()[n]); + func->merge(getData().back(), static_cast(src).getData()[n], &createOrGetArena()); } Arena & createOrGetArena() diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 6a9f9682503..7a07c82045e 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -1245,7 +1245,7 @@ public: const auto & states = column_with_states->getData(); for (const auto & state_to_add : states) { - agg_func.merge(place.get(), state_to_add); + agg_func.merge(place.get(), state_to_add, nullptr); /// Empty arena! agg_func.insertResultInto(place.get(), result_column); } } diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 45918235d2c..252033b4bc3 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -1073,20 +1073,23 @@ protected: template void mergeDataImpl( Table & table_dst, - Table & table_src) const; + Table & table_src, + Arena * arena) const; /// Слить данные из хэш-таблицы src в dst, но только для ключей, которые уже есть в dst. В остальных случаях, слить данные в overflows. template void mergeDataNoMoreKeysImpl( Table & table_dst, AggregatedDataWithoutKey & overflows, - Table & table_src) const; + Table & table_src, + Arena * arena) const; /// То же самое, но игнорирует остальные ключи. 
template void mergeDataOnlyExistingKeysImpl( Table & table_dst, - Table & table_src) const; + Table & table_src, + Arena * arena) const; void mergeWithoutKeyDataImpl( ManyAggregatedDataVariants & non_empty_data) const; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp b/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp index ed00c6001dc..6ba6a8536b5 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionDebug.cpp @@ -54,7 +54,7 @@ public: { } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { } diff --git a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp index 56dddc0d725..c23f58356d2 100644 --- a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp +++ b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp @@ -208,12 +208,12 @@ void TotalsHavingBlockInputStream::addToTotals(Block & totals, Block & block, co { for (size_t j = 0; j < size; ++j) if ((*filter)[j]) - function->merge(data, vec[j]); + function->merge(data, vec[j], arena.get()); } else { for (size_t j = 0; j < size; ++j) - function->merge(data, vec[j]); + function->merge(data, vec[j], arena.get()); } } } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 6f40160962f..0ff62804c73 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1322,7 +1322,8 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b template void NO_INLINE Aggregator::mergeDataImpl( Table & table_dst, - Table & table_src) const + Table & table_src, + Arena * arena) const { for (auto it = table_src.begin(); it != table_src.end(); ++it) { @@ -1335,7 +1336,8 @@ void NO_INLINE Aggregator::mergeDataImpl( for (size_t i = 0; i < params.aggregates_size; ++i) 
aggregate_functions[i]->merge( Method::getAggregateData(res_it->second) + offsets_of_aggregate_states[i], - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + Method::getAggregateData(it->second) + offsets_of_aggregate_states[i], + arena); for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy( @@ -1357,7 +1359,8 @@ template void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( Table & table_dst, AggregatedDataWithoutKey & overflows, - Table & table_src) const + Table & table_src, + Arena * arena) const { for (auto it = table_src.begin(); it != table_src.end(); ++it) { @@ -1370,7 +1373,8 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->merge( res_data + offsets_of_aggregate_states[i], - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + Method::getAggregateData(it->second) + offsets_of_aggregate_states[i], + arena); for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy( @@ -1385,7 +1389,8 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( template void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( Table & table_dst, - Table & table_src) const + Table & table_src, + Arena * arena) const { for (auto it = table_src.begin(); it != table_src.end(); ++it) { @@ -1399,7 +1404,8 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->merge( res_data + offsets_of_aggregate_states[i], - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); + Method::getAggregateData(it->second) + offsets_of_aggregate_states[i], + arena); for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy( @@ -1424,7 +1430,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( AggregatedDataWithoutKey & current_data = non_empty_data[i]->without_key; for (size_t i = 0; i < 
params.aggregates_size; ++i) - aggregate_functions[i]->merge(res_data + offsets_of_aggregate_states[i], current_data + offsets_of_aggregate_states[i]); + aggregate_functions[i]->merge(res_data + offsets_of_aggregate_states[i], current_data + offsets_of_aggregate_states[i], res->aggregates_pool); for (size_t i = 0; i < params.aggregates_size; ++i) aggregate_functions[i]->destroy(current_data + offsets_of_aggregate_states[i]); @@ -1452,16 +1458,19 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( if (!no_more_keys) mergeDataImpl( getDataVariant(*res).data, - getDataVariant(current).data); + getDataVariant(current).data, + res->aggregates_pool); else if (res->without_key) mergeDataNoMoreKeysImpl( getDataVariant(*res).data, res->without_key, - getDataVariant(current).data); + getDataVariant(current).data, + res->aggregates_pool); else mergeDataOnlyExistingKeysImpl( getDataVariant(*res).data, - getDataVariant(current).data); + getDataVariant(current).data, + res->aggregates_pool); /// current не будет уничтожать состояния агрегатных функций в деструкторе current.aggregator = nullptr; @@ -1479,9 +1488,13 @@ void NO_INLINE Aggregator::mergeBucketImpl( { AggregatedDataVariants & current = *data[i]; + /// Select Arena to avoid race conditions + Arena * arena = res->aggregates_pools.at(static_cast(bucket) % size).get(); + mergeDataImpl( getDataVariant(*res).data.impls[bucket], - getDataVariant(current).data.impls[bucket]); + getDataVariant(current).data.impls[bucket], + arena); } } @@ -1810,7 +1823,8 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( for (size_t j = 0; j < params.aggregates_size; ++j) aggregate_functions[j]->merge( value + offsets_of_aggregate_states[j], - (*aggregate_columns[j])[i]); + (*aggregate_columns[j])[i], + aggregates_pool); } /// Пораньше освобождаем память. 
@@ -1854,7 +1868,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( /// Добавляем значения for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(res + offsets_of_aggregate_states[i], (*aggregate_columns[i])[0]); + aggregate_functions[i]->merge(res + offsets_of_aggregate_states[i], (*aggregate_columns[i])[0], result.aggregates_pool); /// Пораньше освобождаем память. block.clear(); From 7103157b1bed03e6735703ba82d71abbb97300a1 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Mon, 26 Sep 2016 15:40:28 +0300 Subject: [PATCH 07/15] Added simple test for groupUniqArray(String). [#METR-22071] Also reverted Benchmark.cpp it will be added in separate PR. --- dbms/src/Client/Benchmark.cpp | 29 ++++--------------- .../00373_group_uniq_array_simple.reference | 10 +++++++ .../00373_group_uniq_array_simple.sql | 10 +++++++ 3 files changed, 25 insertions(+), 24 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.reference create mode 100644 dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql diff --git a/dbms/src/Client/Benchmark.cpp b/dbms/src/Client/Benchmark.cpp index 8cb34839f94..eb2f20b0256 100644 --- a/dbms/src/Client/Benchmark.cpp +++ b/dbms/src/Client/Benchmark.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -59,11 +58,11 @@ public: Benchmark(unsigned concurrency_, double delay_, const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, const String & stage, - bool randomize_, size_t num_repetions_, double max_time_, + bool randomize_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), connections(concurrency, host_, port_, default_database_, user_, password_), - randomize(randomize_), num_repetions(num_repetions_), max_time(max_time_), + randomize(randomize_), settings(settings_), pool(concurrency) { std::cerr << std::fixed << 
std::setprecision(3); @@ -95,8 +94,6 @@ private: ConnectionPool connections; bool randomize; - size_t num_repetions; - double max_time; Settings settings; QueryProcessingStage::Enum query_processing_stage; @@ -186,7 +183,7 @@ private: Stopwatch watch; /// В цикле, кладём все запросы в очередь. - for (size_t i = 0; !interrupt_listener.check() && (!(num_repetions > 0) || i < num_repetions); ++i) + for (size_t i = 0; !interrupt_listener.check(); ++i) { if (i >= queries.size()) i = 0; @@ -197,7 +194,7 @@ private: queue.push(queries[query_index]); - if (delay > 0 && watch.elapsedSeconds() > delay) + if (watch.elapsedSeconds() > delay) { auto total_queries = 0; { @@ -209,18 +206,6 @@ private: report(info_per_interval); watch.restart(); } - - if (num_repetions > 0 && info_total.queries >= num_repetions) - { - std::cout << "The execution is broken since request number of loops is reached\n"; - break; - } - - if (max_time > 0 && info_total.watch.elapsedSeconds() >= max_time) - { - std::cout << "The execution is broken since requested time limit is reached\n"; - break; - } } /// Попросим потоки завершиться. 
@@ -361,15 +346,13 @@ int main(int argc, char ** argv) desc.add_options() ("help", "produce help message") ("concurrency,c", boost::program_options::value()->default_value(1), "number of parallel queries") - ("delay,d", boost::program_options::value()->default_value(1), "delay between reports in seconds (set 0 to disable)") + ("delay,d", boost::program_options::value()->default_value(1), "delay between reports in seconds") ("host,h", boost::program_options::value()->default_value("localhost"), "") ("port", boost::program_options::value()->default_value(9000), "") ("user", boost::program_options::value()->default_value("default"), "") ("password", boost::program_options::value()->default_value(""), "") ("database", boost::program_options::value()->default_value("default"), "") ("stage", boost::program_options::value()->default_value("complete"), "request query processing up to specified stage") - ("loops,l", boost::program_options::value()->default_value(0), "number of tests repetions") - ("timelimit,t", boost::program_options::value()->default_value(0.), "stop repeating after specified time limit") ("randomize,r", boost::program_options::value()->default_value(false), "randomize order of execution") #define DECLARE_SETTING(TYPE, NAME, DEFAULT) (#NAME, boost::program_options::value (), "Settings.h") #define DECLARE_LIMIT(TYPE, NAME, DEFAULT) (#NAME, boost::program_options::value (), "Limits.h") @@ -409,8 +392,6 @@ int main(int argc, char ** argv) options["password"].as(), options["stage"].as(), options["randomize"].as(), - options["loops"].as(), - options["timelimit"].as(), settings); } catch (const Exception & e) diff --git a/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.reference b/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.reference new file mode 100644 index 00000000000..20cf64b60a3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.reference @@ -0,0 +1,10 @@ +1000 +1000 +1000 +1000 +1000 +1000 +1000 
+1000 +1000 +1000 diff --git a/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql b/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql new file mode 100644 index 00000000000..12c475f4521 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS test.group_uniq; +CREATE TABLE test.group_uniq ENGINE = Memory AS SELECT number % 10 as id, toString(intDiv((number%10000), 10)) as v FROM system.numbers LIMIT 10000000; + +INSERT INTO test.group_uniq SELECT 2 as id, toString(number % 100) as v FROM system.numbers LIMIT 1000000; +INSERT INTO test.group_uniq SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; + +SELECT length(groupUniqArray(v)) FROM test.group_uniq GROUP BY id ORDER BY id; +--SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq') GROUP BY id ORDER BY id; + +DROP TABLE IF EXISTS test.group_uniq; \ No newline at end of file From a3d72db2aafcfa74bc9384569e1269d203a753b3 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Mon, 26 Sep 2016 19:50:13 +0300 Subject: [PATCH 08/15] Added allocatesMemoryInArena() method for aggregate functions. Fixed runningAccumulate, now it works properly for complex functions. More accurate threads handling in Aggregator. 
--- .../AggregateFunctionGroupUniqArray.h | 10 +++++++- .../AggregateFunctions/IAggregateFunction.h | 6 +++++ .../DB/Columns/ColumnAggregateFunction.h | 3 ++- dbms/include/DB/Columns/ColumnString.h | 1 + .../DB/Functions/FunctionsMiscellaneous.h | 12 +++++++++- dbms/include/DB/Functions/ObjectPool.h | 1 + dbms/include/DB/Interpreters/Aggregator.h | 4 ++-- dbms/src/Interpreters/Aggregator.cpp | 24 +++++++++++++------ 8 files changed, 49 insertions(+), 12 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h index acb904d784b..92c8ec8d602 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -104,7 +104,10 @@ public: /// Generic implementation, it uses serialized representation as object descriptor. struct AggreagteFunctionGroupUniqArrayGenericData { - using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory<16>>; + static constexpr size_t INIT_ELEMS = 2; /// adjustable + static constexpr size_t ELEM_SIZE = sizeof(HashSetCellWithSavedHash); + using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory>; + Set value; }; @@ -136,6 +139,11 @@ public: return std::make_shared(input_data_type->clone()); } + bool allocatesMemoryInArena() const override + { + return true; + } + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { auto & set = this->data(place).value; diff --git a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h index 34d5a4daf30..61cbb7e9f55 100644 --- a/dbms/include/DB/AggregateFunctions/IAggregateFunction.h +++ b/dbms/include/DB/AggregateFunctions/IAggregateFunction.h @@ -93,6 +93,12 @@ public: /// Deserializes state. This function is called only for empty (just created) states. 
virtual void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const = 0; + /// Returns true if a function requires Arena to handle own states (see add(), merge(), deserialize()). + virtual bool allocatesMemoryInArena() const + { + return false; + } + /// Inserts results into a column. virtual void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const = 0; diff --git a/dbms/include/DB/Columns/ColumnAggregateFunction.h b/dbms/include/DB/Columns/ColumnAggregateFunction.h index f087a3e980d..f60a6129402 100644 --- a/dbms/include/DB/Columns/ColumnAggregateFunction.h +++ b/dbms/include/DB/Columns/ColumnAggregateFunction.h @@ -159,7 +159,8 @@ public: /// Объединить состояние в последней строке с заданным void insertMergeFrom(const IColumn & src, size_t n) { - func->merge(getData().back(), static_cast(src).getData()[n], &createOrGetArena()); + Arena & arena = createOrGetArena(); + func->merge(getData().back(), static_cast(src).getData()[n], &arena); } Arena & createOrGetArena() diff --git a/dbms/include/DB/Columns/ColumnString.h b/dbms/include/DB/Columns/ColumnString.h index 84b6d3f73b4..b7846f443d6 100644 --- a/dbms/include/DB/Columns/ColumnString.h +++ b/dbms/include/DB/Columns/ColumnString.h @@ -12,6 +12,7 @@ #include #include + namespace DB { diff --git a/dbms/include/DB/Functions/FunctionsMiscellaneous.h b/dbms/include/DB/Functions/FunctionsMiscellaneous.h index 7a07c82045e..dde32298635 100644 --- a/dbms/include/DB/Functions/FunctionsMiscellaneous.h +++ b/dbms/include/DB/Functions/FunctionsMiscellaneous.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -1242,13 +1243,22 @@ public: IColumn & result_column = *result_column_ptr; result_column.reserve(column_with_states->size()); + auto arena = (agg_func.allocatesMemoryInArena()) ? 
+ arenas_pool.get(0, []{ return new Arena(); }) : + nullptr; + const auto & states = column_with_states->getData(); for (const auto & state_to_add : states) { - agg_func.merge(place.get(), state_to_add, nullptr); /// Empty arena! + /// Will pass empty arena if agg_func does not allocate memory in arena + agg_func.merge(place.get(), state_to_add, arena.get()); agg_func.insertResultInto(place.get(), result_column); } } + +private: + + ObjectPool arenas_pool; /// Used only for complex functions }; diff --git a/dbms/include/DB/Functions/ObjectPool.h b/dbms/include/DB/Functions/ObjectPool.h index d6bf82c4062..039822de206 100644 --- a/dbms/include/DB/Functions/ObjectPool.h +++ b/dbms/include/DB/Functions/ObjectPool.h @@ -1,3 +1,4 @@ +#pragma once #include #include #include diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 252033b4bc3..90e2f3771af 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -384,7 +384,7 @@ struct AggregationMethodConcat { /** Исправление, если все ключи - пустые массивы. Для них в хэш-таблицу записывается StringRef нулевой длины, но с ненулевым указателем. * Но при вставке в хэш-таблицу, такой StringRef оказывается равен другому ключу нулевой длины, - * у которого указатель на данные может быть любым мусором и использовать его нельзя. + * у которого указатель на данные может быть любым мусором и использовать его нельзя. 
*/ for (size_t i = 0; i < keys_size; ++i) key_columns[i]->insertDefault(); @@ -1174,7 +1174,7 @@ protected: template void mergeBucketImpl( - ManyAggregatedDataVariants & data, Int32 bucket) const; + ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena) const; template void convertBlockToTwoLevelImpl( diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 0ff62804c73..d5fd5b8c9bd 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1480,7 +1480,7 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( template void NO_INLINE Aggregator::mergeBucketImpl( - ManyAggregatedDataVariants & data, Int32 bucket) const + ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena) const { /// Все результаты агрегации соединяем с первым. AggregatedDataVariantsPtr & res = data[0]; @@ -1488,9 +1488,6 @@ void NO_INLINE Aggregator::mergeBucketImpl( { AggregatedDataVariants & current = *data[i]; - /// Select Arena to avoid race conditions - Arena * arena = res->aggregates_pools.at(static_cast(bucket) % size).get(); - mergeDataImpl( getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], @@ -1511,7 +1508,16 @@ public: * которые все либо являются одноуровневыми, либо являются двухуровневыми. 
*/ MergingAndConvertingBlockInputStream(const Aggregator & aggregator_, ManyAggregatedDataVariants & data_, bool final_, size_t threads_) - : aggregator(aggregator_), data(data_), final(final_), threads(threads_) {} + : aggregator(aggregator_), data(data_), final(final_), threads(threads_) + { + /// At least we need one arena in first data item per thread + if (!data.empty() && threads > data[0]->aggregates_pools.size()) + { + Arenas & first_pool = data[0]->aggregates_pools; + for (size_t j = first_pool.size(); j < threads; j++) + first_pool.emplace_back(std::make_shared()); + } + } String getName() const override { return "MergingAndConverting"; } @@ -1653,17 +1659,21 @@ private: try { - /// TODO Возможно, поддержать no_more_keys + /// TODO: add no_more_keys support maybe auto & merged_data = *data[0]; auto method = merged_data.type; Block block; + /// Select Arena to avoid race conditions + size_t thread_number = static_cast(bucket_num) % threads; + Arena * arena = merged_data.aggregates_pools.at(thread_number).get(); + if (false) {} #define M(NAME) \ else if (method == AggregatedDataVariants::Type::NAME) \ { \ - aggregator.mergeBucketImpl(data, bucket_num); \ + aggregator.mergeBucketImpl(data, bucket_num, arena); \ block = aggregator.convertOneBucketToBlock(merged_data, *merged_data.NAME, final, bucket_num); \ } From 745c2e068643f51cdccb45ae78cb6f1658029c6b Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 28 Sep 2016 19:49:59 +0300 Subject: [PATCH 09/15] Added clickhouse-benchmark enhancements. 
--- dbms/src/Client/Benchmark.cpp | 134 ++++++++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 31 deletions(-) diff --git a/dbms/src/Client/Benchmark.cpp b/dbms/src/Client/Benchmark.cpp index eb2f20b0256..eef964ef960 100644 --- a/dbms/src/Client/Benchmark.cpp +++ b/dbms/src/Client/Benchmark.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -38,8 +39,8 @@ #include "InterruptListener.h" -/** Инструмент для измерения производительности ClickHouse - * при выполнении запросов с фиксированным количеством одновременных запросов. +/** A tool for evaluating ClickHouse performance. + * The tool emulates a case with fixed amount of simultaneously executing queries. */ namespace DB @@ -58,12 +59,13 @@ public: Benchmark(unsigned concurrency_, double delay_, const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, const String & stage, - bool randomize_, - const Settings & settings_) - : concurrency(concurrency_), delay(delay_), queue(concurrency), + bool randomize_, size_t max_iterations_, double max_time_, + const String & json_path_, const Settings & settings_) + : + concurrency(concurrency_), delay(delay_), queue(concurrency), connections(concurrency, host_, port_, default_database_, user_, password_), - randomize(randomize_), - settings(settings_), pool(concurrency) + randomize(randomize_), max_iterations(max_iterations_), max_time(max_time_), + json_path(json_path_), settings(settings_), pool(concurrency) { std::cerr << std::fixed << std::setprecision(3); @@ -76,6 +78,11 @@ public: else throw Exception("Unknown query processing stage: " + stage, ErrorCodes::BAD_ARGUMENTS); + if (!json_path.empty() && Poco::File(json_path).exists()) /// Clear file with previous results + { + Poco::File(json_path).remove(); + } + readQueries(); run(); } @@ -94,6 +101,9 @@ private: ConnectionPool connections; bool randomize; + size_t max_iterations; + double max_time; + String json_path; 
Settings settings; QueryProcessingStage::Enum query_processing_stage; @@ -183,18 +193,13 @@ private: Stopwatch watch; /// В цикле, кладём все запросы в очередь. - for (size_t i = 0; !interrupt_listener.check(); ++i) + for (size_t i = 0; !(max_iterations > 0) || i < max_iterations; ++i) { - if (i >= queries.size()) - i = 0; - - size_t query_index = randomize - ? distribution(generator) - : i; + size_t query_index = randomize ? distribution(generator) : i % queries.size(); queue.push(queries[query_index]); - if (watch.elapsedSeconds() > delay) + if (delay > 0 && watch.elapsedSeconds() > delay) { auto total_queries = 0; { @@ -206,6 +211,18 @@ private: report(info_per_interval); watch.restart(); } + + if (max_time > 0 && info_total.watch.elapsedSeconds() >= max_time) + { + std::cout << "Stopping launch of queries. Requested time limit is exhausted.\n"; + break; + } + + if (interrupt_listener.check()) + { + std::cout << "Stopping launch of queries. SIGINT recieved.\n"; + break; + } } /// Попросим потоки завершиться. @@ -214,6 +231,9 @@ private: pool.wait(); + info_total.watch.stop(); + if (!json_path.empty()) + reportJSON(info_total, json_path); printNumberOfQueriesExecuted(info_total.queries); report(info_total); } @@ -320,17 +340,60 @@ private: << "result MiB/s: " << (info.result_bytes / seconds / 1048576) << "." << "\n"; - for (size_t percent = 0; percent <= 90; percent += 10) + auto print_percentile = [&](double percent) + { std::cerr << percent << "%\t" << info.sampler.quantileInterpolated(percent / 100.0) << " sec." 
<< std::endl; + }; - std::cerr << "95%\t" << info.sampler.quantileInterpolated(0.95) << " sec.\n"; - std::cerr << "99%\t" << info.sampler.quantileInterpolated(0.99) << " sec.\n"; - std::cerr << "99.9%\t" << info.sampler.quantileInterpolated(0.999) << " sec.\n"; - std::cerr << "99.99%\t" << info.sampler.quantileInterpolated(0.9999) << " sec.\n"; - std::cerr << "100%\t" << info.sampler.quantileInterpolated(1) << " sec.\n"; + for (int percent = 0; percent <= 90; percent += 10) + print_percentile(percent); + print_percentile(95); + print_percentile(99); + print_percentile(99.9); + print_percentile(99.99); info.clear(); } + + void reportJSON(Stats & info, const std::string & filename) + { + std::ofstream jout(filename); + if (!jout.is_open()) + throw Exception("Can't write JSON data"); + + std::lock_guard lock(mutex); + + double seconds = info.watch.elapsedSeconds(); + + jout << "{\n"; + + jout << "\"statistics\": {\n" + << "\"QPS\": " << (info.queries / seconds) << ",\n" + << "\"RPS\": " << (info.read_rows / seconds) << ",\n" + << "\"MiBPS\": " << (info.read_bytes / seconds / 1048576) << ",\n" + << "\"RPS_result\": " << (info.result_rows / seconds) << ",\n" + << "\"MiBPS_result\": " << (info.result_bytes / seconds / 1048576) << ",\n" + << "\"num_queries\": " << info.queries << "\n" + << "},\n"; + + auto print_percentile = [&](auto percent, bool with_comma = true) + { + jout << "\"" << percent << "\":\t" << info.sampler.quantileInterpolated(percent / 100.0) << (with_comma ? 
",\n" : "\n"); + }; + + jout << "\"query_time_percentiles\": {\n"; + for (int percent = 0; percent <= 90; percent += 10) + print_percentile(percent); + print_percentile(95); + print_percentile(99); + print_percentile(99.9); + print_percentile(99.99, false); + jout << "}\n"; + + jout << "}\n"; + + jout.close(); + } }; } @@ -342,18 +405,24 @@ int main(int argc, char ** argv) try { + using boost::program_options::value; + boost::program_options::options_description desc("Allowed options"); desc.add_options() - ("help", "produce help message") - ("concurrency,c", boost::program_options::value()->default_value(1), "number of parallel queries") - ("delay,d", boost::program_options::value()->default_value(1), "delay between reports in seconds") - ("host,h", boost::program_options::value()->default_value("localhost"), "") - ("port", boost::program_options::value()->default_value(9000), "") - ("user", boost::program_options::value()->default_value("default"), "") - ("password", boost::program_options::value()->default_value(""), "") - ("database", boost::program_options::value()->default_value("default"), "") - ("stage", boost::program_options::value()->default_value("complete"), "request query processing up to specified stage") - ("randomize,r", boost::program_options::value()->default_value(false), "randomize order of execution") + ("help", "produce help message") + ("concurrency,c", value()->default_value(1), "number of parallel queries") + ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("stage", value()->default_value("complete"), "request query processing up to specified stage") + ("iterations,i", value()->default_value(0), "amount of queries to be executed") + ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") + ("randomize,r", value()->default_value(false), "randomize order of execution") + ("json", value()->default_value(""), "write final report to 
specified file in JSON format") + ("host,h", value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("database", value()->default_value("default"), "") + #define DECLARE_SETTING(TYPE, NAME, DEFAULT) (#NAME, boost::program_options::value (), "Settings.h") #define DECLARE_LIMIT(TYPE, NAME, DEFAULT) (#NAME, boost::program_options::value (), "Limits.h") APPLY_FOR_SETTINGS(DECLARE_SETTING) @@ -392,6 +461,9 @@ int main(int argc, char ** argv) options["password"].as(), options["stage"].as(), options["randomize"].as(), + options["iterations"].as(), + options["timelimit"].as(), + options["json"].as(), settings); } catch (const Exception & e) From 0ad504e4dd654fafdec9fe0cdaa2719a7a3f7593 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 29 Sep 2016 13:00:37 +0300 Subject: [PATCH 10/15] dbms: Server: Bug fix. Now the functions toStartOfHour/toStartOfMinute/toStartOfFiveMinute correctly take into account the time zone parameter. 
[#METR-22935] --- dbms/include/DB/Functions/FunctionsDateTime.h | 42 +++++++++++- .../0_stateless/00189_time_zones.reference | 68 +++++++++---------- 2 files changed, 73 insertions(+), 37 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsDateTime.h b/dbms/include/DB/Functions/FunctionsDateTime.h index df8259af5be..a2a56d76068 100644 --- a/dbms/include/DB/Functions/FunctionsDateTime.h +++ b/dbms/include/DB/Functions/FunctionsDateTime.h @@ -170,7 +170,19 @@ struct ToStartOfMinuteImpl { static inline UInt32 execute(UInt32 t, const DateLUTImpl & remote_date_lut, const DateLUTImpl & local_date_lut) { - return remote_date_lut.toStartOfMinuteInaccurate(t); + if (&remote_date_lut == &local_date_lut) + return local_date_lut.toStartOfMinuteInaccurate(t); + else + { + time_t remote_ts = remote_date_lut.toTimeInaccurate(t) + 86400; + remote_ts = remote_date_lut.toStartOfMinuteInaccurate(remote_ts); + + const auto & values = remote_date_lut.getValues(t); + return local_date_lut.makeDateTime(values.year, values.month, values.day_of_month, + remote_date_lut.toHourInaccurate(remote_ts), + remote_date_lut.toMinuteInaccurate(remote_ts), + remote_date_lut.toSecondInaccurate(remote_ts)); + } } static inline UInt32 execute(UInt16 d, const DateLUTImpl & remote_date_lut, const DateLUTImpl & local_date_lut) { @@ -184,7 +196,19 @@ struct ToStartOfFiveMinuteImpl { static inline UInt32 execute(UInt32 t, const DateLUTImpl & remote_date_lut, const DateLUTImpl & local_date_lut) { - return remote_date_lut.toStartOfFiveMinuteInaccurate(t); + if (&remote_date_lut == &local_date_lut) + return local_date_lut.toStartOfFiveMinuteInaccurate(t); + else + { + time_t remote_ts = remote_date_lut.toTimeInaccurate(t) + 86400; + remote_ts = remote_date_lut.toStartOfFiveMinuteInaccurate(remote_ts); + + const auto & values = remote_date_lut.getValues(t); + return local_date_lut.makeDateTime(values.year, values.month, values.day_of_month, + remote_date_lut.toHourInaccurate(remote_ts), + 
remote_date_lut.toMinuteInaccurate(remote_ts), + remote_date_lut.toSecondInaccurate(remote_ts)); + } } static inline UInt32 execute(UInt16 d, const DateLUTImpl & remote_date_lut, const DateLUTImpl & local_date_lut) { @@ -198,7 +222,19 @@ struct ToStartOfHourImpl { static inline UInt32 execute(UInt32 t, const DateLUTImpl & remote_date_lut, const DateLUTImpl & local_date_lut) { - return remote_date_lut.toStartOfHourInaccurate(t); + if (&remote_date_lut == &local_date_lut) + return local_date_lut.toStartOfHourInaccurate(t); + else + { + time_t remote_ts = remote_date_lut.toTimeInaccurate(t) + 86400; + remote_ts = remote_date_lut.toStartOfHourInaccurate(remote_ts); + + const auto & values = remote_date_lut.getValues(t); + return local_date_lut.makeDateTime(values.year, values.month, values.day_of_month, + remote_date_lut.toHourInaccurate(remote_ts), + remote_date_lut.toMinuteInaccurate(remote_ts), + remote_date_lut.toSecondInaccurate(remote_ts)); + } } static inline UInt32 execute(UInt16 d, const DateLUTImpl & remote_date_lut, const DateLUTImpl & local_date_lut) { diff --git a/dbms/tests/queries/0_stateless/00189_time_zones.reference b/dbms/tests/queries/0_stateless/00189_time_zones.reference index d4b61d3da17..df12bf203ba 100644 --- a/dbms/tests/queries/0_stateless/00189_time_zones.reference +++ b/dbms/tests/queries/0_stateless/00189_time_zones.reference @@ -59,15 +59,15 @@ 0 0 2014-09-30 23:50:00 -2014-09-30 23:50:00 -2014-09-30 23:50:00 -2014-09-30 23:50:00 -2014-09-30 23:50:00 -2014-09-30 23:00:00 -2014-09-30 23:00:00 -2014-09-30 23:00:00 -2014-09-30 23:00:00 +2014-09-30 21:50:00 +2014-09-30 20:50:00 +2014-10-01 04:50:00 +2014-09-30 11:20:00 2014-09-30 23:00:00 +2014-09-30 21:00:00 +2014-09-30 20:00:00 +2014-10-01 04:00:00 +2014-09-30 11:00:00 2014 2014 2014 @@ -123,16 +123,16 @@ 2015 1 1 4 2014 9 30 2 2015 3 15 7 -19 30 0 2015-07-15 13:30:00 -21 0 0 2014-12-29 00:00:00 +19 30 0 2015-07-15 19:30:00 +21 0 0 2014-12-28 21:00:00 12 0 0 2015-01-01 12:00:00 -21 50 0 
2014-09-30 23:50:00 -2 30 0 2015-03-15 13:30:00 -2015-07-15 13:00:00 2015 24187 2375 -2014-12-29 00:00:00 2014 24180 2346 +21 50 0 2014-09-30 21:50:00 +2 30 0 2015-03-15 02:00:00 +2015-07-15 19:00:00 2015 24187 2375 +2014-12-28 21:00:00 2014 24180 2346 2015-01-01 12:00:00 2015 24181 2347 -2014-09-30 23:00:00 2014 24177 2334 -2015-03-15 13:00:00 2015 24183 2357 +2014-09-30 21:00:00 2014 24177 2334 +2015-03-15 02:00:00 2015 24183 2357 16631 399154 23949270 1436956200 16432 394389 23663340 1419800400 16436 394473 23668380 1420102800 @@ -153,16 +153,16 @@ 2015 1 1 4 2014 9 1 2 2015 3 15 7 -12 30 0 2015-07-15 13:30:00 -22 0 0 2014-12-29 00:00:00 -10 0 0 2015-01-01 12:00:00 -21 50 0 2014-09-30 23:50:00 -11 30 0 2015-03-15 13:30:00 -2015-07-15 13:00:00 2015 24187 2375 -2014-12-29 00:00:00 2014 24180 2346 -2015-01-01 12:00:00 2015 24181 2347 -2014-09-30 23:00:00 2014 24178 2334 -2015-03-15 13:00:00 2015 24183 2357 +12 30 0 2015-07-15 02:00:00 +22 0 0 2014-12-28 13:30:00 +10 0 0 2015-01-01 01:30:00 +21 50 0 2014-09-30 11:20:00 +11 30 0 2015-03-15 02:00:00 +2015-07-15 12:00:00 2015 24187 2375 +2014-12-28 22:00:00 2014 24180 2346 +2015-01-01 10:00:00 2015 24181 2347 +2014-09-30 21:00:00 2014 24178 2334 +2015-03-15 11:00:00 2015 24183 2357 16631 399154 23949270 1436956200 16432 394389 23663340 1419800400 16436 394473 23668380 1420102800 @@ -183,16 +183,16 @@ 2015 3 15 7 2015 3 15 7 2015 3 15 7 -19 30 0 2015-03-15 13:30:00 -10 30 0 2015-03-15 13:30:00 +19 30 0 2015-03-15 19:30:00 +10 30 0 2015-03-15 10:30:00 13 30 0 2015-03-15 13:30:00 -11 30 0 2015-03-15 13:30:00 -2 30 0 2015-03-15 13:30:00 -2015-03-15 13:00:00 2015 24183 2357 -2015-03-15 13:00:00 2015 24183 2357 -2015-03-15 13:00:00 2015 24183 2357 -2015-03-15 13:00:00 2015 24183 2357 +11 30 0 2015-03-15 11:30:00 +2 30 0 2015-03-15 02:00:00 +2015-03-15 19:00:00 2015 24183 2357 +2015-03-15 10:00:00 2015 24183 2357 2015-03-15 13:00:00 2015 24183 2357 +2015-03-15 11:00:00 2015 24183 2357 +2015-03-15 02:00:00 2015 24183 2357 
16509 396226 23773590 1426415400 16509 396226 23773590 1426415400 16509 396226 23773590 1426415400 From c2d5595c3d9c806f531b703233e9fb4c676ef1ce Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 29 Sep 2016 16:16:46 +0300 Subject: [PATCH 11/15] Added more tests for groupUniqArray() function. [#METR-22071] --- .../00373_group_uniq_array_simple.sql | 10 ---------- ...0375_group_uniq_array_of_string.reference} | 10 ++++++++++ .../00375_group_uniq_array_of_string.sql | 11 ++++++++++ ...76_group_uniq_array_of_int_array.reference | 20 +++++++++++++++++++ .../00376_group_uniq_array_of_int_array.sql | 10 ++++++++++ ...group_uniq_array_of_string_array.reference | 20 +++++++++++++++++++ ...00377_group_uniq_array_of_string_array.sql | 10 ++++++++++ 7 files changed, 81 insertions(+), 10 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql rename dbms/tests/queries/0_stateless/{00373_group_uniq_array_simple.reference => 00375_group_uniq_array_of_string.reference} (50%) create mode 100644 dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql create mode 100644 dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.reference create mode 100644 dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql create mode 100644 dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.reference create mode 100644 dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql diff --git a/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql b/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql deleted file mode 100644 index 12c475f4521..00000000000 --- a/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.sql +++ /dev/null @@ -1,10 +0,0 @@ -DROP TABLE IF EXISTS test.group_uniq; -CREATE TABLE test.group_uniq ENGINE = Memory AS SELECT number % 10 as id, toString(intDiv((number%10000), 10)) as v FROM system.numbers LIMIT 10000000; - -INSERT 
INTO test.group_uniq SELECT 2 as id, toString(number % 100) as v FROM system.numbers LIMIT 1000000; -INSERT INTO test.group_uniq SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; - -SELECT length(groupUniqArray(v)) FROM test.group_uniq GROUP BY id ORDER BY id; ---SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq') GROUP BY id ORDER BY id; - -DROP TABLE IF EXISTS test.group_uniq; \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.reference b/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.reference similarity index 50% rename from dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.reference rename to dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.reference index 20cf64b60a3..44d059430cd 100644 --- a/dbms/tests/queries/0_stateless/00373_group_uniq_array_simple.reference +++ b/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.reference @@ -8,3 +8,13 @@ 1000 1000 1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 diff --git a/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql b/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql new file mode 100644 index 00000000000..56573d6f816 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS test.group_uniq_str; +CREATE TABLE test.group_uniq_str ENGINE = Memory AS SELECT number % 10 as id, toString(intDiv((number%10000), 10)) as v FROM system.numbers LIMIT 10000000; + +INSERT INTO test.group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM system.numbers LIMIT 1000000; +INSERT INTO test.group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; + +SELECT length(groupUniqArray(v)) FROM test.group_uniq_str GROUP BY id ORDER BY id +UNION ALL +SELECT length(groupUniqArray(v)) FROM 
remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq_str') GROUP BY id ORDER BY id; + +DROP TABLE IF EXISTS test.group_uniq_str; diff --git a/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.reference b/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.reference new file mode 100644 index 00000000000..932499cb209 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.reference @@ -0,0 +1,20 @@ +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 diff --git a/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql b/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql new file mode 100644 index 00000000000..e406b4a3e7c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS test.group_uniq_array_int; +CREATE TABLE test.group_uniq_arr_int ENGINE = Memory AS + SELECT g as id, if(c == 0, [v], if(c == 1, emptyArrayInt64(), [v, v])) as v FROM + (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); + +SELECT length(groupUniqArray(v)) FROM test.group_uniq_arr_int GROUP BY id ORDER BY id +UNION ALL +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq_arr_int') GROUP BY id ORDER BY id; + +DROP TABLE IF EXISTS test.group_uniq_arr_int; diff --git a/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.reference b/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.reference new file mode 100644 index 00000000000..932499cb209 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.reference @@ -0,0 +1,20 @@ +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 +20001 
diff --git a/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql b/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql new file mode 100644 index 00000000000..f7a9453036c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS test.group_uniq_arr_str; +CREATE TABLE test.group_uniq_arr_str ENGINE = Memory AS + SELECT hex(intHash32(g)) as id, if(c == 0, [hex(v)], if(c == 1, emptyArrayString(), [hex(v), hex(v)])) as v FROM + (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); + +SELECT length(groupUniqArray(v)) FROM test.group_uniq_arr_str GROUP BY id ORDER BY id +UNION ALL +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq_arr_str') GROUP BY id ORDER BY id; + +DROP TABLE IF EXISTS test.group_uniq_arr_str; From 861d5e9755fc25dfe56d01aa5c25c2c149a1e911 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 30 Sep 2016 15:39:18 +0300 Subject: [PATCH 12/15] Small refinements of benchmark's code. --- dbms/src/Client/Benchmark.cpp | 52 ++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/dbms/src/Client/Benchmark.cpp b/dbms/src/Client/Benchmark.cpp index eef964ef960..1ce58e9968d 100644 --- a/dbms/src/Client/Benchmark.cpp +++ b/dbms/src/Client/Benchmark.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -27,8 +26,10 @@ #include #include +#include #include #include +#include #include @@ -193,7 +194,7 @@ private: Stopwatch watch; /// В цикле, кладём все запросы в очередь. - for (size_t i = 0; !(max_iterations > 0) || i < max_iterations; ++i) + for (size_t i = 0; !max_iterations || i < max_iterations; ++i) { size_t query_index = randomize ? 
distribution(generator) : i % queries.size(); @@ -347,6 +348,7 @@ private: for (int percent = 0; percent <= 90; percent += 10) print_percentile(percent); + print_percentile(95); print_percentile(99); print_percentile(99.9); @@ -357,42 +359,48 @@ private: void reportJSON(Stats & info, const std::string & filename) { - std::ofstream jout(filename); - if (!jout.is_open()) - throw Exception("Can't write JSON data"); + WriteBufferFromFile json_out(filename); std::lock_guard lock(mutex); - double seconds = info.watch.elapsedSeconds(); - - jout << "{\n"; - - jout << "\"statistics\": {\n" - << "\"QPS\": " << (info.queries / seconds) << ",\n" - << "\"RPS\": " << (info.read_rows / seconds) << ",\n" - << "\"MiBPS\": " << (info.read_bytes / seconds / 1048576) << ",\n" - << "\"RPS_result\": " << (info.result_rows / seconds) << ",\n" - << "\"MiBPS_result\": " << (info.result_bytes / seconds / 1048576) << ",\n" - << "\"num_queries\": " << info.queries << "\n" - << "},\n"; + auto print_key_value = [&](auto key, auto value, bool with_comma = true) + { + json_out << double_quote << key << ": " << value << (with_comma ? ",\n" : "\n"); + }; auto print_percentile = [&](auto percent, bool with_comma = true) { - jout << "\"" << percent << "\":\t" << info.sampler.quantileInterpolated(percent / 100.0) << (with_comma ? ",\n" : "\n"); + json_out << "\"" << percent << "\"" << ": " << info.sampler.quantileInterpolated(percent / 100.0) << (with_comma ? 
",\n" : "\n"); }; - jout << "\"query_time_percentiles\": {\n"; + json_out << "{\n"; + + json_out << double_quote << "statistics" << ": {\n"; + + double seconds = info.watch.elapsedSeconds(); + print_key_value("QPS", info.queries / seconds); + print_key_value("RPS", info.queries / seconds); + print_key_value("MiBPS", info.queries / seconds); + print_key_value("RPS_result", info.queries / seconds); + print_key_value("MiBPS_result", info.queries / seconds); + print_key_value("num_queries", info.queries / seconds, false); + + json_out << "},\n"; + + + json_out << double_quote << "query_time_percentiles" << ": {\n"; + for (int percent = 0; percent <= 90; percent += 10) print_percentile(percent); + print_percentile(95); print_percentile(99); print_percentile(99.9); print_percentile(99.99, false); - jout << "}\n"; - jout << "}\n"; + json_out << "}\n"; - jout.close(); + json_out << "}\n"; } }; From 6375e4999c3589145e4f17dc316cb90ac63c706a Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 30 Sep 2016 16:40:26 +0300 Subject: [PATCH 13/15] Small refinement of recently added tests. 
--- .../00374_json_each_row_input_with_noisy_fields.sh | 5 +++-- .../queries/0_stateless/00375_group_uniq_array_of_string.sql | 3 +-- .../0_stateless/00376_group_uniq_array_of_int_array.sql | 3 +-- .../0_stateless/00377_group_uniq_array_of_string_array.sql | 3 +-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh b/dbms/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh index 795957d1287..588fd501373 100755 --- a/dbms/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh +++ b/dbms/tests/queries/0_stateless/00374_json_each_row_input_with_noisy_fields.sh @@ -1,4 +1,5 @@ -clickhouse-client -n --query "DROP TABLE IF EXISTS test.json_noisy; CREATE TABLE test.json_noisy (d1 UInt8, d2 String) ENGINE = Memory" +#!/bin/bash +clickhouse-client -n --query "DROP TABLE IF EXISTS test.json_noisy; CREATE TABLE test.json_noisy (d1 UInt8, d2 String) ENGINE = Memory" echo '{"d1" : 1, "d2" : "ok"} { } @@ -7,4 +8,4 @@ echo '{"d1" : 1, "d2" : "ok"} {"d2":"ok","t1":[[[]],true, null, false, "1","2", 0.03, 1], "d1":"1", "t2":["1","2"]}' \ | clickhouse-client -n --query "SET input_format_skip_unknown_fields = 1; INSERT INTO test.json_noisy FORMAT JSONEachRow" -clickhouse-client -n --query "SELECT * FROM test.json_noisy; DROP TABLE IF EXISTS test.json_noisy;" \ No newline at end of file +clickhouse-client -n --query "SELECT * FROM test.json_noisy; DROP TABLE IF EXISTS test.json_noisy;" diff --git a/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql b/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql index 56573d6f816..b0547691c19 100644 --- a/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql +++ b/dbms/tests/queries/0_stateless/00375_group_uniq_array_of_string.sql @@ -4,8 +4,7 @@ CREATE TABLE test.group_uniq_str ENGINE = Memory AS SELECT number % 10 as id, to INSERT INTO test.group_uniq_str SELECT 2 as id, 
toString(number % 100) as v FROM system.numbers LIMIT 1000000; INSERT INTO test.group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; -SELECT length(groupUniqArray(v)) FROM test.group_uniq_str GROUP BY id ORDER BY id -UNION ALL +SELECT length(groupUniqArray(v)) FROM test.group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq_str') GROUP BY id ORDER BY id; DROP TABLE IF EXISTS test.group_uniq_str; diff --git a/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql b/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql index e406b4a3e7c..6a652e75bae 100644 --- a/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql +++ b/dbms/tests/queries/0_stateless/00376_group_uniq_array_of_int_array.sql @@ -3,8 +3,7 @@ CREATE TABLE test.group_uniq_arr_int ENGINE = Memory AS SELECT g as id, if(c == 0, [v], if(c == 1, emptyArrayInt64(), [v, v])) as v FROM (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); -SELECT length(groupUniqArray(v)) FROM test.group_uniq_arr_int GROUP BY id ORDER BY id -UNION ALL +SELECT length(groupUniqArray(v)) FROM test.group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq_arr_int') GROUP BY id ORDER BY id; DROP TABLE IF EXISTS test.group_uniq_arr_int; diff --git a/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql b/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql index f7a9453036c..7ffe6d3a754 100644 --- a/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql +++ b/dbms/tests/queries/0_stateless/00377_group_uniq_array_of_string_array.sql @@ -3,8 +3,7 @@ CREATE TABLE test.group_uniq_arr_str ENGINE = Memory AS SELECT hex(intHash32(g)) as id, if(c == 0, [hex(v)], if(c == 1, 
emptyArrayString(), [hex(v), hex(v)])) as v FROM (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); -SELECT length(groupUniqArray(v)) FROM test.group_uniq_arr_str GROUP BY id ORDER BY id -UNION ALL +SELECT length(groupUniqArray(v)) FROM test.group_uniq_arr_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{1,2,3,4}', 'test', 'group_uniq_arr_str') GROUP BY id ORDER BY id; DROP TABLE IF EXISTS test.group_uniq_arr_str; From 4c242e8a4da8b47bdddd9191564da3cb5fa4ab3b Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 30 Sep 2016 19:02:16 +0300 Subject: [PATCH 14/15] Added output_format_json_quote_64bit_integers new config option. --- .../DataStreams/JSONCompactRowOutputStream.h | 2 +- .../DataStreams/JSONEachRowRowOutputStream.h | 3 +- .../DB/DataStreams/JSONRowOutputStream.h | 3 +- .../DB/DataTypes/DataTypeAggregateFunction.h | 2 +- dbms/include/DB/DataTypes/DataTypeArray.h | 2 +- dbms/include/DB/DataTypes/DataTypeDate.h | 2 +- dbms/include/DB/DataTypes/DataTypeDateTime.h | 2 +- dbms/include/DB/DataTypes/DataTypeEnum.h | 2 +- .../DB/DataTypes/DataTypeFixedString.h | 2 +- dbms/include/DB/DataTypes/DataTypeString.h | 2 +- dbms/include/DB/DataTypes/DataTypeTuple.h | 2 +- dbms/include/DB/DataTypes/IDataType.h | 5 +- dbms/include/DB/DataTypes/IDataTypeDummy.h | 2 +- dbms/include/DB/DataTypes/IDataTypeNumber.h | 26 +- dbms/include/DB/Interpreters/Settings.h | 5 +- dbms/src/DataStreams/FormatFactory.cpp | 7 +- .../JSONCompactRowOutputStream.cpp | 16 +- .../JSONEachRowRowOutputStream.cpp | 6 +- dbms/src/DataStreams/JSONRowOutputStream.cpp | 16 +- .../DataTypes/DataTypeAggregateFunction.cpp | 2 +- dbms/src/DataTypes/DataTypeArray.cpp | 4 +- dbms/src/DataTypes/DataTypeEnum.cpp | 2 +- dbms/src/DataTypes/DataTypeFixedString.cpp | 2 +- dbms/src/DataTypes/DataTypeString.cpp | 2 +- dbms/src/DataTypes/DataTypeTuple.cpp | 4 +- 
.../00378_json_quote_64bit_integers.reference | 264 ++++++++++++++++++ .../00378_json_quote_64bit_integers.sql | 12 + 27 files changed, 343 insertions(+), 56 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference create mode 100644 dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql diff --git a/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h b/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h index baf1a880fcd..30b10920c58 100644 --- a/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h +++ b/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h @@ -14,7 +14,7 @@ namespace DB class JSONCompactRowOutputStream : public JSONRowOutputStream { public: - JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_); + JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_, bool force_quoting_); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; diff --git a/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h b/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h index 6ca1b33563f..a4686c7e078 100644 --- a/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h +++ b/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h @@ -14,7 +14,7 @@ namespace DB class JSONEachRowRowOutputStream : public IRowOutputStream { public: - JSONEachRowRowOutputStream(WriteBuffer & ostr_, const Block & sample); + JSONEachRowRowOutputStream(WriteBuffer & ostr_, const Block & sample, bool force_quoting_ = true); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; @@ -30,6 +30,7 @@ private: WriteBuffer & ostr; size_t field_number = 0; Names fields; + bool force_quoting; }; } diff --git a/dbms/include/DB/DataStreams/JSONRowOutputStream.h 
b/dbms/include/DB/DataStreams/JSONRowOutputStream.h index e1da63a80ea..da4cd50a458 100644 --- a/dbms/include/DB/DataStreams/JSONRowOutputStream.h +++ b/dbms/include/DB/DataStreams/JSONRowOutputStream.h @@ -16,7 +16,7 @@ class JSONRowOutputStream : public IRowOutputStream { public: JSONRowOutputStream(WriteBuffer & ostr_, const Block & sample_, - bool write_statistics_); + bool write_statistics_, bool force_quoting_ = true); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; @@ -68,6 +68,7 @@ protected: Progress progress; Stopwatch watch; bool write_statistics; + bool force_quoting; }; } diff --git a/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h b/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h index e811a919cce..b081f964226 100644 --- a/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/include/DB/DataTypes/DataTypeAggregateFunction.h @@ -53,7 +53,7 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; diff --git a/dbms/include/DB/DataTypes/DataTypeArray.h b/dbms/include/DB/DataTypes/DataTypeArray.h index d6553213c3c..2d70c42a6a2 100644 --- a/dbms/include/DB/DataTypes/DataTypeArray.h +++ b/dbms/include/DB/DataTypes/DataTypeArray.h @@ -46,7 +46,7 @@ public: void 
serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; diff --git a/dbms/include/DB/DataTypes/DataTypeDate.h b/dbms/include/DB/DataTypes/DataTypeDate.h index f9d21e0442f..1509f00b987 100644 --- a/dbms/include/DB/DataTypes/DataTypeDate.h +++ b/dbms/include/DB/DataTypes/DataTypeDate.h @@ -58,7 +58,7 @@ public: static_cast(column).getData().push_back(x); /// Важно делать это в конце - для exception safety. } - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override { writeChar('"', ostr); serializeText(column, row_num, ostr); diff --git a/dbms/include/DB/DataTypes/DataTypeDateTime.h b/dbms/include/DB/DataTypes/DataTypeDateTime.h index a944b67aa1a..b4f9071da37 100644 --- a/dbms/include/DB/DataTypes/DataTypeDateTime.h +++ b/dbms/include/DB/DataTypes/DataTypeDateTime.h @@ -58,7 +58,7 @@ public: static_cast(column).getData().push_back(x); /// Важно делать это в конце - для exception safety. 
} - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override { writeChar('"', ostr); serializeText(column, row_num, ostr); diff --git a/dbms/include/DB/DataTypes/DataTypeEnum.h b/dbms/include/DB/DataTypes/DataTypeEnum.h index 2b9071c1533..69b3db87cb5 100644 --- a/dbms/include/DB/DataTypes/DataTypeEnum.h +++ b/dbms/include/DB/DataTypes/DataTypeEnum.h @@ -81,7 +81,7 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; diff --git a/dbms/include/DB/DataTypes/DataTypeFixedString.h b/dbms/include/DB/DataTypes/DataTypeFixedString.h index 76c2c7918fb..3fd85e5ae06 100644 --- a/dbms/include/DB/DataTypes/DataTypeFixedString.h +++ b/dbms/include/DB/DataTypes/DataTypeFixedString.h @@ -52,7 +52,7 @@ public: void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & 
istr) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; diff --git a/dbms/include/DB/DataTypes/DataTypeString.h b/dbms/include/DB/DataTypes/DataTypeString.h index 8271fad42b3..f230594c2a5 100644 --- a/dbms/include/DB/DataTypes/DataTypeString.h +++ b/dbms/include/DB/DataTypes/DataTypeString.h @@ -39,7 +39,7 @@ public: void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; diff --git a/dbms/include/DB/DataTypes/DataTypeTuple.h b/dbms/include/DB/DataTypes/DataTypeTuple.h index 59cd8f1f1c2..d26f684490f 100644 --- a/dbms/include/DB/DataTypes/DataTypeTuple.h +++ b/dbms/include/DB/DataTypes/DataTypeTuple.h @@ -31,7 +31,7 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; diff --git a/dbms/include/DB/DataTypes/IDataType.h b/dbms/include/DB/DataTypes/IDataType.h index 88a01121c98..7ba9055b488 100644 --- 
a/dbms/include/DB/DataTypes/IDataType.h +++ b/dbms/include/DB/DataTypes/IDataType.h @@ -87,9 +87,10 @@ public: */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0; - /** Текстовая сериализация в виде литерала для использования в формате JSON. + /** Text serialization intended for use in JSON format. + * If values can be serialized without quotes, the force_quoting parameter forces wrapping them in quotes (makes sense for Int64 types). */ - virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0; + virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const = 0; /** Текстовая сериализация для подстановки в формат XML. diff --git a/dbms/include/DB/DataTypes/IDataTypeDummy.h b/dbms/include/DB/DataTypes/IDataTypeDummy.h index 33daf8af761..1cbae93974e 100644 --- a/dbms/include/DB/DataTypes/IDataTypeDummy.h +++ b/dbms/include/DB/DataTypes/IDataTypeDummy.h @@ -38,7 +38,7 @@ public: void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override { throwNoSerialization(); } void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override { throwNoSerialization(); } - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override { throwNoSerialization(); } + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override { throwNoSerialization(); } void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override { throwNoSerialization(); } void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override { throwNoSerialization(); } diff --git a/dbms/include/DB/DataTypes/IDataTypeNumber.h b/dbms/include/DB/DataTypes/IDataTypeNumber.h index c65fdf7acae..234d294eeab 100644 --- 
a/dbms/include/DB/DataTypes/IDataTypeNumber.h +++ b/dbms/include/DB/DataTypes/IDataTypeNumber.h @@ -56,7 +56,7 @@ public: deserializeText(column, istr); } - inline void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + inline void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override { @@ -121,7 +121,7 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr) const override {} void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override {} void deserializeTextQuoted(IColumn & column, ReadBuffer & istr) const override {} - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override {} + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const override {} void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const override {} void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override {} void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const char delimiter) const override {} @@ -129,26 +129,30 @@ public: Field getDefault() const override { return {}; } }; -template inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +template inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const { serializeText(column, row_num, ostr); } -template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const { - writeChar('"', ostr); + if (force_quoting) + writeChar('"', ostr); serializeText(column, row_num, 
ostr); - writeChar('"', ostr); + if (force_quoting) + writeChar('"', ostr); } -template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const { - writeChar('"', ostr); + if (force_quoting) + writeChar('"', ostr); serializeText(column, row_num, ostr); - writeChar('"', ostr); + if (force_quoting) + writeChar('"', ostr); } -template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const { auto x = static_cast(column).getData()[row_num]; if (likely(std::isfinite(x))) @@ -157,7 +161,7 @@ template <> inline void IDataTypeNumber::serializeTextJSON(const IColum writeCString("null", ostr); } -template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const { auto x = static_cast(column).getData()[row_num]; if (likely(std::isfinite(x))) diff --git a/dbms/include/DB/Interpreters/Settings.h b/dbms/include/DB/Interpreters/Settings.h index 6124be139a3..9e1577d9a74 100644 --- a/dbms/include/DB/Interpreters/Settings.h +++ b/dbms/include/DB/Interpreters/Settings.h @@ -225,7 +225,10 @@ struct Settings M(SettingBool, add_http_cors_header, false) \ \ /** Skip columns with unknown names from input data (it works for JSONEachRow and TSKV formats). */ \ - M(SettingBool, input_format_skip_unknown_fields, false) + M(SettingBool, input_format_skip_unknown_fields, false) \ + \ + /** Controls quoting of 64-bit integers in JSON output format. 
*/ \ + M(SettingBool, output_format_json_quote_64bit_integers, true) /// Всевозможные ограничения на выполнение запроса. Limits limits; diff --git a/dbms/src/DataStreams/FormatFactory.cpp b/dbms/src/DataStreams/FormatFactory.cpp index 671efc310a7..71729c27bb7 100644 --- a/dbms/src/DataStreams/FormatFactory.cpp +++ b/dbms/src/DataStreams/FormatFactory.cpp @@ -135,12 +135,13 @@ static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf return std::make_shared(std::make_shared(buf)); else if (name == "JSON") return std::make_shared(std::make_shared(buf, sample, - context.getSettingsRef().output_format_write_statistics)); + context.getSettingsRef().output_format_write_statistics, context.getSettingsRef().output_format_json_quote_64bit_integers)); else if (name == "JSONCompact") return std::make_shared(std::make_shared(buf, sample, - context.getSettingsRef().output_format_write_statistics)); + context.getSettingsRef().output_format_write_statistics, context.getSettingsRef().output_format_json_quote_64bit_integers)); else if (name == "JSONEachRow") - return std::make_shared(std::make_shared(buf, sample)); + return std::make_shared(std::make_shared(buf, sample, + context.getSettingsRef().output_format_json_quote_64bit_integers)); else if (name == "XML") return std::make_shared(std::make_shared(buf, sample, context.getSettingsRef().output_format_write_statistics)); diff --git a/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp b/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp index 6b3ea17be0e..81b1c22180a 100644 --- a/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp +++ b/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp @@ -6,15 +6,15 @@ namespace DB { -JSONCompactRowOutputStream::JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_) - : JSONRowOutputStream(ostr_, sample_, write_statistics_) +JSONCompactRowOutputStream::JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool 
write_statistics_, bool force_quoting_) + : JSONRowOutputStream(ostr_, sample_, write_statistics_, force_quoting_) { } void JSONCompactRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num) { - type.serializeTextJSON(column, row_num, *ostr); + type.serializeTextJSON(column, row_num, *ostr, force_quoting); ++field_number; } @@ -56,7 +56,7 @@ void JSONCompactRowOutputStream::writeTotals() writeChar(',', *ostr); const ColumnWithTypeAndName & column = totals.getByPosition(i); - column.type->serializeTextJSON(*column.column.get(), 0, *ostr); + column.type->serializeTextJSON(*column.column.get(), 0, *ostr, force_quoting); } writeChar(']', *ostr); @@ -64,7 +64,7 @@ void JSONCompactRowOutputStream::writeTotals() } -static void writeExtremesElement(const char * title, const Block & extremes, size_t row_num, WriteBuffer & ostr) +static void writeExtremesElement(const char * title, const Block & extremes, size_t row_num, WriteBuffer & ostr, bool force_quoting) { writeCString("\t\t\"", ostr); writeCString(title, ostr); @@ -77,7 +77,7 @@ static void writeExtremesElement(const char * title, const Block & extremes, siz writeChar(',', ostr); const ColumnWithTypeAndName & column = extremes.getByPosition(i); - column.type->serializeTextJSON(*column.column.get(), row_num, ostr); + column.type->serializeTextJSON(*column.column.get(), row_num, ostr, force_quoting); } writeChar(']', ostr); @@ -92,9 +92,9 @@ void JSONCompactRowOutputStream::writeExtremes() writeCString("\t\"extremes\":\n", *ostr); writeCString("\t{\n", *ostr); - writeExtremesElement("min", extremes, 0, *ostr); + writeExtremesElement("min", extremes, 0, *ostr, force_quoting); writeCString(",\n", *ostr); - writeExtremesElement("max", extremes, 1, *ostr); + writeExtremesElement("max", extremes, 1, *ostr, force_quoting); writeChar('\n', *ostr); writeCString("\t}", *ostr); diff --git a/dbms/src/DataStreams/JSONEachRowRowOutputStream.cpp b/dbms/src/DataStreams/JSONEachRowRowOutputStream.cpp 
index 1b1f00b07e9..abf8519a9b1 100644 --- a/dbms/src/DataStreams/JSONEachRowRowOutputStream.cpp +++ b/dbms/src/DataStreams/JSONEachRowRowOutputStream.cpp @@ -7,8 +7,8 @@ namespace DB { -JSONEachRowRowOutputStream::JSONEachRowRowOutputStream(WriteBuffer & ostr_, const Block & sample) - : ostr(ostr_) +JSONEachRowRowOutputStream::JSONEachRowRowOutputStream(WriteBuffer & ostr_, const Block & sample, bool force_quoting_) + : ostr(ostr_), force_quoting(force_quoting_) { size_t columns = sample.columns(); fields.resize(columns); @@ -25,7 +25,7 @@ void JSONEachRowRowOutputStream::writeField(const IColumn & column, const IDataT { writeString(fields[field_number], ostr); writeChar(':', ostr); - type.serializeTextJSON(column, row_num, ostr); + type.serializeTextJSON(column, row_num, ostr, force_quoting); ++field_number; } diff --git a/dbms/src/DataStreams/JSONRowOutputStream.cpp b/dbms/src/DataStreams/JSONRowOutputStream.cpp index 04014cc335e..c804bd03160 100644 --- a/dbms/src/DataStreams/JSONRowOutputStream.cpp +++ b/dbms/src/DataStreams/JSONRowOutputStream.cpp @@ -7,8 +7,8 @@ namespace DB { -JSONRowOutputStream::JSONRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_) - : dst_ostr(ostr_), write_statistics(write_statistics_) +JSONRowOutputStream::JSONRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_, bool force_quoting_) + : dst_ostr(ostr_), write_statistics(write_statistics_), force_quoting(force_quoting_) { NamesAndTypesList columns(sample_.getColumnsList()); fields.assign(columns.begin(), columns.end()); @@ -72,7 +72,7 @@ void JSONRowOutputStream::writeField(const IColumn & column, const IDataType & t writeCString("\t\t\t", *ostr); writeString(fields[field_number].name, *ostr); writeCString(": ", *ostr); - type.serializeTextJSON(column, row_num, *ostr); + type.serializeTextJSON(column, row_num, *ostr, force_quoting); ++field_number; } @@ -152,7 +152,7 @@ void JSONRowOutputStream::writeTotals() 
writeCString("\t\t", *ostr); writeJSONString(column.name, *ostr); writeCString(": ", *ostr); - column.type->serializeTextJSON(*column.column.get(), 0, *ostr); + column.type->serializeTextJSON(*column.column.get(), 0, *ostr, force_quoting); } writeChar('\n', *ostr); @@ -161,7 +161,7 @@ void JSONRowOutputStream::writeTotals() } -static void writeExtremesElement(const char * title, const Block & extremes, size_t row_num, WriteBuffer & ostr) +static void writeExtremesElement(const char * title, const Block & extremes, size_t row_num, WriteBuffer & ostr, bool force_quoting) { writeCString("\t\t\"", ostr); writeCString(title, ostr); @@ -179,7 +179,7 @@ static void writeExtremesElement(const char * title, const Block & extremes, siz writeCString("\t\t\t", ostr); writeJSONString(column.name, ostr); writeCString(": ", ostr); - column.type->serializeTextJSON(*column.column.get(), row_num, ostr); + column.type->serializeTextJSON(*column.column.get(), row_num, ostr, force_quoting); } writeChar('\n', ostr); @@ -195,9 +195,9 @@ void JSONRowOutputStream::writeExtremes() writeCString("\t\"extremes\":\n", *ostr); writeCString("\t{\n", *ostr); - writeExtremesElement("min", extremes, 0, *ostr); + writeExtremesElement("min", extremes, 0, *ostr, force_quoting); writeCString(",\n", *ostr); - writeExtremesElement("max", extremes, 1, *ostr); + writeExtremesElement("max", extremes, 1, *ostr, force_quoting); writeChar('\n', *ostr); writeCString("\t}", *ostr); diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 1bb410ade44..48214fb33db 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -195,7 +195,7 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff } -void DataTypeAggregateFunction::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void DataTypeAggregateFunction::serializeTextJSON(const 
IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const { writeJSONString(serializeToString(function, column, row_num), ostr); } diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp index 738719ab7b1..e458317bdd7 100644 --- a/dbms/src/DataTypes/DataTypeArray.cpp +++ b/dbms/src/DataTypes/DataTypeArray.cpp @@ -288,7 +288,7 @@ void DataTypeArray::deserializeTextQuoted(IColumn & column, ReadBuffer & istr) c } -void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const { const ColumnArray & column_array = static_cast(column); const ColumnArray::Offsets_t & offsets = column_array.getOffsets(); @@ -303,7 +303,7 @@ void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, Wr { if (i != offset) writeChar(',', ostr); - nested->serializeTextJSON(nested_column, i, ostr); + nested->serializeTextJSON(nested_column, i, ostr, force_quoting); } writeChar(']', ostr); } diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index 208e9a470fc..c298cd1feb0 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -170,7 +170,7 @@ void DataTypeEnum::deserializeTextQuoted(IColumn & column, ReadBuffer & is } template -void DataTypeEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void DataTypeEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const { writeJSONString(getNameForValue(static_cast(column).getData()[row_num]), ostr); } diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index 1ec5e0379d9..f0b67d36aae 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -158,7 +158,7 @@ void 
DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & i } -void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const { const char * pos = reinterpret_cast(&static_cast(column).getChars()[n * row_num]); writeJSONString(pos, pos + n, ostr); diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 4f0bad798ea..4a80971c288 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -266,7 +266,7 @@ void DataTypeString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr) } -void DataTypeString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void DataTypeString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool) const { writeJSONString(static_cast(column).getDataAt(row_num), ostr); } diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 5f2a4ae64c0..99b83a4e7d1 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ b/dbms/src/DataTypes/DataTypeTuple.cpp @@ -144,14 +144,14 @@ void DataTypeTuple::deserializeTextQuoted(IColumn & column, ReadBuffer & istr) c deserializeText(column, istr); } -void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const { writeChar('[', ostr); for (const auto i : ext::range(0, ext::size(elems))) { if (i != 0) writeChar(',', ostr); - elems[i]->serializeTextJSON(extractElementColumn(column, i), row_num, ostr); + elems[i]->serializeTextJSON(extractElementColumn(column, i), row_num, ostr, force_quoting); } writeChar(']', ostr); } diff --git 
a/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference b/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference new file mode 100644 index 00000000000..8240d0b21f6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.reference @@ -0,0 +1,264 @@ +{ + "meta": + [ + { + "name": "i0", + "type": "Int64" + }, + { + "name": "u0", + "type": "UInt64" + }, + { + "name": "ip", + "type": "Int64" + }, + { + "name": "in", + "type": "Int64" + }, + { + "name": "up", + "type": "UInt64" + }, + { + "name": "arr", + "type": "Array(Int64)" + }, + { + "name": "tuple", + "type": "Tuple(UInt64, UInt64)" + } + ], + + "data": + [ + { + "i0": "0", + "u0": "0", + "ip": "9223372036854775807", + "in": "-9223372036854775808", + "up": "18446744073709551615", + "arr": ["0"], + "tuple": ["0","0"] + } + ], + + "totals": + { + "i0": "0", + "u0": "0", + "ip": "0", + "in": "0", + "up": "0", + "arr": [], + "tuple": ["0","0"] + }, + + "extremes": + { + "min": + { + "i0": "0", + "u0": "0", + "ip": "9223372036854775807", + "in": "-9223372036854775808", + "up": "18446744073709551615", + "arr": [], + "tuple": ["0","0"] + }, + "max": + { + "i0": "0", + "u0": "0", + "ip": "9223372036854775807", + "in": "-9223372036854775808", + "up": "18446744073709551615", + "arr": [], + "tuple": ["0","0"] + } + }, + + "rows": 1 +} +{ + "meta": + [ + { + "name": "i0", + "type": "Int64" + }, + { + "name": "u0", + "type": "UInt64" + }, + { + "name": "ip", + "type": "Int64" + }, + { + "name": "in", + "type": "Int64" + }, + { + "name": "up", + "type": "UInt64" + }, + { + "name": "arr", + "type": "Array(Int64)" + }, + { + "name": "tuple", + "type": "Tuple(UInt64, UInt64)" + } + ], + + "data": + [ + ["0", "0", "9223372036854775807", "-9223372036854775808", "18446744073709551615", ["0"], ["0","0"]] + ], + + "totals": ["0","0","0","0","0",[],["0","0"]], + + "extremes": + { + "min": 
["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",[],["0","0"]], + "max": ["0","0","9223372036854775807","-9223372036854775808","18446744073709551615",[],["0","0"]] + }, + + "rows": 1 +} +{"i0":"0","u0":"0","ip":"9223372036854775807","in":"-9223372036854775808","up":"18446744073709551615","arr":["0"],"tuple":["0","0"]} +{ + "meta": + [ + { + "name": "i0", + "type": "Int64" + }, + { + "name": "u0", + "type": "UInt64" + }, + { + "name": "ip", + "type": "Int64" + }, + { + "name": "in", + "type": "Int64" + }, + { + "name": "up", + "type": "UInt64" + }, + { + "name": "arr", + "type": "Array(Int64)" + }, + { + "name": "tuple", + "type": "Tuple(UInt64, UInt64)" + } + ], + + "data": + [ + { + "i0": 0, + "u0": 0, + "ip": 9223372036854775807, + "in": -9223372036854775808, + "up": 18446744073709551615, + "arr": [0], + "tuple": [0,0] + } + ], + + "totals": + { + "i0": 0, + "u0": 0, + "ip": 0, + "in": 0, + "up": 0, + "arr": [], + "tuple": [0,0] + }, + + "extremes": + { + "min": + { + "i0": 0, + "u0": 0, + "ip": 9223372036854775807, + "in": -9223372036854775808, + "up": 18446744073709551615, + "arr": [], + "tuple": [0,0] + }, + "max": + { + "i0": 0, + "u0": 0, + "ip": 9223372036854775807, + "in": -9223372036854775808, + "up": 18446744073709551615, + "arr": [], + "tuple": [0,0] + } + }, + + "rows": 1 +} +{ + "meta": + [ + { + "name": "i0", + "type": "Int64" + }, + { + "name": "u0", + "type": "UInt64" + }, + { + "name": "ip", + "type": "Int64" + }, + { + "name": "in", + "type": "Int64" + }, + { + "name": "up", + "type": "UInt64" + }, + { + "name": "arr", + "type": "Array(Int64)" + }, + { + "name": "tuple", + "type": "Tuple(UInt64, UInt64)" + } + ], + + "data": + [ + [0, 0, 9223372036854775807, -9223372036854775808, 18446744073709551615, [0], [0,0]] + ], + + "totals": [0,0,0,0,0,[],[0,0]], + + "extremes": + { + "min": [0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[],[0,0]], + "max": 
[0,0,9223372036854775807,-9223372036854775808,18446744073709551615,[],[0,0]] + }, + + "rows": 1 +} +{"i0":0,"u0":0,"ip":9223372036854775807,"in":-9223372036854775808,"up":18446744073709551615,"arr":[0],"tuple":[0,0]} diff --git a/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql b/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql new file mode 100644 index 00000000000..261a044c711 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00378_json_quote_64bit_integers.sql @@ -0,0 +1,12 @@ +SET output_format_write_statistics = 0; +SET extremes = 1; + +SET output_format_json_quote_64bit_integers = 1; +SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple WITH TOTALS FORMAT JSON; +SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple WITH TOTALS FORMAT JSONCompact; +SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple WITH TOTALS FORMAT JSONEachRow; + +SET output_format_json_quote_64bit_integers = 0; +SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple WITH TOTALS FORMAT JSON; +SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple WITH TOTALS FORMAT JSONCompact; +SELECT toInt64(0) as i0, toUInt64(0) as u0, toInt64(9223372036854775807) as ip, 
toInt64(-9223372036854775808) as in, toUInt64(18446744073709551615) as up, [toInt64(0)] as arr, (toUInt64(0), toUInt64(0)) as tuple WITH TOTALS FORMAT JSONEachRow; \ No newline at end of file From c3617aaa32fd8b5522f06c298d5a45045e09cc2b Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Fri, 30 Sep 2016 19:21:09 +0300 Subject: [PATCH 15/15] Added docs for output_format_json_quote_64bit_integers parameter. [#METR-22950] --- .../DB/DataStreams/JSONCompactRowOutputStream.h | 2 +- .../DB/DataStreams/JSONEachRowRowOutputStream.h | 4 ++-- .../include/DB/DataStreams/JSONRowOutputStream.h | 4 ++-- dbms/include/DB/DataTypes/IDataType.h | 4 ++-- dbms/include/DB/DataTypes/IDataTypeNumber.h | 12 ++++++------ .../DataStreams/JSONCompactRowOutputStream.cpp | 16 ++++++++-------- dbms/src/DataTypes/DataTypeArray.cpp | 4 ++-- dbms/src/DataTypes/DataTypeTuple.cpp | 4 ++-- doc/reference_en.html | 9 +++++++-- doc/reference_ru.html | 6 +++++- 10 files changed, 37 insertions(+), 28 deletions(-) diff --git a/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h b/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h index 30b10920c58..1f1cf9a632e 100644 --- a/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h +++ b/dbms/include/DB/DataStreams/JSONCompactRowOutputStream.h @@ -14,7 +14,7 @@ namespace DB class JSONCompactRowOutputStream : public JSONRowOutputStream { public: - JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_, bool force_quoting_); + JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_, bool force_quoting_64bit_integers_); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; diff --git a/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h b/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h index a4686c7e078..ba8ccd60910 100644 --- 
a/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h +++ b/dbms/include/DB/DataStreams/JSONEachRowRowOutputStream.h @@ -14,7 +14,7 @@ namespace DB class JSONEachRowRowOutputStream : public IRowOutputStream { public: - JSONEachRowRowOutputStream(WriteBuffer & ostr_, const Block & sample, bool force_quoting_ = true); + JSONEachRowRowOutputStream(WriteBuffer & ostr_, const Block & sample, bool force_quoting_64bit_integers_); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; @@ -30,7 +30,7 @@ private: WriteBuffer & ostr; size_t field_number = 0; Names fields; - bool force_quoting; + bool force_quoting_64bit_integers; }; } diff --git a/dbms/include/DB/DataStreams/JSONRowOutputStream.h b/dbms/include/DB/DataStreams/JSONRowOutputStream.h index da4cd50a458..811af014752 100644 --- a/dbms/include/DB/DataStreams/JSONRowOutputStream.h +++ b/dbms/include/DB/DataStreams/JSONRowOutputStream.h @@ -16,7 +16,7 @@ class JSONRowOutputStream : public IRowOutputStream { public: JSONRowOutputStream(WriteBuffer & ostr_, const Block & sample_, - bool write_statistics_, bool force_quoting_ = true); + bool write_statistics_, bool force_quoting_64bit_integers_); void writeField(const IColumn & column, const IDataType & type, size_t row_num) override; void writeFieldDelimiter() override; @@ -68,7 +68,7 @@ protected: Progress progress; Stopwatch watch; bool write_statistics; - bool force_quoting; + bool force_quoting_64bit_integers; }; } diff --git a/dbms/include/DB/DataTypes/IDataType.h b/dbms/include/DB/DataTypes/IDataType.h index 7ba9055b488..62b0f3c9dfd 100644 --- a/dbms/include/DB/DataTypes/IDataType.h +++ b/dbms/include/DB/DataTypes/IDataType.h @@ -88,9 +88,9 @@ public: virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0; /** Text serialization intended for using in JSON format. 
- * If values can be serizlized without quotes, force_quoting parameter forces to brace them into quotes (make sense for Int64 types). + * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. */ - virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const = 0; + virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting_64bit_integers) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr) const = 0; /** Текстовая сериализация для подстановки в формат XML. diff --git a/dbms/include/DB/DataTypes/IDataTypeNumber.h b/dbms/include/DB/DataTypes/IDataTypeNumber.h index 234d294eeab..7d592b2ab86 100644 --- a/dbms/include/DB/DataTypes/IDataTypeNumber.h +++ b/dbms/include/DB/DataTypes/IDataTypeNumber.h @@ -134,21 +134,21 @@ template inline void IDataTypeNumber::serializeTextJSON( serializeText(column, row_num, ostr); } -template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const +template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting_64bit_integers) const { - if (force_quoting) + if (force_quoting_64bit_integers) writeChar('"', ostr); serializeText(column, row_num, ostr); - if (force_quoting) + if (force_quoting_64bit_integers) writeChar('"', ostr); } -template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const +template <> inline void IDataTypeNumber::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting_64bit_integers) const { - if (force_quoting) + if (force_quoting_64bit_integers) writeChar('"', ostr); serializeText(column, row_num, ostr); - if (force_quoting) + if (force_quoting_64bit_integers) writeChar('"', 
ostr); } diff --git a/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp b/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp index 81b1c22180a..f9ff633ad23 100644 --- a/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp +++ b/dbms/src/DataStreams/JSONCompactRowOutputStream.cpp @@ -6,15 +6,15 @@ namespace DB { -JSONCompactRowOutputStream::JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_, bool force_quoting_) - : JSONRowOutputStream(ostr_, sample_, write_statistics_, force_quoting_) +JSONCompactRowOutputStream::JSONCompactRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool write_statistics_, bool force_quoting_64bit_integers_) + : JSONRowOutputStream(ostr_, sample_, write_statistics_, force_quoting_64bit_integers_) { } void JSONCompactRowOutputStream::writeField(const IColumn & column, const IDataType & type, size_t row_num) { - type.serializeTextJSON(column, row_num, *ostr, force_quoting); + type.serializeTextJSON(column, row_num, *ostr, force_quoting_64bit_integers); ++field_number; } @@ -56,7 +56,7 @@ void JSONCompactRowOutputStream::writeTotals() writeChar(',', *ostr); const ColumnWithTypeAndName & column = totals.getByPosition(i); - column.type->serializeTextJSON(*column.column.get(), 0, *ostr, force_quoting); + column.type->serializeTextJSON(*column.column.get(), 0, *ostr, force_quoting_64bit_integers); } writeChar(']', *ostr); @@ -64,7 +64,7 @@ void JSONCompactRowOutputStream::writeTotals() } -static void writeExtremesElement(const char * title, const Block & extremes, size_t row_num, WriteBuffer & ostr, bool force_quoting) +static void writeExtremesElement(const char * title, const Block & extremes, size_t row_num, WriteBuffer & ostr, bool force_quoting_64bit_integers) { writeCString("\t\t\"", ostr); writeCString(title, ostr); @@ -77,7 +77,7 @@ static void writeExtremesElement(const char * title, const Block & extremes, siz writeChar(',', ostr); const ColumnWithTypeAndName & column = 
extremes.getByPosition(i); - column.type->serializeTextJSON(*column.column.get(), row_num, ostr, force_quoting); + column.type->serializeTextJSON(*column.column.get(), row_num, ostr, force_quoting_64bit_integers); } writeChar(']', ostr); @@ -92,9 +92,9 @@ void JSONCompactRowOutputStream::writeExtremes() writeCString("\t\"extremes\":\n", *ostr); writeCString("\t{\n", *ostr); - writeExtremesElement("min", extremes, 0, *ostr, force_quoting); + writeExtremesElement("min", extremes, 0, *ostr, force_quoting_64bit_integers); writeCString(",\n", *ostr); - writeExtremesElement("max", extremes, 1, *ostr, force_quoting); + writeExtremesElement("max", extremes, 1, *ostr, force_quoting_64bit_integers); writeChar('\n', *ostr); writeCString("\t}", *ostr); diff --git a/dbms/src/DataTypes/DataTypeArray.cpp b/dbms/src/DataTypes/DataTypeArray.cpp index e458317bdd7..9e00702c4cb 100644 --- a/dbms/src/DataTypes/DataTypeArray.cpp +++ b/dbms/src/DataTypes/DataTypeArray.cpp @@ -288,7 +288,7 @@ void DataTypeArray::deserializeTextQuoted(IColumn & column, ReadBuffer & istr) c } -void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const +void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting_64bit_integers) const { const ColumnArray & column_array = static_cast(column); const ColumnArray::Offsets_t & offsets = column_array.getOffsets(); @@ -303,7 +303,7 @@ void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, Wr { if (i != offset) writeChar(',', ostr); - nested->serializeTextJSON(nested_column, i, ostr, force_quoting); + nested->serializeTextJSON(nested_column, i, ostr, force_quoting_64bit_integers); } writeChar(']', ostr); } diff --git a/dbms/src/DataTypes/DataTypeTuple.cpp b/dbms/src/DataTypes/DataTypeTuple.cpp index 99b83a4e7d1..5f5ccf51131 100644 --- a/dbms/src/DataTypes/DataTypeTuple.cpp +++ b/dbms/src/DataTypes/DataTypeTuple.cpp @@ 
-144,14 +144,14 @@ void DataTypeTuple::deserializeTextQuoted(IColumn & column, ReadBuffer & istr) c deserializeText(column, istr); } -void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting) const +void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, bool force_quoting_64bit_integers) const { writeChar('[', ostr); for (const auto i : ext::range(0, ext::size(elems))) { if (i != 0) writeChar(',', ostr); - elems[i]->serializeTextJSON(extractElementColumn(column, i), row_num, ostr, force_quoting); + elems[i]->serializeTextJSON(extractElementColumn(column, i), row_num, ostr, force_quoting_64bit_integers); } writeChar(']', ostr); } diff --git a/doc/reference_en.html b/doc/reference_en.html index 0d1602e4d74..ea7e7a0e5f9 100644 --- a/doc/reference_en.html +++ b/doc/reference_en.html @@ -6616,12 +6616,17 @@ Allows setting a default sampling coefficient for all SELECT queries. (For tables that don't support sampling, an exception will be thrown.) If set to 1, default sampling is not performed. -====input_format_skip_unknown_fields== +==input_format_skip_unknown_fields== If the parameter is true, INSERT operation will skip columns with unknown names from input. -Otherwise, an exception will be generated. +Otherwise, an exception will be generated; this is the default behavior. The parameter works only for JSONEachRow and TSKV formats. +==output_format_json_quote_64bit_integers== + +If the parameter is true (default value), UInt64 and Int64 numbers are printed as quoted strings in all JSON output formats. +Such behavior is compatible with most JavaScript interpreters that store all numbers as double-precision floating point numbers. +Otherwise, they are printed as regular numbers.
==Restrictions on query complexity== diff --git a/doc/reference_ru.html b/doc/reference_ru.html index a7c7325d60b..b3d15e3ff9c 100644 --- a/doc/reference_ru.html +++ b/doc/reference_ru.html @@ -3881,7 +3881,7 @@ Extremes: } %% -JSON совместим с JavaScript. Для этого, дополнительно эскейпятся некоторые символы: символ прямого слеша %%/%% экранируется в виде %%\/%%; альтернативные переводы строк %%U+2028%%, %%U+2029%%, на которых ломаются некоторые браузеры, экранируются в виде \uXXXX-последовательностей. Эскейпятся ASCII control characters: backspace, form feed, line feed, carriage return, horizontal tab в виде %%\b%%, %%\f%%, %%\n%%, %%\r%%, %%\t%% соответственно, а также остальные байты из диапазона 00-1F с помощью \uXXXX-последовательностей. Невалидные UTF-8 последовательности заменяются на replacement character %%�%% и, таким образом, выводимый текст будет состоять из валидных UTF-8 последовательностей. Числа типа UInt64 и Int64, для совместимости с JavaScript, выводятся в двойных кавычках. +JSON совместим с JavaScript. Для этого, дополнительно эскейпятся некоторые символы: символ прямого слеша %%/%% экранируется в виде %%\/%%; альтернативные переводы строк %%U+2028%%, %%U+2029%%, на которых ломаются некоторые браузеры, экранируются в виде \uXXXX-последовательностей. Эскейпятся ASCII control characters: backspace, form feed, line feed, carriage return, horizontal tab в виде %%\b%%, %%\f%%, %%\n%%, %%\r%%, %%\t%% соответственно, а также остальные байты из диапазона 00-1F с помощью \uXXXX-последовательностей. Невалидные UTF-8 последовательности заменяются на replacement character %%�%% и, таким образом, выводимый текст будет состоять из валидных UTF-8 последовательностей. Числа типа UInt64 и Int64, для совместимости с JavaScript, по-умолчанию выводятся в двойных кавычках, чтобы они выводились без кавычек можно установить конфигурационный параметр output_format_json_quote_64bit_integers равным 0. %%rows%% - общее количество выведенных строчек. 
%%rows_before_limit_at_least%% - не менее скольких строчек получилось бы, если бы не было LIMIT-а. Выводится только если запрос содержит LIMIT. @@ -6775,6 +6775,10 @@ regions_names_*.txt: TabSeparated (без заголовка), столбцы: Если значение истино, то при выполнении INSERT из входных данных пропускаются (не рассматриваются) колонки с неизвестными именами, иначе в данной ситуации будет сгенерировано исключение. Работает для форматов JSONEachRow и TSKV. +==output_format_json_quote_64bit_integers== + +Если значение истино, то при использовании JSON* форматов UInt64 и Int64 числа выводятся в кавычках (из соображений совместимости с большинством реализаций JavaScript), иначе - без кавычек. + ==Ограничения на сложность запроса==