diff --git a/dbms/include/DB/Functions/FunctionsArray.h b/dbms/include/DB/Functions/FunctionsArray.h index 49917ba103d..5867fdcd141 100644 --- a/dbms/include/DB/Functions/FunctionsArray.h +++ b/dbms/include/DB/Functions/FunctionsArray.h @@ -74,883 +74,76 @@ class FunctionArray : public IFunction { public: static constexpr auto name = "array"; - static FunctionPtr create(const Context & context) { return std::make_shared(context); } + static FunctionPtr create(const Context & context); - FunctionArray(const Context & context) : context(context) {} + FunctionArray(const Context & context); + + void setCaseMode(); + + /// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + + /// Выполнить функцию над блоком. + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + +private: + /// Получить имя функции. + String getName() const override; + + bool addField(DataTypePtr type_res, const Field & f, Array & arr) const; + static const DataTypePtr & getScalarType(const DataTypePtr & type); + DataTypeTraits::EnrichedDataTypePtr getLeastCommonType(const DataTypes & arguments) const; private: const Context & context; - - /// Получить имя функции. - String getName() const override - { - return is_case_mode ? 
"CASE" : name; - } - - template - bool tryAddField(DataTypePtr type_res, const Field & f, Array & arr) const - { - if (typeid_cast(type_res.get())) - { - arr.push_back(apply_visitor(FieldVisitorConvertToNumber(), f)); - return true; - } - return false; - } - - bool addField(DataTypePtr type_res, const Field & f, Array & arr) const - { - /// Иначе необходимо - if ( tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) - || tryAddField(type_res, f, arr) ) - return true; - else - { - if (is_case_mode) - throw Exception{"Illegal type encountered while processing the CASE construction.", - ErrorCodes::LOGICAL_ERROR}; - else - throw Exception{"Illegal result type " + type_res->getName() + " of function " + getName(), - ErrorCodes::LOGICAL_ERROR}; - } - } - - static const DataTypePtr & getScalarType(const DataTypePtr & type) - { - const auto array = typeid_cast(type.get()); - - if (!array) - return type; - - return getScalarType(array->getNestedType()); - } - - DataTypeTraits::EnrichedDataTypePtr getLeastCommonType(const DataTypes & arguments) const - { - DataTypeTraits::EnrichedDataTypePtr result_type; - - try - { - result_type = Conditional::getArrayType(arguments); - } - catch (const Conditional::CondException & ex) - { - /// Translate a context-free error into a contextual error. 
- if (is_case_mode) - { - if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_ILLEGAL_COLUMN_TYPE) - throw Exception{"Illegal type of column " + ex.getMsg1() + - " in CASE construction", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - else if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_UPSCALING_ERROR) - throw Exception{"THEN/ELSE clause parameters in CASE construction are not upscalable to a " - "common type without loss of precision: " + ex.getMsg1(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - else - throw Exception{"An unexpected error has occurred in CASE expression", - ErrorCodes::LOGICAL_ERROR}; - } - else - { - if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_ILLEGAL_COLUMN_TYPE) - throw Exception{"Illegal type of column " + ex.getMsg1() + - " in array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - else if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_UPSCALING_ERROR) - throw Exception("Arguments of function " + getName() + " are not upscalable " - "to a common type without loss of precision.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - else - throw Exception{"An unexpected error has occurred in function " + getName(), - ErrorCodes::LOGICAL_ERROR}; - } - } - - return result_type; - } - -public: - void setCaseMode() - { - is_case_mode = true; - } - - /// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. 
- DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.empty()) - { - if (is_case_mode) - throw Exception{"Either WHEN clauses or THEN clauses are missing " - "in the CASE construction.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - else - throw Exception{"Function array requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - } - - DataTypePtr result_type = arguments[0]; - - if (result_type->behavesAsNumber()) - { - /// Если тип числовой, пробуем выделить наименьший общий тип - auto enriched_result_type = getLeastCommonType(arguments); - return std::make_shared(enriched_result_type); - } - else - { - /// Иначе все аргументы должны быть одинаковыми - for (size_t i = 1, size = arguments.size(); i < size; ++i) - { - if (arguments[i]->getName() != arguments[0]->getName()) - { - if (is_case_mode) - throw Exception{"Found type discrepancy in either WHEN " - "clauses or THEN clauses of the CASE construction", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - else - throw Exception{"Arguments for function array must have same type or behave as number.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - } - } - - return std::make_shared(result_type); - } - } - - /// Выполнить функцию над блоком. - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - size_t num_elements = arguments.size(); - bool is_const = true; - - for (const auto arg_num : arguments) - { - if (!block.getByPosition(arg_num).column->isConst()) - { - is_const = false; - break; - } - } - - const auto first_arg = block.getByPosition(arguments[0]); - DataTypePtr result_type = first_arg.type; - DataTypeTraits::EnrichedDataTypePtr enriched_result_type; - if (result_type->behavesAsNumber()) - { - /// If type is numeric, calculate least common type. 
- DataTypes types; - types.reserve(num_elements); - - for (const auto & argument : arguments) - types.push_back(block.getByPosition(argument).type); - - enriched_result_type = getLeastCommonType(types); - result_type = enriched_result_type.first; - } - - if (is_const) - { - Array arr; - for (const auto arg_num : arguments) - if (block.getByPosition(arg_num).type->getName() == result_type->getName()) - /// Если элемент такого же типа как результат, просто добавляем его в ответ - arr.push_back((*block.getByPosition(arg_num).column)[0]); - else - /// Иначе необходимо привести его к типу результата - addField(result_type, (*block.getByPosition(arg_num).column)[0], arr); - - block.getByPosition(result).column = std::make_shared( - first_arg.column->size(), arr, std::make_shared(result_type)); - } - else - { - size_t block_size = block.rowsInFirstColumn(); - - /** If part of columns have not same type as common type of all elements of array, - * then convert them to common type. - * If part of columns are constants, - * then convert them to full columns. - */ - - Columns columns_holder(num_elements); - const IColumn * columns[num_elements]; - - for (size_t i = 0; i < num_elements; ++i) - { - const auto & arg = block.getByPosition(arguments[i]); - - String result_type_name = result_type->getName(); - ColumnPtr preprocessed_column = arg.column; - - if (arg.type->getName() != result_type_name) - { - Block temporary_block - { - { - arg.column, - arg.type, - arg.name - }, - { - std::make_shared(block_size, result_type_name), - std::make_shared(), - "" - }, - { - nullptr, - result_type, - "" - } - }; - - FunctionCast func_cast(context); - - { - DataTypePtr unused_return_type; - ColumnsWithTypeAndName arguments{ temporary_block.unsafeGetByPosition(0), temporary_block.unsafeGetByPosition(1) }; - std::vector unused_prerequisites; - - /// Prepares function to execution. TODO It is not obvious. 
- func_cast.getReturnTypeAndPrerequisites(arguments, unused_return_type, unused_prerequisites); - } - - func_cast.execute(temporary_block, {0, 1}, 2); - preprocessed_column = temporary_block.unsafeGetByPosition(2).column; - } - - if (auto materialized_column = preprocessed_column->convertToFullColumnIfConst()) - preprocessed_column = materialized_column; - - columns_holder[i] = std::move(preprocessed_column); - columns[i] = columns_holder[i].get(); - } - - /** Create and fill the result array. - */ - - auto out = std::make_shared(result_type->createColumn()); - IColumn & out_data = out->getData(); - IColumn::Offsets_t & out_offsets = out->getOffsets(); - - out_data.reserve(block_size * num_elements); - out_offsets.resize(block_size); - - IColumn::Offset_t current_offset = 0; - for (size_t i = 0; i < block_size; ++i) - { - for (size_t j = 0; j < num_elements; ++j) - out_data.insertFrom(*columns[j], i); - - current_offset += num_elements; - out_offsets[i] = current_offset; - } - - block.getByPosition(result).column = out; - } - } - -private: bool is_case_mode = false; }; -template -struct ArrayElementNumImpl -{ - /** Implementation for constant index. - * If negative = false - index is from beginning of array, started from 1. - * If negative = true - index is from end of array, started from -1. - */ - template - static void vectorConst( - const PaddedPODArray & data, const ColumnArray::Offsets_t & offsets, - const ColumnArray::Offset_t index, - PaddedPODArray & result) - { - size_t size = offsets.size(); - result.resize(size); - - ColumnArray::Offset_t current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - - if (index < array_size) - result[i] = !negative ? data[current_offset + index] : data[offsets[i] - index - 1]; - else - result[i] = T(); - - current_offset = offsets[i]; - } - } - - /** Implementation for non-constant index. 
- */ - template - static void vector( - const PaddedPODArray & data, const ColumnArray::Offsets_t & offsets, - const PaddedPODArray & indices, - PaddedPODArray & result) - { - size_t size = offsets.size(); - result.resize(size); - - ColumnArray::Offset_t current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - - TIndex index = indices[i]; - if (index > 0 && static_cast(index) <= array_size) - result[i] = data[current_offset + index - 1]; - else if (index < 0 && static_cast(-index) <= array_size) - result[i] = data[offsets[i] + index]; - else - result[i] = T(); - - current_offset = offsets[i]; - } - } -}; - -struct ArrayElementStringImpl -{ - /** Implementation for constant index. - * If negative = false - index is from beginning of array, started from 1. - * If negative = true - index is from end of array, started from -1. - */ - template - static void vectorConst( - const ColumnString::Chars_t & data, const ColumnArray::Offsets_t & offsets, const ColumnString::Offsets_t & string_offsets, - const ColumnArray::Offset_t index, - ColumnString::Chars_t & result_data, ColumnArray::Offsets_t & result_offsets) - { - size_t size = offsets.size(); - result_offsets.resize(size); - result_data.reserve(data.size()); - - ColumnArray::Offset_t current_offset = 0; - ColumnArray::Offset_t current_result_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - - if (index < array_size) - { - size_t adjusted_index = !negative ? index : (array_size - index - 1); - - ColumnArray::Offset_t string_pos = current_offset == 0 && adjusted_index == 0 - ? 
0 - : string_offsets[current_offset + adjusted_index - 1]; - - ColumnArray::Offset_t string_size = string_offsets[current_offset + adjusted_index] - string_pos; - - result_data.resize(current_result_offset + string_size); - memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size); - current_result_offset += string_size; - result_offsets[i] = current_result_offset; - } - else - { - /// Вставим пустую строку. - result_data.resize(current_result_offset + 1); - result_data[current_result_offset] = 0; - current_result_offset += 1; - result_offsets[i] = current_result_offset; - } - - current_offset = offsets[i]; - } - } - - /** Implementation for non-constant index. - */ - template - static void vector( - const ColumnString::Chars_t & data, const ColumnArray::Offsets_t & offsets, const ColumnString::Offsets_t & string_offsets, - const PaddedPODArray & indices, - ColumnString::Chars_t & result_data, ColumnArray::Offsets_t & result_offsets) - { - size_t size = offsets.size(); - result_offsets.resize(size); - result_data.reserve(data.size()); - - ColumnArray::Offset_t current_offset = 0; - ColumnArray::Offset_t current_result_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - size_t adjusted_index; /// index in array from zero - - TIndex index = indices[i]; - if (index > 0 && static_cast(index) <= array_size) - adjusted_index = index - 1; - else if (index < 0 && static_cast(-index) <= array_size) - adjusted_index = array_size + index; - else - adjusted_index = array_size; /// means no element should be taken - - if (adjusted_index < array_size) - { - ColumnArray::Offset_t string_pos = current_offset == 0 && adjusted_index == 0 - ? 
0 - : string_offsets[current_offset + adjusted_index - 1]; - - ColumnArray::Offset_t string_size = string_offsets[current_offset + adjusted_index] - string_pos; - - result_data.resize(current_result_offset + string_size); - memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size); - current_result_offset += string_size; - result_offsets[i] = current_result_offset; - } - else - { - /// Insert empty string - result_data.resize(current_result_offset + 1); - result_data[current_result_offset] = 0; - current_result_offset += 1; - result_offsets[i] = current_result_offset; - } - - current_offset = offsets[i]; - } - } -}; - -/// Generic implementation for other nested types. -struct ArrayElementGenericImpl -{ - /** Implementation for constant index. - * If negative = false - index is from beginning of array, started from 1. - * If negative = true - index is from end of array, started from -1. - */ - template - static void vectorConst( - const IColumn & data, const ColumnArray::Offsets_t & offsets, - const ColumnArray::Offset_t index, - IColumn & result) - { - size_t size = offsets.size(); - result.reserve(size); - - ColumnArray::Offset_t current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - - if (index < array_size) - result.insertFrom(data, !negative ? current_offset + index : offsets[i] - index - 1); - else - result.insertDefault(); - - current_offset = offsets[i]; - } - } - - /** Implementation for non-constant index. 
- */ - template - static void vector( - const IColumn & data, const ColumnArray::Offsets_t & offsets, - const PaddedPODArray & indices, - IColumn & result) - { - size_t size = offsets.size(); - result.reserve(size); - - ColumnArray::Offset_t current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - - TIndex index = indices[i]; - if (index > 0 && static_cast(index) <= array_size) - result.insertFrom(data, current_offset + index - 1); - else if (index < 0 && static_cast(-index) <= array_size) - result.insertFrom(data, offsets[i] + index); - else - result.insertDefault(); - - current_offset = offsets[i]; - } - } -}; - class FunctionArrayElement : public IFunction { public: static constexpr auto name = "arrayElement"; - static FunctionPtr create(const Context & context) { return std::make_shared(); } + static FunctionPtr create(const Context & context); + + /// Получить имя функции. + String getName() const override; + + /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + + /// Выполнить функцию над блоком. 
+ void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; private: template - bool executeNumberConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) - { - const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - - if (!col_array) - return false; - - const ColumnVector * col_nested = typeid_cast *>(&col_array->getData()); - - if (!col_nested) - return false; - - auto col_res = std::make_shared>(); - block.getByPosition(result).column = col_res; - - if (index.getType() == Field::Types::UInt64) - ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), safeGet(index) - 1, col_res->getData()); - else if (index.getType() == Field::Types::Int64) - ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), -safeGet(index) - 1, col_res->getData()); - else - throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); - - return true; - } + bool executeNumberConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index); template - bool executeNumber(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) - { - const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + bool executeNumber(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices); - if (!col_array) - return false; - - const ColumnVector * col_nested = typeid_cast *>(&col_array->getData()); - - if (!col_nested) - return false; - - auto col_res = std::make_shared>(); - block.getByPosition(result).column = col_res; - - ArrayElementNumImpl::template vector( - col_nested->getData(), col_array->getOffsets(), indices, col_res->getData()); - - return true; - } - - bool executeStringConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) - { - const 
ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - - if (!col_array) - return false; - - const ColumnString * col_nested = typeid_cast(&col_array->getData()); - - if (!col_nested) - return false; - - std::shared_ptr col_res = std::make_shared(); - block.getByPosition(result).column = col_res; - - if (index.getType() == Field::Types::UInt64) - ArrayElementStringImpl::vectorConst( - col_nested->getChars(), - col_array->getOffsets(), - col_nested->getOffsets(), - safeGet(index) - 1, - col_res->getChars(), - col_res->getOffsets()); - else if (index.getType() == Field::Types::Int64) - ArrayElementStringImpl::vectorConst( - col_nested->getChars(), - col_array->getOffsets(), - col_nested->getOffsets(), - -safeGet(index) - 1, - col_res->getChars(), - col_res->getOffsets()); - else - throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); - - return true; - } + bool executeStringConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index); template - bool executeString(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) - { - const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + bool executeString(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices); - if (!col_array) - return false; - - const ColumnString * col_nested = typeid_cast(&col_array->getData()); - - if (!col_nested) - return false; - - std::shared_ptr col_res = std::make_shared(); - block.getByPosition(result).column = col_res; - - ArrayElementStringImpl::vector( - col_nested->getChars(), - col_array->getOffsets(), - col_nested->getOffsets(), - indices, - col_res->getChars(), - col_res->getOffsets()); - - return true; - } - - bool executeGenericConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) - { - const ColumnArray * col_array = 
typeid_cast(block.getByPosition(arguments[0]).column.get()); - - if (!col_array) - return false; - - const auto & col_nested = col_array->getData(); - auto col_res = col_nested.cloneEmpty(); - block.getByPosition(result).column = col_res; - - if (index.getType() == Field::Types::UInt64) - ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), safeGet(index) - 1, *col_res); - else if (index.getType() == Field::Types::Int64) - ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), -safeGet(index) - 1, *col_res); - else - throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); - - return true; - } + bool executeGenericConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index); template - bool executeGeneric(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) - { - const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + bool executeGeneric(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices); - if (!col_array) - return false; - - const auto & col_nested = col_array->getData(); - auto col_res = col_nested.cloneEmpty(); - block.getByPosition(result).column = col_res; - - ArrayElementGenericImpl::vector( - col_nested, col_array->getOffsets(), indices, *col_res); - - return true; - } - - bool executeConstConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) - { - const ColumnConstArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - - if (!col_array) - return false; - - const DB::Array & array = col_array->getData(); - size_t array_size = array.size(); - size_t real_index = 0; - - if (index.getType() == Field::Types::UInt64) - real_index = safeGet(index) - 1; - else if (index.getType() == Field::Types::Int64) - real_index = array_size + safeGet(index); - else - throw Exception("Illegal type of 
array index", ErrorCodes::LOGICAL_ERROR); - - Field value = col_array->getData().at(real_index); - - block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn( - block.rowsInFirstColumn(), - value); - - return true; - } + bool executeConstConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index); template - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) - { - const ColumnConstArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - - if (!col_array) - return false; - - const DB::Array & array = col_array->getData(); - size_t array_size = array.size(); - - block.getByPosition(result).column = block.getByPosition(result).type->createColumn(); - - for (size_t i = 0; i < col_array->size(); ++i) - { - IndexType index = indices[i]; - if (index > 0 && static_cast(index) <= array_size) - block.getByPosition(result).column->insert(array[index - 1]); - else if (index < 0 && static_cast(-index) <= array_size) - block.getByPosition(result).column->insert(array[array_size + index]); - else - block.getByPosition(result).column->insertDefault(); - } - - return true; - } + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices); template - bool executeArgument(Block & block, const ColumnNumbers & arguments, size_t result) - { - auto index = typeid_cast *>(block.getByPosition(arguments[1]).column.get()); - - if (!index) - return false; - - const auto & index_data = index->getData(); - - if (!( executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, 
result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeNumber (block, arguments, result, index_data) - || executeConst (block, arguments, result, index_data) - || executeString (block, arguments, result, index_data) - || executeGeneric (block, arguments, result, index_data))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - - return true; - } + bool executeArgument(Block & block, const ColumnNumbers & arguments, size_t result); /** Для массива кортежей функция вычисляется покомпонентно - для каждого элемента кортежа. */ - bool executeTuple(Block & block, const ColumnNumbers & arguments, size_t result) - { - ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - - if (!col_array) - return false; - - ColumnTuple * col_nested = typeid_cast(&col_array->getData()); - - if (!col_nested) - return false; - - Block & tuple_block = col_nested->getData(); - size_t tuple_size = tuple_block.columns(); - - /** Будем вычислять функцию для кортежа внутренностей массива. - * Для этого создадим временный блок. - * Он будет состоять из следующих столбцов: - * - индекс массива, который нужно взять; - * - массив из первых элементов кортежей; - * - результат взятия элементов по индексу для массива из первых элементов кортежей; - * - массив из вторых элементов кортежей; - * - результат взятия элементов по индексу для массива из вторых элементов кортежей; - * ... 
- */ - Block block_of_temporary_results; - block_of_temporary_results.insert(block.getByPosition(arguments[1])); - - /// результаты взятия элементов по индексу для массивов из каждых элементов кортежей; - Block result_tuple_block; - - for (size_t i = 0; i < tuple_size; ++i) - { - ColumnWithTypeAndName array_of_tuple_section; - array_of_tuple_section.column = std::make_shared( - tuple_block.getByPosition(i).column, col_array->getOffsetsColumn()); - array_of_tuple_section.type = std::make_shared( - tuple_block.getByPosition(i).type); - block_of_temporary_results.insert(array_of_tuple_section); - - ColumnWithTypeAndName array_elements_of_tuple_section; - block_of_temporary_results.insert(array_elements_of_tuple_section); - - executeImpl(block_of_temporary_results, ColumnNumbers{i * 2 + 1, 0}, i * 2 + 2); - - result_tuple_block.insert(block_of_temporary_results.getByPosition(i * 2 + 2)); - } - - auto col_res = std::make_shared(result_tuple_block); - block.getByPosition(result).column = col_res; - - return true; - } -public: - /// Получить имя функции. - String getName() const override - { - return name; - } - - /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. 
- DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() != 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const DataTypeArray * array_type = typeid_cast(arguments[0].get()); - if (!array_type) - throw Exception("First argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!arguments[1]->isNumeric() - || (!startsWith(arguments[1]->getName(), "UInt") && !startsWith(arguments[1]->getName(), "Int"))) - throw Exception("Second argument for function " + getName() + " must have UInt or Int type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return array_type->getNestedType(); - } - - /// Выполнить функцию над блоком. - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - if (executeTuple(block, arguments, result)) - { - } - else if (!block.getByPosition(arguments[1]).column->isConst()) - { - if (!( executeArgument (block, arguments, result) - || executeArgument (block, arguments, result) - || executeArgument (block, arguments, result) - || executeArgument (block, arguments, result) - || executeArgument (block, arguments, result) - || executeArgument (block, arguments, result) - || executeArgument (block, arguments, result) - || executeArgument (block, arguments, result))) - throw Exception("Second argument for function " + getName() + " must must have UInt or Int type.", - ErrorCodes::ILLEGAL_COLUMN); - } - else - { - Field index = (*block.getByPosition(arguments[1]).column)[0]; - - if (index == UInt64(0)) - throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); - - if (!( executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || 
executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || executeNumberConst (block, arguments, result, index) - || executeConstConst (block, arguments, result, index) - || executeStringConst (block, arguments, result, index) - || executeGenericConst (block, arguments, result, index))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - } + bool executeTuple(Block & block, const ColumnNumbers & arguments, size_t result); }; @@ -1361,77 +554,21 @@ public: } }; + class FunctionArrayEnumerate : public IFunction { public: static constexpr auto name = "arrayEnumerate"; - static FunctionPtr create(const Context & context) { return std::make_shared(); } + static FunctionPtr create(const Context & context); /// Получить имя функции. - String getName() const override - { - return name; - } + String getName() const override; /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. 
- DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() != 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const DataTypeArray * array_type = typeid_cast(arguments[0].get()); - if (!array_type) - throw Exception("First argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(std::make_shared()); - } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; /// Выполнить функцию над блоком. - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - if (const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get())) - { - const ColumnArray::Offsets_t & offsets = array->getOffsets(); - - auto res_nested = std::make_shared(); - auto res_array = std::make_shared(res_nested, array->getOffsetsColumn()); - block.getByPosition(result).column = res_array; - - ColumnUInt32::Container_t & res_values = res_nested->getData(); - res_values.resize(array->getData().size()); - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) - { - res_values[j] = j - prev_off + 1; - } - prev_off = off; - } - } - else if (const ColumnConstArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get())) - { - const Array & values = array->getData(); - - Array res_values(values.size()); - for (size_t i = 0; i < values.size(); ++i) - { - res_values[i] = i + 1; - } - - auto res_array = std::make_shared(array->size(), res_values, std::make_shared(std::make_shared())); - block.getByPosition(result).column = res_array; - } - else - { - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + 
getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; }; @@ -1441,230 +578,37 @@ class FunctionArrayUniq : public IFunction { public: static constexpr auto name = "arrayUniq"; - static FunctionPtr create(const Context & context) { return std::make_shared(); } + static FunctionPtr create(const Context & context); /// Получить имя функции. - String getName() const override - { - return name; - } + String getName() const override; /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() == 0) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < arguments.size(); ++i) - { - const DataTypeArray * array_type = typeid_cast(arguments[i].get()); - if (!array_type) - throw Exception("All arguments for function " + getName() + " must be arrays; argument " + toString(i + 1) + " isn't.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - return std::make_shared(); - } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; /// Выполнить функцию над блоком. 
- void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - if (arguments.size() == 1 && executeConst(block, arguments, result)) - return; - - Columns array_columns(arguments.size()); - const ColumnArray::Offsets_t * offsets = nullptr; - ConstColumnPlainPtrs data_columns(arguments.size()); - - for (size_t i = 0; i < arguments.size(); ++i) - { - ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; - const ColumnArray * array = typeid_cast(array_ptr.get()); - if (!array) - { - const ColumnConstArray * const_array = typeid_cast( - block.getByPosition(arguments[i]).column.get()); - if (!const_array) - throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() - + " of " + toString(i + 1) + "-th argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - array_ptr = const_array->convertToFullColumn(); - array = typeid_cast(array_ptr.get()); - } - array_columns[i] = array_ptr; - const ColumnArray::Offsets_t & offsets_i = array->getOffsets(); - if (!i) - offsets = &offsets_i; - else if (offsets_i != *offsets) - throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - data_columns[i] = &array->getData(); - } - - const ColumnArray * first_array = typeid_cast(array_columns[0].get()); - auto res = std::make_shared(); - block.getByPosition(result).column = res; - - ColumnUInt32::Container_t & res_values = res->getData(); - res_values.resize(offsets->size()); - - if (arguments.size() == 1) - { - if (!( executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber 
(first_array, res_values) - || executeString (first_array, res_values))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - else - { - if (!execute128bit(*offsets, data_columns, res_values)) - executeHashed(*offsets, data_columns, res_values); - } - } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; private: /// Изначально выделить кусок памяти для 512 элементов. static constexpr size_t INITIAL_SIZE_DEGREE = 9; template - bool executeNumber(const ColumnArray * array, ColumnUInt32::Container_t & res_values) - { - const ColumnVector * nested = typeid_cast *>(&array->getData()); - if (!nested) - return false; - const ColumnArray::Offsets_t & offsets = array->getOffsets(); - const typename ColumnVector::Container_t & values = nested->getData(); + bool executeNumber(const ColumnArray * array, ColumnUInt32::Container_t & res_values); - typedef ClearableHashSet, HashTableGrower, - HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)> > Set; + bool executeString(const ColumnArray * array, ColumnUInt32::Container_t & res_values); - Set set; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - set.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) - set.insert(values[j]); - - res_values[i] = set.size(); - prev_off = off; - } - return true; - } - - bool executeString(const ColumnArray * array, ColumnUInt32::Container_t & res_values) - { - const ColumnString * nested = typeid_cast(&array->getData()); - if (!nested) - return false; - const ColumnArray::Offsets_t & offsets = array->getOffsets(); - - typedef ClearableHashSet, - HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)> > Set; - - Set set; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - set.clear(); - size_t off = offsets[i]; - 
for (size_t j = prev_off; j < off; ++j) - set.insert(nested->getDataAt(j)); - - res_values[i] = set.size(); - prev_off = off; - } - return true; - } - - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result) - { - const ColumnConstArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - if (!array) - return false; - const Array & values = array->getData(); - - std::set set; - for (size_t i = 0; i < values.size(); ++i) - set.insert(values[i]); - - block.getByPosition(result).column = std::make_shared(array->size(), set.size()); - return true; - } + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result); bool execute128bit( const ColumnArray::Offsets_t & offsets, const ConstColumnPlainPtrs & columns, - ColumnUInt32::Container_t & res_values) - { - size_t count = columns.size(); - size_t keys_bytes = 0; - Sizes key_sizes(count); - for (size_t j = 0; j < count; ++j) - { - if (!columns[j]->isFixed()) - return false; - key_sizes[j] = columns[j]->sizeOfField(); - keys_bytes += key_sizes[j]; - } - if (keys_bytes > 16) - return false; - - typedef ClearableHashSet, - HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > Set; - - Set set; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - set.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) - set.insert(packFixed(j, count, columns, key_sizes)); - - res_values[i] = set.size(); - prev_off = off; - } - - return true; - } + ColumnUInt32::Container_t & res_values); void executeHashed( const ColumnArray::Offsets_t & offsets, const ConstColumnPlainPtrs & columns, - ColumnUInt32::Container_t & res_values) - { - size_t count = columns.size(); - - typedef ClearableHashSet, - HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > Set; - - Set set; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - set.clear(); - size_t off = 
offsets[i]; - for (size_t j = prev_off; j < off; ++j) - set.insert(hash128(j, count, columns)); - - res_values[i] = set.size(); - prev_off = off; - } - } + ColumnUInt32::Container_t & res_values); }; @@ -1672,237 +616,37 @@ class FunctionArrayEnumerateUniq : public IFunction { public: static constexpr auto name = "arrayEnumerateUniq"; - static FunctionPtr create(const Context & context) { return std::make_shared(); } + static FunctionPtr create(const Context & context); /// Получить имя функции. - String getName() const override - { - return name; - } + String getName() const override; /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() == 0) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < arguments.size(); ++i) - { - const DataTypeArray * array_type = typeid_cast(arguments[i].get()); - if (!array_type) - throw Exception("All arguments for function " + getName() + " must be arrays; argument " + toString(i + 1) + " isn't.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - return std::make_shared(std::make_shared()); - } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; /// Выполнить функцию над блоком. 
- void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - if (arguments.size() == 1 && executeConst(block, arguments, result)) - return; - - Columns array_columns(arguments.size()); - const ColumnArray::Offsets_t * offsets = nullptr; - ConstColumnPlainPtrs data_columns(arguments.size()); - - for (size_t i = 0; i < arguments.size(); ++i) - { - ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; - const ColumnArray * array = typeid_cast(array_ptr.get()); - if (!array) - { - const ColumnConstArray * const_array = typeid_cast( - block.getByPosition(arguments[i]).column.get()); - if (!const_array) - throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() - + " of " + toString(i + 1) + "-th argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - array_ptr = const_array->convertToFullColumn(); - array = typeid_cast(array_ptr.get()); - } - array_columns[i] = array_ptr; - const ColumnArray::Offsets_t & offsets_i = array->getOffsets(); - if (!i) - offsets = &offsets_i; - else if (offsets_i != *offsets) - throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - data_columns[i] = &array->getData(); - } - - const ColumnArray * first_array = typeid_cast(array_columns[0].get()); - auto res_nested = std::make_shared(); - auto res_array = std::make_shared(res_nested, first_array->getOffsetsColumn()); - block.getByPosition(result).column = res_array; - - ColumnUInt32::Container_t & res_values = res_nested->getData(); - if (!offsets->empty()) - res_values.resize(offsets->back()); - - if (arguments.size() == 1) - { - if (!( executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, 
res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeNumber (first_array, res_values) - || executeString (first_array, res_values))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - else - { - if (!execute128bit(*offsets, data_columns, res_values)) - executeHashed(*offsets, data_columns, res_values); - } - } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; private: /// Изначально выделить кусок памяти для 512 элементов. static constexpr size_t INITIAL_SIZE_DEGREE = 9; template - bool executeNumber(const ColumnArray * array, ColumnUInt32::Container_t & res_values) - { - const ColumnVector * nested = typeid_cast *>(&array->getData()); - if (!nested) - return false; - const ColumnArray::Offsets_t & offsets = array->getOffsets(); - const typename ColumnVector::Container_t & values = nested->getData(); + bool executeNumber(const ColumnArray * array, ColumnUInt32::Container_t & res_values); - typedef ClearableHashMap, HashTableGrower, - HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)> > ValuesToIndices; + bool executeString(const ColumnArray * array, ColumnUInt32::Container_t & res_values); - ValuesToIndices indices; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - indices.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) - { - res_values[j] = ++indices[values[j]]; - } - prev_off = off; - } - return true; - } - - bool executeString(const ColumnArray * array, ColumnUInt32::Container_t & res_values) - { - const ColumnString * nested = typeid_cast(&array->getData()); - if (!nested) - return false; - const ColumnArray::Offsets_t & offsets = array->getOffsets(); - - size_t prev_off = 0; - typedef ClearableHashMap, - HashTableAllocatorWithStackMemory<(1 << 
INITIAL_SIZE_DEGREE) * sizeof(StringRef)> > ValuesToIndices; - - ValuesToIndices indices; - for (size_t i = 0; i < offsets.size(); ++i) - { - indices.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) - { - res_values[j] = ++indices[nested->getDataAt(j)]; - } - prev_off = off; - } - return true; - } - - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result) - { - const ColumnConstArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - if (!array) - return false; - const Array & values = array->getData(); - - Array res_values(values.size()); - std::map indices; - for (size_t i = 0; i < values.size(); ++i) - { - res_values[i] = static_cast(++indices[values[i]]); - } - - auto res_array = std::make_shared(array->size(), res_values, std::make_shared(std::make_shared())); - block.getByPosition(result).column = res_array; - - return true; - } + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result); bool execute128bit( const ColumnArray::Offsets_t & offsets, const ConstColumnPlainPtrs & columns, - ColumnUInt32::Container_t & res_values) - { - size_t count = columns.size(); - size_t keys_bytes = 0; - Sizes key_sizes(count); - for (size_t j = 0; j < count; ++j) - { - if (!columns[j]->isFixed()) - return false; - key_sizes[j] = columns[j]->sizeOfField(); - keys_bytes += key_sizes[j]; - } - if (keys_bytes > 16) - return false; - - typedef ClearableHashMap, - HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > ValuesToIndices; - - ValuesToIndices indices; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - indices.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) - { - res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; - } - prev_off = off; - } - - return true; - } + ColumnUInt32::Container_t & res_values); void executeHashed( const ColumnArray::Offsets_t & offsets, const 
ConstColumnPlainPtrs & columns, - ColumnUInt32::Container_t & res_values) - { - size_t count = columns.size(); - - typedef ClearableHashMap, - HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > ValuesToIndices; - - ValuesToIndices indices; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - indices.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) - { - res_values[j] = ++indices[hash128(j, count, columns)]; - } - prev_off = off; - } - } + ColumnUInt32::Container_t & res_values); }; @@ -2093,249 +837,32 @@ class FunctionEmptyArrayToSingle : public IFunction { public: static constexpr auto name = "emptyArrayToSingle"; - static FunctionPtr create(const Context & context) { return std::make_shared(); } + static FunctionPtr create(const Context & context); /// Получить имя функции. - String getName() const override - { - return name; - } + String getName() const override; /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() != 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const DataTypeArray * array_type = typeid_cast(arguments[0].get()); - if (!array_type) - throw Exception("Argument for function " + getName() + " must be array.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return arguments[0]->clone(); - } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; /// Выполнить функцию над блоком. 
- void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - if (executeConst(block, arguments, result)) - return; - - const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - if (!array) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - ColumnPtr res_ptr = array->cloneEmpty(); - block.getByPosition(result).column = res_ptr; - ColumnArray & res = static_cast(*res_ptr); - - const IColumn & src_data = array->getData(); - const ColumnArray::Offsets_t & src_offsets = array->getOffsets(); - IColumn & res_data = res.getData(); - ColumnArray::Offsets_t & res_offsets = res.getOffsets(); - - if (!( executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeNumber (src_data, src_offsets, res_data, res_offsets) - || executeString (src_data, src_offsets, res_data, res_offsets) - || executeFixedString (src_data, src_offsets, res_data, res_offsets))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; private: - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result) - { - if (const ColumnConstArray * const_array = 
typeid_cast(block.getByPosition(arguments[0]).column.get())) - { - if (const_array->getData().empty()) - { - auto nested_type = typeid_cast(*block.getByPosition(arguments[0]).type).getNestedType(); - - block.getByPosition(result).column = std::make_shared( - block.rowsInFirstColumn(), - Array{nested_type->getDefault()}, - nested_type->clone()); - } - else - block.getByPosition(result).column = block.getByPosition(arguments[0]).column; - - return true; - } - else - return false; - } + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result); template bool executeNumber( const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, - IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets) - { - if (const ColumnVector * src_data_concrete = typeid_cast *>(&src_data)) - { - const PaddedPODArray & src_data = src_data_concrete->getData(); - PaddedPODArray & res_data = typeid_cast &>(res_data_col).getData(); - size_t size = src_offsets.size(); - res_offsets.resize(size); - res_data.reserve(src_data.size()); - - ColumnArray::Offset_t src_prev_offset = 0; - ColumnArray::Offset_t res_prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - if (src_offsets[i] != src_prev_offset) - { - size_t size_to_write = src_offsets[i] - src_prev_offset; - size_t prev_res_data_size = res_data.size(); - res_data.resize(prev_res_data_size + size_to_write); - memcpy(&res_data[prev_res_data_size], &src_data[src_prev_offset], size_to_write * sizeof(T)); - res_prev_offset += size_to_write; - res_offsets[i] = res_prev_offset; - } - else - { - res_data.push_back(T()); - ++res_prev_offset; - res_offsets[i] = res_prev_offset; - } - - src_prev_offset = src_offsets[i]; - } - - return true; - } - else - return false; - } + IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets); bool executeFixedString( const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, - IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets) - { - if (const 
ColumnFixedString * src_data_concrete = typeid_cast(&src_data)) - { - const size_t n = src_data_concrete->getN(); - const ColumnFixedString::Chars_t & src_data = src_data_concrete->getChars(); - ColumnFixedString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); - size_t size = src_offsets.size(); - res_offsets.resize(size); - res_data.reserve(src_data.size()); - - ColumnArray::Offset_t src_prev_offset = 0; - ColumnArray::Offset_t res_prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - if (src_offsets[i] != src_prev_offset) - { - size_t size_to_write = src_offsets[i] - src_prev_offset; - size_t prev_res_data_size = res_data.size(); - res_data.resize(prev_res_data_size + size_to_write * n); - memcpy(&res_data[prev_res_data_size], &src_data[src_prev_offset], size_to_write * n); - res_prev_offset += size_to_write; - res_offsets[i] = res_prev_offset; - } - else - { - size_t prev_res_data_size = res_data.size(); - res_data.resize(prev_res_data_size + n); - memset(&res_data[prev_res_data_size], 0, n); - ++res_prev_offset; - res_offsets[i] = res_prev_offset; - } - - src_prev_offset = src_offsets[i]; - } - - return true; - } - else - return false; - } + IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets); bool executeString( const IColumn & src_data, const ColumnArray::Offsets_t & src_array_offsets, - IColumn & res_data_col, ColumnArray::Offsets_t & res_array_offsets) - { - if (const ColumnString * src_data_concrete = typeid_cast(&src_data)) - { - const ColumnString::Offsets_t & src_string_offsets = src_data_concrete->getOffsets(); - ColumnString::Offsets_t & res_string_offsets = typeid_cast(res_data_col).getOffsets(); - - const ColumnString::Chars_t & src_data = src_data_concrete->getChars(); - ColumnString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); - - size_t size = src_array_offsets.size(); - res_array_offsets.resize(size); - res_string_offsets.reserve(src_string_offsets.size()); - res_data.reserve(src_data.size()); - - 
ColumnArray::Offset_t src_array_prev_offset = 0; - ColumnArray::Offset_t res_array_prev_offset = 0; - - ColumnString::Offset_t src_string_prev_offset = 0; - ColumnString::Offset_t res_string_prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - if (src_array_offsets[i] != src_array_prev_offset) - { - size_t array_size = src_array_offsets[i] - src_array_prev_offset; - - size_t bytes_to_copy = 0; - size_t from_string_prev_offset_local = src_string_prev_offset; - for (size_t j = 0; j < array_size; ++j) - { - size_t string_size = src_string_offsets[src_array_prev_offset + j] - from_string_prev_offset_local; - - res_string_prev_offset += string_size; - res_string_offsets.push_back(res_string_prev_offset); - - from_string_prev_offset_local += string_size; - bytes_to_copy += string_size; - } - - size_t res_data_old_size = res_data.size(); - res_data.resize(res_data_old_size + bytes_to_copy); - memcpy(&res_data[res_data_old_size], &src_data[src_string_prev_offset], bytes_to_copy); - - res_array_prev_offset += array_size; - res_array_offsets[i] = res_array_prev_offset; - } - else - { - res_data.push_back(0); /// Пустая строка, включая ноль на конце. - - ++res_string_prev_offset; - res_string_offsets.push_back(res_string_prev_offset); - - ++res_array_prev_offset; - res_array_offsets[i] = res_array_prev_offset; - } - - src_array_prev_offset = src_array_offsets[i]; - - if (src_array_prev_offset) - src_string_prev_offset = src_string_offsets[src_array_prev_offset - 1]; - } - - return true; - } - else - return false; - } + IColumn & res_data_col, ColumnArray::Offsets_t & res_array_offsets); }; @@ -2343,218 +870,32 @@ class FunctionArrayReverse : public IFunction { public: static constexpr auto name = "reverse"; - static FunctionPtr create(const Context & context) { return std::make_shared(); } + static FunctionPtr create(const Context & context); /// Получить имя функции. 
- String getName() const override - { - return name; - } + String getName() const override; /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() != 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const DataTypeArray * array_type = typeid_cast(arguments[0].get()); - if (!array_type) - throw Exception("Argument for function " + getName() + " must be array.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return arguments[0]->clone(); - } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; /// Выполнить функцию над блоком. - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - if (executeConst(block, arguments, result)) - return; - - const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); - if (!array) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - ColumnPtr res_ptr = array->cloneEmpty(); - block.getByPosition(result).column = res_ptr; - ColumnArray & res = static_cast(*res_ptr); - - const IColumn & src_data = array->getData(); - const ColumnArray::Offsets_t & offsets = array->getOffsets(); - IColumn & res_data = res.getData(); - res.getOffsetsColumn() = array->getOffsetsColumn(); - - if (!( executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || 
executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || executeNumber (src_data, offsets, res_data) - || executeString (src_data, offsets, res_data) - || executeFixedString (src_data, offsets, res_data))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; private: - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result) - { - if (const ColumnConstArray * const_array = typeid_cast(block.getByPosition(arguments[0]).column.get())) - { - const Array & arr = const_array->getData(); - - size_t size = arr.size(); - Array res(size); - - for (size_t i = 0; i < size; ++i) - res[i] = arr[size - i - 1]; - - block.getByPosition(result).column = std::make_shared( - block.rowsInFirstColumn(), - res, - block.getByPosition(arguments[0]).type->clone()); - - return true; - } - else - return false; - } + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result); template bool executeNumber( const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, - IColumn & res_data_col) - { - if (const ColumnVector * src_data_concrete = typeid_cast *>(&src_data)) - { - const PaddedPODArray & src_data = src_data_concrete->getData(); - PaddedPODArray & res_data = typeid_cast &>(res_data_col).getData(); - size_t size = src_offsets.size(); - res_data.resize(src_data.size()); - - ColumnArray::Offset_t src_prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - const T * src = &src_data[src_prev_offset]; - const T * src_end = &src_data[src_offsets[i]]; - - if (src == src_end) - continue; - - T * dst = &res_data[src_offsets[i] - 1]; - - while (src < src_end) - { - *dst = *src; - ++src; - --dst; - } - - src_prev_offset = src_offsets[i]; - } - - return true; - } - else - return false; - } 
+ IColumn & res_data_col); bool executeFixedString( const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, - IColumn & res_data_col) - { - if (const ColumnFixedString * src_data_concrete = typeid_cast(&src_data)) - { - const size_t n = src_data_concrete->getN(); - const ColumnFixedString::Chars_t & src_data = src_data_concrete->getChars(); - ColumnFixedString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); - size_t size = src_offsets.size(); - res_data.resize(src_data.size()); - - ColumnArray::Offset_t src_prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - const UInt8 * src = &src_data[src_prev_offset * n]; - const UInt8 * src_end = &src_data[src_offsets[i] * n]; - - if (src == src_end) - continue; - - UInt8 * dst = &res_data[src_offsets[i] * n - n]; - - while (src < src_end) - { - memcpySmallAllowReadWriteOverflow15(dst, src, n); - src += n; - dst -= n; - } - - src_prev_offset = src_offsets[i]; - } - - return true; - } - else - return false; - } + IColumn & res_data_col); bool executeString( const IColumn & src_data, const ColumnArray::Offsets_t & src_array_offsets, - IColumn & res_data_col) - { - if (const ColumnString * src_data_concrete = typeid_cast(&src_data)) - { - const ColumnString::Offsets_t & src_string_offsets = src_data_concrete->getOffsets(); - ColumnString::Offsets_t & res_string_offsets = typeid_cast(res_data_col).getOffsets(); - - const ColumnString::Chars_t & src_data = src_data_concrete->getChars(); - ColumnString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); - - size_t size = src_array_offsets.size(); - res_string_offsets.resize(src_string_offsets.size()); - res_data.resize(src_data.size()); - - ColumnArray::Offset_t src_array_prev_offset = 0; - ColumnString::Offset_t res_string_prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - if (src_array_offsets[i] != src_array_prev_offset) - { - size_t array_size = src_array_offsets[i] - src_array_prev_offset; - - for (size_t j = 0; j < 
array_size; ++j) - { - size_t j_reversed = array_size - j - 1; - - auto src_pos = src_array_prev_offset + j_reversed == 0 ? 0 : src_string_offsets[src_array_prev_offset + j_reversed - 1]; - size_t string_size = src_string_offsets[src_array_prev_offset + j_reversed] - src_pos; - - memcpySmallAllowReadWriteOverflow15(&res_data[res_string_prev_offset], &src_data[src_pos], string_size); - - res_string_prev_offset += string_size; - res_string_offsets[src_array_prev_offset + j] = res_string_prev_offset; - } - } - - src_array_prev_offset = src_array_offsets[i]; - } - - return true; - } - else - return false; - } + IColumn & res_data_col); }; @@ -2565,171 +906,17 @@ class FunctionArrayReduce : public IFunction { public: static constexpr auto name = "arrayReduce"; - static FunctionPtr create(const Context & context) { return std::make_shared(); } + static FunctionPtr create(const Context & context); /// Получить имя функции. - String getName() const override - { - return name; - } + String getName() const override; void getReturnTypeAndPrerequisitesImpl( const ColumnsWithTypeAndName & arguments, DataTypePtr & out_return_type, - std::vector & out_prerequisites) override - { - /// Первый аргумент - константная строка с именем агрегатной функции (возможно, с параметрами в скобках, например: "quantile(0.99)"). 
- - if (arguments.size() < 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const ColumnConstString * aggregate_function_name_column = typeid_cast(arguments[0].column.get()); - if (!aggregate_function_name_column) - throw Exception("First argument for function " + getName() + " must be constant string: name of aggregate function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - DataTypes argument_types(arguments.size() - 1); - for (size_t i = 1, size = arguments.size(); i < size; ++i) - { - const DataTypeArray * arg = typeid_cast(arguments[i].type.get()); - if (!arg) - throw Exception("Argument " + toString(i) + " for function " + getName() + " must be array.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - argument_types[i - 1] = arg->getNestedType()->clone(); - } - - if (!aggregate_function) - { - const String & aggregate_function_name_with_params = aggregate_function_name_column->getData(); - - if (aggregate_function_name_with_params.empty()) - throw Exception("First argument for function " + getName() + " (name of aggregate function) cannot be empty.", - ErrorCodes::BAD_ARGUMENTS); - - bool has_parameters = ')' == aggregate_function_name_with_params.back(); - - String aggregate_function_name = aggregate_function_name_with_params; - String parameters; - Array params_row; - - if (has_parameters) - { - size_t pos = aggregate_function_name_with_params.find('('); - if (pos == std::string::npos || pos + 2 >= aggregate_function_name_with_params.size()) - throw Exception("First argument for function " + getName() + " doesn't look like aggregate function name.", - ErrorCodes::BAD_ARGUMENTS); - - aggregate_function_name = aggregate_function_name_with_params.substr(0, pos); - parameters = aggregate_function_name_with_params.substr(pos + 1, aggregate_function_name_with_params.size() - pos - 2); - - if 
(aggregate_function_name.empty()) - throw Exception("First argument for function " + getName() + " doesn't look like aggregate function name.", - ErrorCodes::BAD_ARGUMENTS); - - ParserExpressionList params_parser(false); - ASTPtr args_ast = parseQuery(params_parser, - parameters.data(), parameters.data() + parameters.size(), - "parameters of aggregate function"); - - ASTExpressionList & args_list = typeid_cast(*args_ast); - - if (args_list.children.empty()) - throw Exception("Incorrect list of parameters to aggregate function " - + aggregate_function_name, ErrorCodes::BAD_ARGUMENTS); - - params_row.reserve(args_list.children.size()); - for (const auto & child : args_list.children) - { - const ASTLiteral * lit = typeid_cast(child.get()); - if (!lit) - throw Exception("Parameters to aggregate functions must be literals", - ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS); - - params_row.push_back(lit->value); - } - } - - aggregate_function = AggregateFunctionFactory().get(aggregate_function_name, argument_types); - - /// Потому что владение состояниями агрегатных функций никуда не отдаётся. - if (aggregate_function->isState()) - throw Exception("Using aggregate function with -State modifier in function arrayReduce is not supported", ErrorCodes::BAD_ARGUMENTS); - - if (has_parameters) - aggregate_function->setParameters(params_row); - aggregate_function->setArguments(argument_types); - } - - out_return_type = aggregate_function->getReturnType(); - } - - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override - { - IAggregateFunction & agg_func = *aggregate_function.get(); - std::unique_ptr place_holder { new char[agg_func.sizeOfData()] }; - AggregateDataPtr place = place_holder.get(); - - size_t rows = block.rowsInFirstColumn(); - - /// Агрегатные функции не поддерживают константные столбцы. Поэтому, материализуем их. 
- std::vector materialized_columns; - - std::vector aggregate_arguments_vec(arguments.size() - 1); - - for (size_t i = 0, size = arguments.size() - 1; i < size; ++i) - { - const IColumn * col = block.unsafeGetByPosition(arguments[i + 1]).column.get(); - if (const ColumnArray * arr = typeid_cast(col)) - { - aggregate_arguments_vec[i] = arr->getDataPtr().get(); - } - else if (const ColumnConstArray * arr = typeid_cast(col)) - { - materialized_columns.emplace_back(arr->convertToFullColumn()); - aggregate_arguments_vec[i] = typeid_cast(*materialized_columns.back().get()).getDataPtr().get(); - } - else - throw Exception("Illegal column " + col->getName() + " as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - - } - const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); - - const ColumnArray::Offsets_t & offsets = typeid_cast(!materialized_columns.empty() - ? *materialized_columns.front().get() - : *block.unsafeGetByPosition(arguments[1]).column.get()).getOffsets(); - - ColumnPtr result_holder = block.getByPosition(result).type->createColumn(); - block.getByPosition(result).column = result_holder; - IColumn & res_col = *result_holder.get(); - - ColumnArray::Offset_t current_offset = 0; - for (size_t i = 0; i < rows; ++i) - { - agg_func.create(place); - ColumnArray::Offset_t next_offset = offsets[i]; - - try - { - for (size_t j = current_offset; j < next_offset; ++j) - agg_func.add(place, aggregate_arguments, j); - - agg_func.insertResultInto(place, res_col); - } - catch (...) 
- { - agg_func.destroy(place); - throw; - } - - agg_func.destroy(place); - current_offset = next_offset; - } - } + std::vector & out_prerequisites) override; + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; private: AggregateFunctionPtr aggregate_function; }; diff --git a/dbms/include/DB/Functions/FunctionsConditional.h b/dbms/include/DB/Functions/FunctionsConditional.h index a23c176d3e9..9ffcc166acd 100644 --- a/dbms/include/DB/Functions/FunctionsConditional.h +++ b/dbms/include/DB/Functions/FunctionsConditional.h @@ -15,9 +15,6 @@ #include #include -/// The following include is needed for the function multiIf. -#include - namespace DB { @@ -1458,6 +1455,12 @@ public: } }; +namespace Conditional +{ + +class CondException; + +} /// Function multiIf, which generalizes the function if. /// @@ -1471,6 +1474,9 @@ public: /// - dates with time; /// - strings; /// - arrays of such types. +/// +/// Additionally the arguments, conditions or branches, support nullable types +/// and the NULL value. class FunctionMultiIf final : public IFunction { public: diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp index 2313f60cd37..9786d935b3a 100644 --- a/dbms/src/Functions/FunctionsArray.cpp +++ b/dbms/src/Functions/FunctionsArray.cpp @@ -32,4 +32,2030 @@ void registerFunctionsArray(FunctionFactory & factory) factory.registerFunction(); } +/// Implementation of FunctionArray. + +FunctionPtr FunctionArray::create(const Context & context) +{ + return std::make_shared(context); +} + +FunctionArray::FunctionArray(const Context & context) + : context(context) +{ +} + +String FunctionArray::getName() const +{ + return is_case_mode ? 
"CASE" : name; +} + +namespace +{ + +template +bool tryAddField(DataTypePtr type_res, const Field & f, Array & arr) +{ + if (typeid_cast(type_res.get())) + { + arr.push_back(apply_visitor(FieldVisitorConvertToNumber(), f)); + return true; + } + return false; +} + +} + +bool FunctionArray::addField(DataTypePtr type_res, const Field & f, Array & arr) const +{ + /// Иначе необходимо + if ( tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) + || tryAddField(type_res, f, arr) ) + return true; + else + { + if (is_case_mode) + throw Exception{"Illegal type encountered while processing the CASE construction.", + ErrorCodes::LOGICAL_ERROR}; + else + throw Exception{"Illegal result type " + type_res->getName() + " of function " + getName(), + ErrorCodes::LOGICAL_ERROR}; + } +} + +const DataTypePtr & FunctionArray::getScalarType(const DataTypePtr & type) +{ + const auto array = typeid_cast(type.get()); + + if (!array) + return type; + + return getScalarType(array->getNestedType()); +} + +DataTypeTraits::EnrichedDataTypePtr FunctionArray::getLeastCommonType(const DataTypes & arguments) const +{ + DataTypeTraits::EnrichedDataTypePtr result_type; + + try + { + result_type = Conditional::getArrayType(arguments); + } + catch (const Conditional::CondException & ex) + { + /// Translate a context-free error into a contextual error. 
+ if (is_case_mode) + { + if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_ILLEGAL_COLUMN_TYPE) + throw Exception{"Illegal type of column " + ex.getMsg1() + + " in CASE construction", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + else if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_UPSCALING_ERROR) + throw Exception{"THEN/ELSE clause parameters in CASE construction are not upscalable to a " + "common type without loss of precision: " + ex.getMsg1(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + else + throw Exception{"An unexpected error has occurred in CASE expression", + ErrorCodes::LOGICAL_ERROR}; + } + else + { + if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_ILLEGAL_COLUMN_TYPE) + throw Exception{"Illegal type of column " + ex.getMsg1() + + " in array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + else if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_UPSCALING_ERROR) + throw Exception("Arguments of function " + getName() + " are not upscalable " + "to a common type without loss of precision.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + else + throw Exception{"An unexpected error has occurred in function " + getName(), + ErrorCodes::LOGICAL_ERROR}; + } + } + + return result_type; +} + +void FunctionArray::setCaseMode() +{ + is_case_mode = true; +} + +DataTypePtr FunctionArray::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.empty()) + { + if (is_case_mode) + throw Exception{"Either WHEN clauses or THEN clauses are missing " + "in the CASE construction.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + else + throw Exception{"Function array requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + } + + DataTypePtr result_type = arguments[0]; + + if (result_type->behavesAsNumber()) + { + /// Если тип числовой, пробуем выделить наименьший общий тип + auto enriched_result_type = getLeastCommonType(arguments); + return std::make_shared(enriched_result_type); + } + else + { 
+ /// Иначе все аргументы должны быть одинаковыми + for (size_t i = 1, size = arguments.size(); i < size; ++i) + { + if (arguments[i]->getName() != arguments[0]->getName()) + { + if (is_case_mode) + throw Exception{"Found type discrepancy in either WHEN " + "clauses or THEN clauses of the CASE construction", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + else + throw Exception{"Arguments for function array must have same type or behave as number.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + } + } + + return std::make_shared(result_type); + } +} + +void FunctionArray::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + size_t num_elements = arguments.size(); + bool is_const = true; + + for (const auto arg_num : arguments) + { + if (!block.getByPosition(arg_num).column->isConst()) + { + is_const = false; + break; + } + } + + const auto first_arg = block.getByPosition(arguments[0]); + DataTypePtr result_type = first_arg.type; + DataTypeTraits::EnrichedDataTypePtr enriched_result_type; + if (result_type->behavesAsNumber()) + { + /// If type is numeric, calculate least common type. 
+ DataTypes types; + types.reserve(num_elements); + + for (const auto & argument : arguments) + types.push_back(block.getByPosition(argument).type); + + enriched_result_type = getLeastCommonType(types); + result_type = enriched_result_type.first; + } + + if (is_const) + { + Array arr; + for (const auto arg_num : arguments) + if (block.getByPosition(arg_num).type->getName() == result_type->getName()) + /// Если элемент такого же типа как результат, просто добавляем его в ответ + arr.push_back((*block.getByPosition(arg_num).column)[0]); + else + /// Иначе необходимо привести его к типу результата + addField(result_type, (*block.getByPosition(arg_num).column)[0], arr); + + block.getByPosition(result).column = std::make_shared( + first_arg.column->size(), arr, std::make_shared(result_type)); + } + else + { + size_t block_size = block.rowsInFirstColumn(); + + /** If part of columns have not same type as common type of all elements of array, + * then convert them to common type. + * If part of columns are constants, + * then convert them to full columns. + */ + + Columns columns_holder(num_elements); + const IColumn * columns[num_elements]; + + for (size_t i = 0; i < num_elements; ++i) + { + const auto & arg = block.getByPosition(arguments[i]); + + String result_type_name = result_type->getName(); + ColumnPtr preprocessed_column = arg.column; + + if (arg.type->getName() != result_type_name) + { + Block temporary_block + { + { + arg.column, + arg.type, + arg.name + }, + { + std::make_shared(block_size, result_type_name), + std::make_shared(), + "" + }, + { + nullptr, + result_type, + "" + } + }; + + FunctionCast func_cast(context); + + { + DataTypePtr unused_return_type; + ColumnsWithTypeAndName arguments{ temporary_block.unsafeGetByPosition(0), temporary_block.unsafeGetByPosition(1) }; + std::vector unused_prerequisites; + + /// Prepares function to execution. TODO It is not obvious. 
+ func_cast.getReturnTypeAndPrerequisites(arguments, unused_return_type, unused_prerequisites); + } + + func_cast.execute(temporary_block, {0, 1}, 2); + preprocessed_column = temporary_block.unsafeGetByPosition(2).column; + } + + if (auto materialized_column = preprocessed_column->convertToFullColumnIfConst()) + preprocessed_column = materialized_column; + + columns_holder[i] = std::move(preprocessed_column); + columns[i] = columns_holder[i].get(); + } + + /** Create and fill the result array. + */ + + auto out = std::make_shared(result_type->createColumn()); + IColumn & out_data = out->getData(); + IColumn::Offsets_t & out_offsets = out->getOffsets(); + + out_data.reserve(block_size * num_elements); + out_offsets.resize(block_size); + + IColumn::Offset_t current_offset = 0; + for (size_t i = 0; i < block_size; ++i) + { + for (size_t j = 0; j < num_elements; ++j) + out_data.insertFrom(*columns[j], i); + + current_offset += num_elements; + out_offsets[i] = current_offset; + } + + block.getByPosition(result).column = out; + } +} + +/// Implementation of FunctionArrayElement. + +namespace +{ + +template +struct ArrayElementNumImpl +{ + /** Implementation for constant index. + * If negative = false - index is from beginning of array, started from 1. + * If negative = true - index is from end of array, started from -1. + */ + template + static void vectorConst( + const PaddedPODArray & data, const ColumnArray::Offsets_t & offsets, + const ColumnArray::Offset_t index, + PaddedPODArray & result) + { + size_t size = offsets.size(); + result.resize(size); + + ColumnArray::Offset_t current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + + if (index < array_size) + result[i] = !negative ? data[current_offset + index] : data[offsets[i] - index - 1]; + else + result[i] = T(); + + current_offset = offsets[i]; + } + } + + /** Implementation for non-constant index. 
+ */ + template + static void vector( + const PaddedPODArray & data, const ColumnArray::Offsets_t & offsets, + const PaddedPODArray & indices, + PaddedPODArray & result) + { + size_t size = offsets.size(); + result.resize(size); + + ColumnArray::Offset_t current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + result[i] = data[current_offset + index - 1]; + else if (index < 0 && static_cast(-index) <= array_size) + result[i] = data[offsets[i] + index]; + else + result[i] = T(); + + current_offset = offsets[i]; + } + } +}; + +struct ArrayElementStringImpl +{ + /** Implementation for constant index. + * If negative = false - index is from beginning of array, started from 1. + * If negative = true - index is from end of array, started from -1. + */ + template + static void vectorConst( + const ColumnString::Chars_t & data, const ColumnArray::Offsets_t & offsets, const ColumnString::Offsets_t & string_offsets, + const ColumnArray::Offset_t index, + ColumnString::Chars_t & result_data, ColumnArray::Offsets_t & result_offsets) + { + size_t size = offsets.size(); + result_offsets.resize(size); + result_data.reserve(data.size()); + + ColumnArray::Offset_t current_offset = 0; + ColumnArray::Offset_t current_result_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + + if (index < array_size) + { + size_t adjusted_index = !negative ? index : (array_size - index - 1); + + ColumnArray::Offset_t string_pos = current_offset == 0 && adjusted_index == 0 + ? 
0 + : string_offsets[current_offset + adjusted_index - 1]; + + ColumnArray::Offset_t string_size = string_offsets[current_offset + adjusted_index] - string_pos; + + result_data.resize(current_result_offset + string_size); + memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size); + current_result_offset += string_size; + result_offsets[i] = current_result_offset; + } + else + { + /// Вставим пустую строку. + result_data.resize(current_result_offset + 1); + result_data[current_result_offset] = 0; + current_result_offset += 1; + result_offsets[i] = current_result_offset; + } + + current_offset = offsets[i]; + } + } + + /** Implementation for non-constant index. + */ + template + static void vector( + const ColumnString::Chars_t & data, const ColumnArray::Offsets_t & offsets, const ColumnString::Offsets_t & string_offsets, + const PaddedPODArray & indices, + ColumnString::Chars_t & result_data, ColumnArray::Offsets_t & result_offsets) + { + size_t size = offsets.size(); + result_offsets.resize(size); + result_data.reserve(data.size()); + + ColumnArray::Offset_t current_offset = 0; + ColumnArray::Offset_t current_result_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + size_t adjusted_index; /// index in array from zero + + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + adjusted_index = index - 1; + else if (index < 0 && static_cast(-index) <= array_size) + adjusted_index = array_size + index; + else + adjusted_index = array_size; /// means no element should be taken + + if (adjusted_index < array_size) + { + ColumnArray::Offset_t string_pos = current_offset == 0 && adjusted_index == 0 + ? 
0 + : string_offsets[current_offset + adjusted_index - 1]; + + ColumnArray::Offset_t string_size = string_offsets[current_offset + adjusted_index] - string_pos; + + result_data.resize(current_result_offset + string_size); + memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size); + current_result_offset += string_size; + result_offsets[i] = current_result_offset; + } + else + { + /// Insert empty string + result_data.resize(current_result_offset + 1); + result_data[current_result_offset] = 0; + current_result_offset += 1; + result_offsets[i] = current_result_offset; + } + + current_offset = offsets[i]; + } + } +}; + +/// Generic implementation for other nested types. +struct ArrayElementGenericImpl +{ + /** Implementation for constant index. + * If negative = false - index is from beginning of array, started from 1. + * If negative = true - index is from end of array, started from -1. + */ + template + static void vectorConst( + const IColumn & data, const ColumnArray::Offsets_t & offsets, + const ColumnArray::Offset_t index, + IColumn & result) + { + size_t size = offsets.size(); + result.reserve(size); + + ColumnArray::Offset_t current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + + if (index < array_size) + result.insertFrom(data, !negative ? current_offset + index : offsets[i] - index - 1); + else + result.insertDefault(); + + current_offset = offsets[i]; + } + } + + /** Implementation for non-constant index. 
+ */ + template + static void vector( + const IColumn & data, const ColumnArray::Offsets_t & offsets, + const PaddedPODArray & indices, + IColumn & result) + { + size_t size = offsets.size(); + result.reserve(size); + + ColumnArray::Offset_t current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + result.insertFrom(data, current_offset + index - 1); + else if (index < 0 && static_cast(-index) <= array_size) + result.insertFrom(data, offsets[i] + index); + else + result.insertDefault(); + + current_offset = offsets[i]; + } + } +}; + +} + + +FunctionPtr FunctionArrayElement::create(const Context & context) +{ + return std::make_shared(); +} + + +template +bool FunctionArrayElement::executeNumberConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) +{ + const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + const ColumnVector * col_nested = typeid_cast *>(&col_array->getData()); + + if (!col_nested) + return false; + + auto col_res = std::make_shared>(); + block.getByPosition(result).column = col_res; + + if (index.getType() == Field::Types::UInt64) + ArrayElementNumImpl::template vectorConst( + col_nested->getData(), col_array->getOffsets(), safeGet(index) - 1, col_res->getData()); + else if (index.getType() == Field::Types::Int64) + ArrayElementNumImpl::template vectorConst( + col_nested->getData(), col_array->getOffsets(), -safeGet(index) - 1, col_res->getData()); + else + throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + + return true; +} + +template +bool FunctionArrayElement::executeNumber(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) +{ + const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if 
(!col_array) + return false; + + const ColumnVector * col_nested = typeid_cast *>(&col_array->getData()); + + if (!col_nested) + return false; + + auto col_res = std::make_shared>(); + block.getByPosition(result).column = col_res; + + ArrayElementNumImpl::template vector( + col_nested->getData(), col_array->getOffsets(), indices, col_res->getData()); + + return true; +} + +bool FunctionArrayElement::executeStringConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) +{ + const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + const ColumnString * col_nested = typeid_cast(&col_array->getData()); + + if (!col_nested) + return false; + + std::shared_ptr col_res = std::make_shared(); + block.getByPosition(result).column = col_res; + + if (index.getType() == Field::Types::UInt64) + ArrayElementStringImpl::vectorConst( + col_nested->getChars(), + col_array->getOffsets(), + col_nested->getOffsets(), + safeGet(index) - 1, + col_res->getChars(), + col_res->getOffsets()); + else if (index.getType() == Field::Types::Int64) + ArrayElementStringImpl::vectorConst( + col_nested->getChars(), + col_array->getOffsets(), + col_nested->getOffsets(), + -safeGet(index) - 1, + col_res->getChars(), + col_res->getOffsets()); + else + throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + + return true; +} + +template +bool FunctionArrayElement::executeString(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) +{ + const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + const ColumnString * col_nested = typeid_cast(&col_array->getData()); + + if (!col_nested) + return false; + + std::shared_ptr col_res = std::make_shared(); + block.getByPosition(result).column = col_res; + + ArrayElementStringImpl::vector( + col_nested->getChars(), + 
col_array->getOffsets(), + col_nested->getOffsets(), + indices, + col_res->getChars(), + col_res->getOffsets()); + + return true; +} + +bool FunctionArrayElement::executeGenericConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) +{ + const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + const auto & col_nested = col_array->getData(); + auto col_res = col_nested.cloneEmpty(); + block.getByPosition(result).column = col_res; + + if (index.getType() == Field::Types::UInt64) + ArrayElementGenericImpl::vectorConst( + col_nested, col_array->getOffsets(), safeGet(index) - 1, *col_res); + else if (index.getType() == Field::Types::Int64) + ArrayElementGenericImpl::vectorConst( + col_nested, col_array->getOffsets(), -safeGet(index) - 1, *col_res); + else + throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + + return true; +} + +template +bool FunctionArrayElement::executeGeneric(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) +{ + const ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + const auto & col_nested = col_array->getData(); + auto col_res = col_nested.cloneEmpty(); + block.getByPosition(result).column = col_res; + + ArrayElementGenericImpl::vector( + col_nested, col_array->getOffsets(), indices, *col_res); + + return true; +} + +bool FunctionArrayElement::executeConstConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) +{ + const ColumnConstArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + const DB::Array & array = col_array->getData(); + size_t array_size = array.size(); + size_t real_index = 0; + + if (index.getType() == Field::Types::UInt64) + real_index = safeGet(index) - 1; + else if 
(index.getType() == Field::Types::Int64) + real_index = array_size + safeGet(index); + else + throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + + Field value = col_array->getData().at(real_index); + + block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn( + block.rowsInFirstColumn(), + value); + + return true; +} + +template +bool FunctionArrayElement::executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray & indices) +{ + const ColumnConstArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + const DB::Array & array = col_array->getData(); + size_t array_size = array.size(); + + block.getByPosition(result).column = block.getByPosition(result).type->createColumn(); + + for (size_t i = 0; i < col_array->size(); ++i) + { + IndexType index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + block.getByPosition(result).column->insert(array[index - 1]); + else if (index < 0 && static_cast(-index) <= array_size) + block.getByPosition(result).column->insert(array[array_size + index]); + else + block.getByPosition(result).column->insertDefault(); + } + + return true; +} + +template +bool FunctionArrayElement::executeArgument(Block & block, const ColumnNumbers & arguments, size_t result) +{ + auto index = typeid_cast *>(block.getByPosition(arguments[1]).column.get()); + + if (!index) + return false; + + const auto & index_data = index->getData(); + + if (!( executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, 
result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeNumber (block, arguments, result, index_data) + || executeConst (block, arguments, result, index_data) + || executeString (block, arguments, result, index_data) + || executeGeneric (block, arguments, result, index_data))) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + return true; +} + +bool FunctionArrayElement::executeTuple(Block & block, const ColumnNumbers & arguments, size_t result) +{ + ColumnArray * col_array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + + if (!col_array) + return false; + + ColumnTuple * col_nested = typeid_cast(&col_array->getData()); + + if (!col_nested) + return false; + + Block & tuple_block = col_nested->getData(); + size_t tuple_size = tuple_block.columns(); + + /** Будем вычислять функцию для кортежа внутренностей массива. + * Для этого создадим временный блок. + * Он будет состоять из следующих столбцов: + * - индекс массива, который нужно взять; + * - массив из первых элементов кортежей; + * - результат взятия элементов по индексу для массива из первых элементов кортежей; + * - массив из вторых элементов кортежей; + * - результат взятия элементов по индексу для массива из вторых элементов кортежей; + * ... 
+ */ + Block block_of_temporary_results; + block_of_temporary_results.insert(block.getByPosition(arguments[1])); + + /// результаты взятия элементов по индексу для массивов из каждых элементов кортежей; + Block result_tuple_block; + + for (size_t i = 0; i < tuple_size; ++i) + { + ColumnWithTypeAndName array_of_tuple_section; + array_of_tuple_section.column = std::make_shared( + tuple_block.getByPosition(i).column, col_array->getOffsetsColumn()); + array_of_tuple_section.type = std::make_shared( + tuple_block.getByPosition(i).type); + block_of_temporary_results.insert(array_of_tuple_section); + + ColumnWithTypeAndName array_elements_of_tuple_section; + block_of_temporary_results.insert(array_elements_of_tuple_section); + + executeImpl(block_of_temporary_results, ColumnNumbers{i * 2 + 1, 0}, i * 2 + 2); + + result_tuple_block.insert(block_of_temporary_results.getByPosition(i * 2 + 2)); + } + + auto col_res = std::make_shared(result_tuple_block); + block.getByPosition(result).column = col_res; + + return true; +} + +String FunctionArrayElement::getName() const +{ + return name; +} + +DataTypePtr FunctionArrayElement::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.size() != 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 2.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeArray * array_type = typeid_cast(arguments[0].get()); + if (!array_type) + throw Exception("First argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!arguments[1]->isNumeric() + || (!startsWith(arguments[1]->getName(), "UInt") && !startsWith(arguments[1]->getName(), "Int"))) + throw Exception("Second argument for function " + getName() + " must have UInt or Int type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return array_type->getNestedType(); +} + +/// Выполнить функцию над блоком. 
+void FunctionArrayElement::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
+{
+    /** arguments[0] is the array column, arguments[1] is the index column, result is the position
+      *  of the output column in the block.
+      * The implementations are tried in this order:
+      *  1. executeTuple - accepts an array whose nested column is a tuple;
+      *  2. executeArgument - used when the index column is not constant;
+      *  3. the *Const implementations - used when the index is a constant.
+      * An implementation returns false when the columns are not of the kind it handles,
+      *  which lets the next candidate be tried.
+      */
+
+    if (executeTuple(block, arguments, result))
+        return;
+
+    if (!block.getByPosition(arguments[1]).column->isConst())
+    {
+        /// Dispatch on the type of the index column. executeArgument itself throws ILLEGAL_COLUMN
+        ///  when the index type matches but the array column is not supported.
+        /// NOTE(review): the index type list is assumed to be UInt8..UInt64, Int8..Int64
+        ///  (getReturnTypeImpl admits only UInt*/Int* indices) - confirm against the header.
+        const bool executed =
+               executeArgument<UInt8>(block, arguments, result)
+            || executeArgument<UInt16>(block, arguments, result)
+            || executeArgument<UInt32>(block, arguments, result)
+            || executeArgument<UInt64>(block, arguments, result)
+            || executeArgument<Int8>(block, arguments, result)
+            || executeArgument<Int16>(block, arguments, result)
+            || executeArgument<Int32>(block, arguments, result)
+            || executeArgument<Int64>(block, arguments, result);
+
+        if (!executed)
+            throw Exception("Second argument for function " + getName() + " must have UInt or Int type.",
+                ErrorCodes::ILLEGAL_COLUMN);
+
+        return;
+    }
+
+    /// Constant index: its value is taken from the first row of the column.
+    Field index = (*block.getByPosition(arguments[1]).column)[0];
+
+    /// Indices count from 1 (or from -1 when counting from the end of the array), so zero is an error.
+    if (index == UInt64(0))
+        throw Exception("Array indices are 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX);
+
+    /// Dispatch on the type of the array elements; string and generic implementations go last.
+    /// NOTE(review): the element type list is assumed to be the ten numeric types
+    ///  UInt8..UInt64, Int8..Int64, Float32, Float64 - confirm against the header.
+    const bool executed =
+           executeNumberConst<UInt8>(block, arguments, result, index)
+        || executeNumberConst<UInt16>(block, arguments, result, index)
+        || executeNumberConst<UInt32>(block, arguments, result, index)
+        || executeNumberConst<UInt64>(block, arguments, result, index)
+        || executeNumberConst<Int8>(block, arguments, result, index)
+        || executeNumberConst<Int16>(block, arguments, result, index)
+        || executeNumberConst<Int32>(block, arguments, result, index)
+        || executeNumberConst<Int64>(block, arguments, result, index)
+        || executeNumberConst<Float32>(block, arguments, result, index)
+        || executeNumberConst<Float64>(block, arguments, result, index)
+        || executeConstConst(block, arguments, result, index)
+        || executeStringConst(block, arguments, result, index)
+        || executeGenericConst(block, arguments, result, index);
+
+    if (!executed)
+        throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+            + " of first argument of function " + getName(),
+            ErrorCodes::ILLEGAL_COLUMN);
+}
+
+/// Implementation of FunctionArrayEnumerate.
+ +FunctionPtr FunctionArrayEnumerate::create(const Context & context) +{ + return std::make_shared(); +} + +String FunctionArrayEnumerate::getName() const +{ + return name; +} + +DataTypePtr FunctionArrayEnumerate::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeArray * array_type = typeid_cast(arguments[0].get()); + if (!array_type) + throw Exception("First argument for function " + getName() + " must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(std::make_shared()); +} + +void FunctionArrayEnumerate::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + if (const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get())) + { + const ColumnArray::Offsets_t & offsets = array->getOffsets(); + + auto res_nested = std::make_shared(); + auto res_array = std::make_shared(res_nested, array->getOffsetsColumn()); + block.getByPosition(result).column = res_array; + + ColumnUInt32::Container_t & res_values = res_nested->getData(); + res_values.resize(array->getData().size()); + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + res_values[j] = j - prev_off + 1; + } + prev_off = off; + } + } + else if (const ColumnConstArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get())) + { + const Array & values = array->getData(); + + Array res_values(values.size()); + for (size_t i = 0; i < values.size(); ++i) + { + res_values[i] = i + 1; + } + + auto res_array = std::make_shared(array->size(), res_values, std::make_shared(std::make_shared())); + block.getByPosition(result).column = res_array; + } + else + { + throw Exception("Illegal column " + 
block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +} + +/// Implementation of FunctionArrayUniq. + +FunctionPtr FunctionArrayUniq::create(const Context & context) { return std::make_shared(); } + +String FunctionArrayUniq::getName() const +{ + return name; +} + +DataTypePtr FunctionArrayUniq::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.size() == 0) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be at least 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (size_t i = 0; i < arguments.size(); ++i) + { + const DataTypeArray * array_type = typeid_cast(arguments[i].get()); + if (!array_type) + throw Exception("All arguments for function " + getName() + " must be arrays; argument " + toString(i + 1) + " isn't.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(); +} + +void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + if (arguments.size() == 1 && executeConst(block, arguments, result)) + return; + + Columns array_columns(arguments.size()); + const ColumnArray::Offsets_t * offsets = nullptr; + ConstColumnPlainPtrs data_columns(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + { + ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; + const ColumnArray * array = typeid_cast(array_ptr.get()); + if (!array) + { + const ColumnConstArray * const_array = typeid_cast( + block.getByPosition(arguments[i]).column.get()); + if (!const_array) + throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() + + " of " + toString(i + 1) + "-th argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + array_ptr = const_array->convertToFullColumn(); + array = typeid_cast(array_ptr.get()); + } + array_columns[i] = array_ptr; + 
const ColumnArray::Offsets_t & offsets_i = array->getOffsets(); + if (!i) + offsets = &offsets_i; + else if (offsets_i != *offsets) + throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.", + ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + data_columns[i] = &array->getData(); + } + + const ColumnArray * first_array = typeid_cast(array_columns[0].get()); + auto res = std::make_shared(); + block.getByPosition(result).column = res; + + ColumnUInt32::Container_t & res_values = res->getData(); + res_values.resize(offsets->size()); + + if (arguments.size() == 1) + { + if (!( executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeString (first_array, res_values))) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + else + { + if (!execute128bit(*offsets, data_columns, res_values)) + executeHashed(*offsets, data_columns, res_values); + } +} + +template +bool FunctionArrayUniq::executeNumber(const ColumnArray * array, ColumnUInt32::Container_t & res_values) +{ + const ColumnVector * nested = typeid_cast *>(&array->getData()); + if (!nested) + return false; + const ColumnArray::Offsets_t & offsets = array->getOffsets(); + const typename ColumnVector::Container_t & values = nested->getData(); + + typedef ClearableHashSet, HashTableGrower, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)> > Set; + + Set set; + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + set.clear(); + size_t off 
= offsets[i]; + for (size_t j = prev_off; j < off; ++j) + set.insert(values[j]); + + res_values[i] = set.size(); + prev_off = off; + } + return true; +} + +bool FunctionArrayUniq::executeString(const ColumnArray * array, ColumnUInt32::Container_t & res_values) +{ + const ColumnString * nested = typeid_cast(&array->getData()); + if (!nested) + return false; + const ColumnArray::Offsets_t & offsets = array->getOffsets(); + + typedef ClearableHashSet, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)> > Set; + + Set set; + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + set.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + set.insert(nested->getDataAt(j)); + + res_values[i] = set.size(); + prev_off = off; + } + return true; +} + +bool FunctionArrayUniq::executeConst(Block & block, const ColumnNumbers & arguments, size_t result) +{ + const ColumnConstArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + if (!array) + return false; + const Array & values = array->getData(); + + std::set set; + for (size_t i = 0; i < values.size(); ++i) + set.insert(values[i]); + + block.getByPosition(result).column = std::make_shared(array->size(), set.size()); + return true; +} + +bool FunctionArrayUniq::execute128bit( + const ColumnArray::Offsets_t & offsets, + const ConstColumnPlainPtrs & columns, + ColumnUInt32::Container_t & res_values) +{ + size_t count = columns.size(); + size_t keys_bytes = 0; + Sizes key_sizes(count); + for (size_t j = 0; j < count; ++j) + { + if (!columns[j]->isFixed()) + return false; + key_sizes[j] = columns[j]->sizeOfField(); + keys_bytes += key_sizes[j]; + } + if (keys_bytes > 16) + return false; + + typedef ClearableHashSet, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > Set; + + Set set; + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + set.clear(); + size_t off = offsets[i]; + for 
(size_t j = prev_off; j < off; ++j) + set.insert(packFixed(j, count, columns, key_sizes)); + + res_values[i] = set.size(); + prev_off = off; + } + + return true; +} + +void FunctionArrayUniq::executeHashed( + const ColumnArray::Offsets_t & offsets, + const ConstColumnPlainPtrs & columns, + ColumnUInt32::Container_t & res_values) +{ + size_t count = columns.size(); + + typedef ClearableHashSet, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > Set; + + Set set; + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + set.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + set.insert(hash128(j, count, columns)); + + res_values[i] = set.size(); + prev_off = off; + } +} + +/// Implementation of FunctionArrayEnumerateUniq. + +FunctionPtr FunctionArrayEnumerateUniq::create(const Context & context) +{ + return std::make_shared(); +} + +/// Получить имя функции. +String FunctionArrayEnumerateUniq::getName() const +{ + return name; +} + +DataTypePtr FunctionArrayEnumerateUniq::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.size() == 0) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be at least 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (size_t i = 0; i < arguments.size(); ++i) + { + const DataTypeArray * array_type = typeid_cast(arguments[i].get()); + if (!array_type) + throw Exception("All arguments for function " + getName() + " must be arrays; argument " + toString(i + 1) + " isn't.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(std::make_shared()); +} + +void FunctionArrayEnumerateUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + if (arguments.size() == 1 && executeConst(block, arguments, result)) + return; + + Columns array_columns(arguments.size()); + const ColumnArray::Offsets_t * offsets = 
nullptr; + ConstColumnPlainPtrs data_columns(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + { + ColumnPtr array_ptr = block.getByPosition(arguments[i]).column; + const ColumnArray * array = typeid_cast(array_ptr.get()); + if (!array) + { + const ColumnConstArray * const_array = typeid_cast( + block.getByPosition(arguments[i]).column.get()); + if (!const_array) + throw Exception("Illegal column " + block.getByPosition(arguments[i]).column->getName() + + " of " + toString(i + 1) + "-th argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + array_ptr = const_array->convertToFullColumn(); + array = typeid_cast(array_ptr.get()); + } + array_columns[i] = array_ptr; + const ColumnArray::Offsets_t & offsets_i = array->getOffsets(); + if (!i) + offsets = &offsets_i; + else if (offsets_i != *offsets) + throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.", + ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + data_columns[i] = &array->getData(); + } + + const ColumnArray * first_array = typeid_cast(array_columns[0].get()); + auto res_nested = std::make_shared(); + auto res_array = std::make_shared(res_nested, first_array->getOffsetsColumn()); + block.getByPosition(result).column = res_array; + + ColumnUInt32::Container_t & res_values = res_nested->getData(); + if (!offsets->empty()) + res_values.resize(offsets->back()); + + if (arguments.size() == 1) + { + if (!( executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeNumber (first_array, res_values) + || executeString (first_array, res_values))) + throw Exception("Illegal column " + 
block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + else + { + if (!execute128bit(*offsets, data_columns, res_values)) + executeHashed(*offsets, data_columns, res_values); + } +} + +template +bool FunctionArrayEnumerateUniq::executeNumber(const ColumnArray * array, ColumnUInt32::Container_t & res_values) +{ + const ColumnVector * nested = typeid_cast *>(&array->getData()); + if (!nested) + return false; + const ColumnArray::Offsets_t & offsets = array->getOffsets(); + const typename ColumnVector::Container_t & values = nested->getData(); + + typedef ClearableHashMap, HashTableGrower, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(T)> > ValuesToIndices; + + ValuesToIndices indices; + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + res_values[j] = ++indices[values[j]]; + } + prev_off = off; + } + return true; +} + +bool FunctionArrayEnumerateUniq::executeString(const ColumnArray * array, ColumnUInt32::Container_t & res_values) +{ + const ColumnString * nested = typeid_cast(&array->getData()); + if (!nested) + return false; + const ColumnArray::Offsets_t & offsets = array->getOffsets(); + + size_t prev_off = 0; + typedef ClearableHashMap, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(StringRef)> > ValuesToIndices; + + ValuesToIndices indices; + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + res_values[j] = ++indices[nested->getDataAt(j)]; + } + prev_off = off; + } + return true; +} + +bool FunctionArrayEnumerateUniq::executeConst(Block & block, const ColumnNumbers & arguments, size_t result) +{ + const ColumnConstArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + if (!array) + return false; + 
const Array & values = array->getData(); + + Array res_values(values.size()); + std::map indices; + for (size_t i = 0; i < values.size(); ++i) + { + res_values[i] = static_cast(++indices[values[i]]); + } + + auto res_array = std::make_shared(array->size(), res_values, std::make_shared(std::make_shared())); + block.getByPosition(result).column = res_array; + + return true; +} + +bool FunctionArrayEnumerateUniq::execute128bit( + const ColumnArray::Offsets_t & offsets, + const ConstColumnPlainPtrs & columns, + ColumnUInt32::Container_t & res_values) +{ + size_t count = columns.size(); + size_t keys_bytes = 0; + Sizes key_sizes(count); + for (size_t j = 0; j < count; ++j) + { + if (!columns[j]->isFixed()) + return false; + key_sizes[j] = columns[j]->sizeOfField(); + keys_bytes += key_sizes[j]; + } + if (keys_bytes > 16) + return false; + + typedef ClearableHashMap, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > ValuesToIndices; + + ValuesToIndices indices; + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; + } + prev_off = off; + } + + return true; +} + +void FunctionArrayEnumerateUniq::executeHashed( + const ColumnArray::Offsets_t & offsets, + const ConstColumnPlainPtrs & columns, + ColumnUInt32::Container_t & res_values) +{ + size_t count = columns.size(); + + typedef ClearableHashMap, + HashTableAllocatorWithStackMemory<(1 << INITIAL_SIZE_DEGREE) * sizeof(UInt128)> > ValuesToIndices; + + ValuesToIndices indices; + size_t prev_off = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + res_values[j] = ++indices[hash128(j, count, columns)]; + } + prev_off = off; + } +} + +/// Implementation of FunctionEmptyArrayToSingle. 
+ +FunctionPtr FunctionEmptyArrayToSingle::create(const Context & context) { return std::make_shared(); } + +String FunctionEmptyArrayToSingle::getName() const +{ + return name; +} + +DataTypePtr FunctionEmptyArrayToSingle::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeArray * array_type = typeid_cast(arguments[0].get()); + if (!array_type) + throw Exception("Argument for function " + getName() + " must be array.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return arguments[0]->clone(); +} + +void FunctionEmptyArrayToSingle::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + if (executeConst(block, arguments, result)) + return; + + const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + if (!array) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + ColumnPtr res_ptr = array->cloneEmpty(); + block.getByPosition(result).column = res_ptr; + ColumnArray & res = static_cast(*res_ptr); + + const IColumn & src_data = array->getData(); + const ColumnArray::Offsets_t & src_offsets = array->getOffsets(); + IColumn & res_data = res.getData(); + ColumnArray::Offsets_t & res_offsets = res.getOffsets(); + + if (!( executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, 
res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeString (src_data, src_offsets, res_data, res_offsets) + || executeFixedString (src_data, src_offsets, res_data, res_offsets))) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); +} + +bool FunctionEmptyArrayToSingle::executeConst(Block & block, const ColumnNumbers & arguments, size_t result) +{ + if (const ColumnConstArray * const_array = typeid_cast(block.getByPosition(arguments[0]).column.get())) + { + if (const_array->getData().empty()) + { + auto nested_type = typeid_cast(*block.getByPosition(arguments[0]).type).getNestedType(); + + block.getByPosition(result).column = std::make_shared( + block.rowsInFirstColumn(), + Array{nested_type->getDefault()}, + nested_type->clone()); + } + else + block.getByPosition(result).column = block.getByPosition(arguments[0]).column; + + return true; + } + else + return false; +} + +template +bool FunctionEmptyArrayToSingle::executeNumber( + const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, + IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets) +{ + if (const ColumnVector * src_data_concrete = typeid_cast *>(&src_data)) + { + const PaddedPODArray & src_data = src_data_concrete->getData(); + PaddedPODArray & res_data = typeid_cast &>(res_data_col).getData(); + size_t size = src_offsets.size(); + res_offsets.resize(size); + res_data.reserve(src_data.size()); + + ColumnArray::Offset_t src_prev_offset = 0; + ColumnArray::Offset_t res_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (src_offsets[i] != src_prev_offset) + { + size_t size_to_write = src_offsets[i] - src_prev_offset; + size_t prev_res_data_size = res_data.size(); + 
res_data.resize(prev_res_data_size + size_to_write); + memcpy(&res_data[prev_res_data_size], &src_data[src_prev_offset], size_to_write * sizeof(T)); + res_prev_offset += size_to_write; + res_offsets[i] = res_prev_offset; + } + else + { + res_data.push_back(T()); + ++res_prev_offset; + res_offsets[i] = res_prev_offset; + } + + src_prev_offset = src_offsets[i]; + } + + return true; + } + else + return false; +} + +bool FunctionEmptyArrayToSingle::executeFixedString( + const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, + IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets) +{ + if (const ColumnFixedString * src_data_concrete = typeid_cast(&src_data)) + { + const size_t n = src_data_concrete->getN(); + const ColumnFixedString::Chars_t & src_data = src_data_concrete->getChars(); + ColumnFixedString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); + size_t size = src_offsets.size(); + res_offsets.resize(size); + res_data.reserve(src_data.size()); + + ColumnArray::Offset_t src_prev_offset = 0; + ColumnArray::Offset_t res_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (src_offsets[i] != src_prev_offset) + { + size_t size_to_write = src_offsets[i] - src_prev_offset; + size_t prev_res_data_size = res_data.size(); + res_data.resize(prev_res_data_size + size_to_write * n); + memcpy(&res_data[prev_res_data_size], &src_data[src_prev_offset], size_to_write * n); + res_prev_offset += size_to_write; + res_offsets[i] = res_prev_offset; + } + else + { + size_t prev_res_data_size = res_data.size(); + res_data.resize(prev_res_data_size + n); + memset(&res_data[prev_res_data_size], 0, n); + ++res_prev_offset; + res_offsets[i] = res_prev_offset; + } + + src_prev_offset = src_offsets[i]; + } + + return true; + } + else + return false; +} + +bool FunctionEmptyArrayToSingle::executeString( + const IColumn & src_data, const ColumnArray::Offsets_t & src_array_offsets, + IColumn & res_data_col, ColumnArray::Offsets_t & res_array_offsets) 
+{ + if (const ColumnString * src_data_concrete = typeid_cast(&src_data)) + { + const ColumnString::Offsets_t & src_string_offsets = src_data_concrete->getOffsets(); + ColumnString::Offsets_t & res_string_offsets = typeid_cast(res_data_col).getOffsets(); + + const ColumnString::Chars_t & src_data = src_data_concrete->getChars(); + ColumnString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); + + size_t size = src_array_offsets.size(); + res_array_offsets.resize(size); + res_string_offsets.reserve(src_string_offsets.size()); + res_data.reserve(src_data.size()); + + ColumnArray::Offset_t src_array_prev_offset = 0; + ColumnArray::Offset_t res_array_prev_offset = 0; + + ColumnString::Offset_t src_string_prev_offset = 0; + ColumnString::Offset_t res_string_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (src_array_offsets[i] != src_array_prev_offset) + { + size_t array_size = src_array_offsets[i] - src_array_prev_offset; + + size_t bytes_to_copy = 0; + size_t from_string_prev_offset_local = src_string_prev_offset; + for (size_t j = 0; j < array_size; ++j) + { + size_t string_size = src_string_offsets[src_array_prev_offset + j] - from_string_prev_offset_local; + + res_string_prev_offset += string_size; + res_string_offsets.push_back(res_string_prev_offset); + + from_string_prev_offset_local += string_size; + bytes_to_copy += string_size; + } + + size_t res_data_old_size = res_data.size(); + res_data.resize(res_data_old_size + bytes_to_copy); + memcpy(&res_data[res_data_old_size], &src_data[src_string_prev_offset], bytes_to_copy); + + res_array_prev_offset += array_size; + res_array_offsets[i] = res_array_prev_offset; + } + else + { + res_data.push_back(0); /// Пустая строка, включая ноль на конце. 
+ + ++res_string_prev_offset; + res_string_offsets.push_back(res_string_prev_offset); + + ++res_array_prev_offset; + res_array_offsets[i] = res_array_prev_offset; + } + + src_array_prev_offset = src_array_offsets[i]; + + if (src_array_prev_offset) + src_string_prev_offset = src_string_offsets[src_array_prev_offset - 1]; + } + + return true; + } + else + return false; +} + +/// Implementation of FunctionArrayReverse. + +FunctionPtr FunctionArrayReverse::create(const Context & context) +{ + return std::make_shared(); +} + +String FunctionArrayReverse::getName() const +{ + return name; +} + +DataTypePtr FunctionArrayReverse::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeArray * array_type = typeid_cast(arguments[0].get()); + if (!array_type) + throw Exception("Argument for function " + getName() + " must be array.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return arguments[0]->clone(); +} + +void FunctionArrayReverse::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + if (executeConst(block, arguments, result)) + return; + + const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + if (!array) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + ColumnPtr res_ptr = array->cloneEmpty(); + block.getByPosition(result).column = res_ptr; + ColumnArray & res = static_cast(*res_ptr); + + const IColumn & src_data = array->getData(); + const ColumnArray::Offsets_t & offsets = array->getOffsets(); + IColumn & res_data = res.getData(); + res.getOffsetsColumn() = array->getOffsetsColumn(); + + if (!( executeNumber (src_data, offsets, res_data) + || 
executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeNumber (src_data, offsets, res_data) + || executeString (src_data, offsets, res_data) + || executeFixedString (src_data, offsets, res_data))) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); +} + +bool FunctionArrayReverse::executeConst(Block & block, const ColumnNumbers & arguments, size_t result) +{ + if (const ColumnConstArray * const_array = typeid_cast(block.getByPosition(arguments[0]).column.get())) + { + const Array & arr = const_array->getData(); + + size_t size = arr.size(); + Array res(size); + + for (size_t i = 0; i < size; ++i) + res[i] = arr[size - i - 1]; + + block.getByPosition(result).column = std::make_shared( + block.rowsInFirstColumn(), + res, + block.getByPosition(arguments[0]).type->clone()); + + return true; + } + else + return false; +} + +template +bool FunctionArrayReverse::executeNumber( + const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, + IColumn & res_data_col) +{ + if (const ColumnVector * src_data_concrete = typeid_cast *>(&src_data)) + { + const PaddedPODArray & src_data = src_data_concrete->getData(); + PaddedPODArray & res_data = typeid_cast &>(res_data_col).getData(); + size_t size = src_offsets.size(); + res_data.resize(src_data.size()); + + ColumnArray::Offset_t src_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + const T * src = &src_data[src_prev_offset]; + const T * src_end = &src_data[src_offsets[i]]; + + if (src == src_end) + continue; + + T * dst = &res_data[src_offsets[i] - 1]; 
+ + while (src < src_end) + { + *dst = *src; + ++src; + --dst; + } + + src_prev_offset = src_offsets[i]; + } + + return true; + } + else + return false; +} + +bool FunctionArrayReverse::executeFixedString( + const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, + IColumn & res_data_col) +{ + if (const ColumnFixedString * src_data_concrete = typeid_cast(&src_data)) + { + const size_t n = src_data_concrete->getN(); + const ColumnFixedString::Chars_t & src_data = src_data_concrete->getChars(); + ColumnFixedString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); + size_t size = src_offsets.size(); + res_data.resize(src_data.size()); + + ColumnArray::Offset_t src_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + const UInt8 * src = &src_data[src_prev_offset * n]; + const UInt8 * src_end = &src_data[src_offsets[i] * n]; + + if (src == src_end) + continue; + + UInt8 * dst = &res_data[src_offsets[i] * n - n]; + + while (src < src_end) + { + memcpySmallAllowReadWriteOverflow15(dst, src, n); + src += n; + dst -= n; + } + + src_prev_offset = src_offsets[i]; + } + + return true; + } + else + return false; +} + +bool FunctionArrayReverse::executeString( + const IColumn & src_data, const ColumnArray::Offsets_t & src_array_offsets, + IColumn & res_data_col) +{ + if (const ColumnString * src_data_concrete = typeid_cast(&src_data)) + { + const ColumnString::Offsets_t & src_string_offsets = src_data_concrete->getOffsets(); + ColumnString::Offsets_t & res_string_offsets = typeid_cast(res_data_col).getOffsets(); + + const ColumnString::Chars_t & src_data = src_data_concrete->getChars(); + ColumnString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); + + size_t size = src_array_offsets.size(); + res_string_offsets.resize(src_string_offsets.size()); + res_data.resize(src_data.size()); + + ColumnArray::Offset_t src_array_prev_offset = 0; + ColumnString::Offset_t res_string_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if 
(src_array_offsets[i] != src_array_prev_offset) + { + size_t array_size = src_array_offsets[i] - src_array_prev_offset; + + for (size_t j = 0; j < array_size; ++j) + { + size_t j_reversed = array_size - j - 1; + + auto src_pos = src_array_prev_offset + j_reversed == 0 ? 0 : src_string_offsets[src_array_prev_offset + j_reversed - 1]; + size_t string_size = src_string_offsets[src_array_prev_offset + j_reversed] - src_pos; + + memcpySmallAllowReadWriteOverflow15(&res_data[res_string_prev_offset], &src_data[src_pos], string_size); + + res_string_prev_offset += string_size; + res_string_offsets[src_array_prev_offset + j] = res_string_prev_offset; + } + } + + src_array_prev_offset = src_array_offsets[i]; + } + + return true; + } + else + return false; +} + +/// Implementation of FunctionArrayReduce. + +FunctionPtr FunctionArrayReduce::create(const Context & context) +{ + return std::make_shared(); +} + +String FunctionArrayReduce::getName() const +{ + return name; +} + +void FunctionArrayReduce::getReturnTypeAndPrerequisitesImpl( + const ColumnsWithTypeAndName & arguments, + DataTypePtr & out_return_type, + std::vector & out_prerequisites) +{ + /// Первый аргумент - константная строка с именем агрегатной функции (возможно, с параметрами в скобках, например: "quantile(0.99)"). 
+ + if (arguments.size() < 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be at least 2.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const ColumnConstString * aggregate_function_name_column = typeid_cast(arguments[0].column.get()); + if (!aggregate_function_name_column) + throw Exception("First argument for function " + getName() + " must be constant string: name of aggregate function.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + DataTypes argument_types(arguments.size() - 1); + for (size_t i = 1, size = arguments.size(); i < size; ++i) + { + const DataTypeArray * arg = typeid_cast(arguments[i].type.get()); + if (!arg) + throw Exception("Argument " + toString(i) + " for function " + getName() + " must be array.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + argument_types[i - 1] = arg->getNestedType()->clone(); + } + + if (!aggregate_function) + { + const String & aggregate_function_name_with_params = aggregate_function_name_column->getData(); + + if (aggregate_function_name_with_params.empty()) + throw Exception("First argument for function " + getName() + " (name of aggregate function) cannot be empty.", + ErrorCodes::BAD_ARGUMENTS); + + bool has_parameters = ')' == aggregate_function_name_with_params.back(); + + String aggregate_function_name = aggregate_function_name_with_params; + String parameters; + Array params_row; + + if (has_parameters) + { + size_t pos = aggregate_function_name_with_params.find('('); + if (pos == std::string::npos || pos + 2 >= aggregate_function_name_with_params.size()) + throw Exception("First argument for function " + getName() + " doesn't look like aggregate function name.", + ErrorCodes::BAD_ARGUMENTS); + + aggregate_function_name = aggregate_function_name_with_params.substr(0, pos); + parameters = aggregate_function_name_with_params.substr(pos + 1, aggregate_function_name_with_params.size() - pos - 2); + + if 
(aggregate_function_name.empty()) + throw Exception("First argument for function " + getName() + " doesn't look like aggregate function name.", + ErrorCodes::BAD_ARGUMENTS); + + ParserExpressionList params_parser(false); + ASTPtr args_ast = parseQuery(params_parser, + parameters.data(), parameters.data() + parameters.size(), + "parameters of aggregate function"); + + ASTExpressionList & args_list = typeid_cast(*args_ast); + + if (args_list.children.empty()) + throw Exception("Incorrect list of parameters to aggregate function " + + aggregate_function_name, ErrorCodes::BAD_ARGUMENTS); + + params_row.reserve(args_list.children.size()); + for (const auto & child : args_list.children) + { + const ASTLiteral * lit = typeid_cast(child.get()); + if (!lit) + throw Exception("Parameters to aggregate functions must be literals", + ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS); + + params_row.push_back(lit->value); + } + } + + aggregate_function = AggregateFunctionFactory().get(aggregate_function_name, argument_types); + + /// Because ownership of the aggregate function states is not handed over anywhere. + if (aggregate_function->isState()) + throw Exception("Using aggregate function with -State modifier in function arrayReduce is not supported", ErrorCodes::BAD_ARGUMENTS); + + if (has_parameters) + aggregate_function->setParameters(params_row); + aggregate_function->setArguments(argument_types); + } + + out_return_type = aggregate_function->getReturnType(); +} + +/// Runs the stored aggregate_function over every row of the array arguments and writes one result per row. +/// arguments[0] is not read here; each of arguments[1..] must be a ColumnArray or a ColumnConstArray, +/// otherwise an exception with code ILLEGAL_COLUMN is thrown. Constant arrays are converted to full columns, +/// which are kept alive in materialized_columns while their nested data is used. +/// Row boundaries are taken from the offsets of the first materialized column if any exists, +/// else from the column at arguments[1]. +/// NOTE(review): presumably all array arguments have equal per-row sizes; confirm where this is checked. +/// For each row a state is created in place, fed the elements of that row, its result is inserted into +/// the result column, and the state is destroyed; on an exception the state is destroyed before rethrowing. +void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + IAggregateFunction & agg_func = *aggregate_function.get(); + /// Raw buffer of sizeOfData() bytes holding one aggregate state; the same buffer is reused for every row. + std::unique_ptr place_holder { new char[agg_func.sizeOfData()] }; + AggregateDataPtr place = place_holder.get(); + + size_t rows = block.rowsInFirstColumn(); + + /// Aggregate functions do not support constant columns, so materialize them. 
+ std::vector materialized_columns; + + std::vector aggregate_arguments_vec(arguments.size() - 1); + + for (size_t i = 0, size = arguments.size() - 1; i < size; ++i) + { + const IColumn * col = block.unsafeGetByPosition(arguments[i + 1]).column.get(); + if (const ColumnArray * arr = typeid_cast(col)) + { + aggregate_arguments_vec[i] = arr->getDataPtr().get(); + } + else if (const ColumnConstArray * arr = typeid_cast(col)) + { + materialized_columns.emplace_back(arr->convertToFullColumn()); + aggregate_arguments_vec[i] = typeid_cast(*materialized_columns.back().get()).getDataPtr().get(); + } + else + throw Exception("Illegal column " + col->getName() + " as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + } + const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); + + const ColumnArray::Offsets_t & offsets = typeid_cast(!materialized_columns.empty() + ? *materialized_columns.front().get() + : *block.unsafeGetByPosition(arguments[1]).column.get()).getOffsets(); + + ColumnPtr result_holder = block.getByPosition(result).type->createColumn(); + block.getByPosition(result).column = result_holder; + IColumn & res_col = *result_holder.get(); + + ColumnArray::Offset_t current_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + agg_func.create(place); + ColumnArray::Offset_t next_offset = offsets[i]; + + try + { + for (size_t j = current_offset; j < next_offset; ++j) + agg_func.add(place, aggregate_arguments, j); + + agg_func.insertResultInto(place, res_col); + } + catch (...) 
+ { + agg_func.destroy(place); + throw; + } + + agg_func.destroy(place); + current_offset = next_offset; + } +} + } diff --git a/dbms/src/Functions/FunctionsConditional.cpp b/dbms/src/Functions/FunctionsConditional.cpp index fb044c2aba2..9aa5b4cf020 100644 --- a/dbms/src/Functions/FunctionsConditional.cpp +++ b/dbms/src/Functions/FunctionsConditional.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 090fbe2edbd..7eef5cf0e15 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -11,6 +11,11 @@ namespace DB namespace { +/// Suppose a function which has no special support for nullable arguments +/// has been called with arguments, one or more of them being nullable. +/// Then the method below endows the result, which is nullable, with a null +/// byte map that is determined by OR-ing the null byte maps of the nullable +/// arguments. void createNullValuesByteMap(Block & block, const ColumnNumbers & args, size_t result) { ColumnNullable & res_col = static_cast(*block.unsafeGetByPosition(result).column); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 636b3e4c304..e75f7047f4a 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -255,7 +255,7 @@ void ExpressionAnalyzer::analyzeAggregation() { NameSet unique_keys; ASTs & group_asts = select_query->group_expression_list->children; - for (ssize_t i = 0; i < group_asts.size(); ++i) + for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { size_t size = group_asts.size(); getRootActions(group_asts[i], true, false, temp_actions); @@ -274,7 +274,7 @@ void ExpressionAnalyzer::analyzeAggregation() /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. 
if (!aggregate_descriptions.empty() || size > 1) { - if (i + 1 < size) + if (i + 1 < static_cast(size)) group_asts[i] = std::move(group_asts.back()); group_asts.pop_back();