From e48c3d7b5e1275d401483f23b656e7c7ca21bd7e Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Thu, 17 Apr 2014 18:37:59 +0400 Subject: [PATCH] dbms: array element support non-constant index + test on new functionality [METR-10798] --- dbms/include/DB/Functions/FunctionsArray.h | 297 ++++++++++++++++-- .../0_stateless/00036_array_element.reference | 30 ++ .../0_stateless/00036_array_element.sql | 33 ++ 3 files changed, 329 insertions(+), 31 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00036_array_element.reference create mode 100644 dbms/tests/queries/0_stateless/00036_array_element.sql diff --git a/dbms/include/DB/Functions/FunctionsArray.h b/dbms/include/DB/Functions/FunctionsArray.h index 9edc6f5ca94..8db734bb9ba 100644 --- a/dbms/include/DB/Functions/FunctionsArray.h +++ b/dbms/include/DB/Functions/FunctionsArray.h @@ -186,13 +186,14 @@ public: }; -template +template struct ArrayElementNumImpl { /** Если negative = false - передаётся индекс с начала массива, начиная с нуля. * Если negative = true - передаётся индекс с конца массива, начиная с нуля. */ - static void vector( + template + static void vectorConst( const PODArray & data, const ColumnArray::Offsets_t & offsets, const ColumnArray::Offset_t index, PODArray & result) @@ -213,12 +214,55 @@ struct ArrayElementNumImpl current_offset = offsets[i]; } } + + template + static void vector( + const PODArray & data, const ColumnArray::Offsets_t & offsets, + const ColumnVector & index, + PODArray & result) + { + size_t size = offsets.size(); + result.resize(size); + + ColumnArray::Offset_t current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + + if (index[i].getType() == Field::Types::UInt64) + { + UInt64 cur_id = safeGet(index[i]); + if (cur_id == 0) + throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); + else if (cur_id <= array_size) + result[i] = data[current_offset + cur_id - 1]; + else + result[i] = T(); + } + else if (index[i].getType() == Field::Types::Int64) + { + Int64 cur_id = safeGet(index[i]); + if (cur_id == 0) + throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); + else if (cur_id > 0 && cur_id <= array_size) + result[i] = data[current_offset + cur_id - 1]; + else if (cur_id < 0 && -cur_id <= array_size) + result[i] = data[offsets[i] + cur_id]; + else + result[i] = T(); + } + else + throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + + current_offset = offsets[i]; + } + } }; -template struct ArrayElementStringImpl { - static void vector( + template + static void vectorConst( const ColumnString::Chars_t & data, const ColumnArray::Offsets_t & offsets, const ColumnString::Offsets_t & string_offsets, const ColumnArray::Offset_t index, ColumnString::Chars_t & result_data, ColumnArray::Offsets_t & result_offsets) @@ -260,6 +304,70 @@ struct ArrayElementStringImpl current_offset = offsets[i]; } } + + template + static void vector( + const ColumnString::Chars_t & data, const ColumnArray::Offsets_t & offsets, const ColumnString::Offsets_t & string_offsets, + const ColumnVector & index, + ColumnString::Chars_t & result_data, ColumnArray::Offsets_t & result_offsets) + { + size_t size = offsets.size(); + result_offsets.resize(size); + result_data.reserve(data.size()); + + ColumnArray::Offset_t current_offset = 0; + ColumnArray::Offset_t current_result_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + size_t adjusted_index; + + if (index[i].getType() == Field::Types::UInt64) + { + UInt64 cur_id = safeGet(index[i]); + if (cur_id == 0) + adjusted_index = array_size; /// Индекс не вписывается в рамки массива, заменяем заведомо слишком большим + else + adjusted_index = cur_id - 1; + } + else if (index[i].getType() == Field::Types::Int64) + { + Int64 cur_id = safeGet(index[i]); + if (cur_id > 0 && cur_id <= array_size) + adjusted_index = cur_id - 1; + else if (cur_id < 0 && -cur_id <= array_size) + adjusted_index = array_size + cur_id; + else + adjusted_index = array_size; /// Индекс не вписывается в рамки массива, заменяем слишком большим + } + else + throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + + if (adjusted_index < array_size) + { + ColumnArray::Offset_t string_pos = current_offset == 0 && adjusted_index == 0 + ? 0 + : string_offsets[current_offset + adjusted_index - 1]; + + ColumnArray::Offset_t string_size = string_offsets[current_offset + adjusted_index] - string_pos; + + result_data.resize(current_result_offset + string_size); + memcpy(&result_data[current_result_offset], &data[string_pos], string_size); + current_result_offset += string_size; + result_offsets[i] = current_result_offset; + } + else + { + /// Вставим пустую строку. + result_data.resize(current_result_offset + 1); + result_data[current_result_offset] = 0; + current_result_offset += 1; + result_offsets[i] = current_result_offset; + } + + current_offset = offsets[i]; + } + } }; @@ -267,7 +375,7 @@ class FunctionArrayElement : public IFunction { private: template - bool executeNumber(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) + bool executeNumberConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) { const ColumnArray * col_array = dynamic_cast(&*block.getByPosition(arguments[0]).column); @@ -283,16 +391,37 @@ private: block.getByPosition(result).column = col_res; if (index.getType() == Field::Types::UInt64) - ArrayElementNumImpl::vector(col_nested->getData(), col_array->getOffsets(), safeGet(index) - 1, col_res->getData()); + ArrayElementNumImpl::template vectorConst(col_nested->getData(), col_array->getOffsets(), safeGet(index) - 1, col_res->getData()); else if (index.getType() == Field::Types::Int64) - ArrayElementNumImpl::vector(col_nested->getData(), col_array->getOffsets(), -safeGet(index) - 1, col_res->getData()); + ArrayElementNumImpl::template vectorConst(col_nested->getData(), col_array->getOffsets(), -safeGet(index) - 1, col_res->getData()); else throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); return true; } - bool executeString(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) + template + bool executeNumber(Block & block, const ColumnNumbers & arguments, size_t result, const ColumnVector & index) + { + const ColumnArray * col_array = dynamic_cast(&*block.getByPosition(arguments[0]).column); + + if (!col_array) + return false; + + const ColumnVector * col_nested = dynamic_cast *>(&col_array->getData()); + + if (!col_nested) + return false; + + ColumnVector * col_res = new ColumnVector; + block.getByPosition(result).column = col_res; + + ArrayElementNumImpl::template vector(col_nested->getData(), col_array->getOffsets(), index, col_res->getData()); + + return true; + } + + bool executeStringConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) { const ColumnArray * col_array = dynamic_cast(&*block.getByPosition(arguments[0]).column); @@ -308,7 +437,7 @@ private: block.getByPosition(result).column = col_res; if (index.getType() == Field::Types::UInt64) - ArrayElementStringImpl::vector( + ArrayElementStringImpl::vectorConst( col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), @@ -316,7 +445,7 @@ private: col_res->getChars(), col_res->getOffsets()); else if (index.getType() == Field::Types::Int64) - ArrayElementStringImpl::vector( + ArrayElementStringImpl::vectorConst( col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), @@ -329,7 +458,34 @@ private: return true; } - bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) + template + bool executeString(Block & block, const ColumnNumbers & arguments, size_t result, const ColumnVector & index) + { + const ColumnArray * col_array = dynamic_cast(&*block.getByPosition(arguments[0]).column); + + if (!col_array) + return false; + + const ColumnString * col_nested = dynamic_cast(&col_array->getData()); + + if (!col_nested) + return false; + + ColumnString * col_res = new ColumnString; + block.getByPosition(result).column = col_res; + + ArrayElementStringImpl::vector( + col_nested->getChars(), + col_array->getOffsets(), + col_nested->getOffsets(), + index, + col_res->getChars(), + col_res->getOffsets()); + + return true; + } + + bool executeConstConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index) { const ColumnConstArray * col_array = dynamic_cast(&*block.getByPosition(arguments[0]).column); @@ -356,6 +512,72 @@ private: return true; } + template + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const ColumnVector & index) + { + const ColumnConstArray * col_array = dynamic_cast(&*block.getByPosition(arguments[0]).column); + + if (!col_array) + return false; + + const DB::Array & array = col_array->getData(); + size_t array_size = array.size(); + + block.getByPosition(result).column = block.getByPosition(result).type->createColumn(); + + for (size_t i = 0; i < col_array->size(); ++i) + { + if (index[i].getType() == Field::Types::UInt64) + { + UInt64 cur_id = safeGet(index[i]); + if (cur_id > 0 && cur_id <= array_size) + block.getByPosition(result).column->insert(array[cur_id - 1]); + else + block.getByPosition(result).column->insertDefault(); + } + else if (index[i].getType() == Field::Types::Int64) + { + Int64 cur_id = safeGet(index[i]); + if (cur_id > 0 && cur_id <= array_size) + block.getByPosition(result).column->insert(array[cur_id - 1]); + else if (cur_id < 0 && -cur_id <= array_size) + block.getByPosition(result).column->insert(array[array_size + cur_id]); + else + block.getByPosition(result).column->insertDefault(); + } + else + throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + } + + return true; + } + + template + bool executeArgument(Block & block, const ColumnNumbers & arguments, size_t result) + { + const ColumnVector * index = dynamic_cast *> (&*block.getByPosition(arguments[1]).column); + + if (!index) + return false; + + if (!( executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeNumber (block, arguments, result, *index) + || executeConst (block, arguments, result, *index) + || executeString (block, arguments, result, *index))) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + return true; + } + public: /// Получить имя функции. String getName() const @@ -386,28 +608,41 @@ public: void execute(Block & block, const ColumnNumbers & arguments, size_t result) { if (!block.getByPosition(arguments[1]).column->isConst()) - throw Exception("Second argument for function " + getName() + " must be constant.", ErrorCodes::ILLEGAL_COLUMN); + { + if (!( executeArgument (block, arguments, result) + || executeArgument (block, arguments, result) + || executeArgument (block, arguments, result) + || executeArgument (block, arguments, result) + || executeArgument (block, arguments, result) + || executeArgument (block, arguments, result) + || executeArgument (block, arguments, result) + || executeArgument (block, arguments, result))) + throw Exception("Second argument for function " + getName() + " must must have UInt or Int type.", + ErrorCodes::ILLEGAL_COLUMN); + } + else + { + Field index = (*block.getByPosition(arguments[1]).column)[0]; - Field index = (*block.getByPosition(arguments[1]).column)[0]; + if (index == UInt64(0)) + throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); - if (index == UInt64(0)) - throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); - - if (!( executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeNumber (block, arguments, result, index) - || executeConst (block, arguments, result, index) - || executeString (block, arguments, result, index))) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + if (!( executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeNumberConst (block, arguments, result, index) + || executeConstConst (block, arguments, result, index) + || executeStringConst (block, arguments, result, index))) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } } }; diff --git a/dbms/tests/queries/0_stateless/00036_array_element.reference b/dbms/tests/queries/0_stateless/00036_array_element.reference new file mode 100644 index 00000000000..6033afb6984 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00036_array_element.reference @@ -0,0 +1,30 @@ +12 +0 +13 +11 +0 +12 +0 +11 +0 +Df + +ERT +Ab + +Df + +ABC + +[1,2,3] 2 +[1,2,3] 1 +[1,2,3] 0 +[1,2,3] 3 +[1,2,3] 2 +[1,2,3] 1 +[1,2,3] 0 +[1,2,3] 3 +[1,2,3] 2 +[1,2,3] 3 +[1,2,3] 0 +[1,2,3] 1 diff --git a/dbms/tests/queries/0_stateless/00036_array_element.sql b/dbms/tests/queries/0_stateless/00036_array_element.sql new file mode 100644 index 00000000000..4492e77feed --- /dev/null +++ b/dbms/tests/queries/0_stateless/00036_array_element.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS array_element_test; +CREATE TABLE array_element_test (arr Array(Int32), id Int32) ENGINE = Memory; +insert into array_element_test VALUES ([11,12,13], 2), ([11,12], 3), ([11,12,13], -1), ([11,12], -2), ([11,12], -3); +select arr[id] from array_element_test; + +DROP TABLE IF EXISTS array_element_test; +CREATE TABLE array_element_test (arr Array(Int32), id UInt32) ENGINE = Memory; +insert into array_element_test VALUES ([11,12,13], 2), ([11,12], 3), ([11,12,13], 1), ([11,12], 4); +select arr[id] from array_element_test; + +DROP TABLE IF EXISTS array_element_test; +CREATE TABLE array_element_test (arr Array(String), id Int32) ENGINE = Memory; +insert into array_element_test VALUES (['Abc','Df','Q'], 2), (['Abc','DEFQ'], 3), (['ABC','Q','ERT'], -1), (['Ab','ber'], -2), (['AB','asd'], -3); +select arr[id] from array_element_test; + +DROP TABLE IF EXISTS array_element_test; +CREATE TABLE array_element_test (arr Array(String), id UInt32) ENGINE = Memory; +insert into array_element_test VALUES (['Abc','Df','Q'], 2), (['Abc','DEFQ'], 3), (['ABC','Q','ERT'], 1), (['Ab','ber'], 4); +select arr[id] from array_element_test; + +DROP TABLE IF EXISTS array_element_test; +CREATE TABLE array_element_test (id UInt32) ENGINE = Memory; +insert into array_element_test VALUES (2), (1), (4), (3); +select [1, 2, 3] as arr, arr[id] from array_element_test; + +DROP TABLE IF EXISTS array_element_test; +CREATE TABLE array_element_test (id Int32) ENGINE = Memory; +insert into array_element_test VALUES (-2), (1), (-4), (3), (2), (-1), (4), (-3); +select [1, 2, 3] as arr, arr[id] from array_element_test; + +DROP TABLE IF EXISTS array_element_test; + +