Avoid too slow queries with arrays

This commit is contained in:
Alexey Milovidov 2020-08-15 12:13:52 +03:00
parent d0eeedd322
commit 57b8d3f89b
2 changed files with 28 additions and 4 deletions

View File

@ -31,8 +31,17 @@ namespace ErrorCodes
extern const int PARAMETER_OUT_OF_BOUND;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
extern const int TOO_LARGE_ARRAY_SIZE;
}
/** Upper bound on the number of elements an array may have when it is extracted as a single Field.
  * Building a Field from a large array can be slow and can consume a vast amount of memory,
  * so arrays whose size exceeds this limit are rejected with TOO_LARGE_ARRAY_SIZE instead.
  * The limit may be raised if the query
  *     SELECT range(10000000)
  * completes in less than 500 ms on your machine.
  */
static constexpr size_t max_array_size_as_field = 1'000'000;
ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column)
: data(std::move(nested_column)), offsets(std::move(offsets_column))
@ -117,6 +126,11 @@ Field ColumnArray::operator[](size_t n) const
{
size_t offset = offsetAt(n);
size_t size = sizeAt(n);
if (size > max_array_size_as_field)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array of size {} is too large to be manipulated as single field, maximum size {}",
size, max_array_size_as_field);
Array res(size);
for (size_t i = 0; i < size; ++i)
@ -130,6 +144,11 @@ void ColumnArray::get(size_t n, Field & res) const
{
size_t offset = offsetAt(n);
size_t size = sizeAt(n);
if (size > max_array_size_as_field)
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array of size {} is too large to be manipulated as single field, maximum size {}",
size, max_array_size_as_field);
res = Array(size);
Array & res_arr = DB::get<Array &>(res);

View File

@ -102,7 +102,8 @@ private:
}
template <typename T>
bool executeConstStartStep(Block & block, const IColumn * end_arg, const T start, const T step, const size_t input_rows_count, const size_t result) const
bool executeConstStartStep(
Block & block, const IColumn * end_arg, const T start, const T step, const size_t input_rows_count, const size_t result) const
{
auto end_column = checkAndGetColumn<ColumnVector<T>>(end_arg);
if (!end_column)
@ -155,7 +156,8 @@ private:
}
template <typename T>
bool executeConstStep(Block & block, const IColumn * start_arg, const IColumn * end_arg, const T step, const size_t input_rows_count, const size_t result) const
bool executeConstStep(
Block & block, const IColumn * start_arg, const IColumn * end_arg, const T step, const size_t input_rows_count, const size_t result) const
{
auto start_column = checkAndGetColumn<ColumnVector<T>>(start_arg);
auto end_column = checkAndGetColumn<ColumnVector<T>>(end_arg);
@ -210,7 +212,8 @@ private:
}
template <typename T>
bool executeConstStart(Block & block, const IColumn * end_arg, const IColumn * step_arg, const T start, const size_t input_rows_count, const size_t result) const
bool executeConstStart(
Block & block, const IColumn * end_arg, const IColumn * step_arg, const T start, const size_t input_rows_count, const size_t result) const
{
auto end_column = checkAndGetColumn<ColumnVector<T>>(end_arg);
auto step_column = checkAndGetColumn<ColumnVector<T>>(step_arg);
@ -265,7 +268,9 @@ private:
}
template <typename T>
bool executeGeneric(Block & block, const IColumn * start_col, const IColumn * end_col, const IColumn * step_col, const size_t input_rows_count, const size_t result) const
bool executeGeneric(
Block & block, const IColumn * start_col, const IColumn * end_col, const IColumn * step_col,
const size_t input_rows_count, const size_t result) const
{
auto start_column = checkAndGetColumn<ColumnVector<T>>(start_col);
auto end_column = checkAndGetColumn<ColumnVector<T>>(end_col);