From e789d15948eaec3eaa9a8604e24d2f6ed7b60db5 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 5 Mar 2024 16:06:25 +0800 Subject: [PATCH 1/9] optimize insertmanyfrom of nullable(number) or nullable(string) --- src/Columns/ColumnDecimal.h | 7 +++++++ src/Columns/ColumnNullable.cpp | 8 ++++++++ src/Columns/ColumnNullable.h | 1 + src/Columns/ColumnString.cpp | 21 +++++++++++++++++++++ src/Columns/ColumnString.h | 2 ++ 5 files changed, 39 insertions(+) diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 7ca01a8342c..e0ea26744dc 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -56,6 +56,13 @@ public: void shrinkToFit() override { data.shrink_to_fit(); } void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } + + void insertManyFrom(const IColumn & src, size_t position, size_t length) override + { + ValueType v = assert_cast(src).getData()[position]; + data.resize_fill(data.size() + length, v); + } + void insertData(const char * src, size_t /*length*/) override; void insertDefault() override { data.push_back(T()); } void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 1d11827ac97..fa5fdfb8c21 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -231,6 +231,14 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n) getNullMapData().push_back(src_concrete.getNullMapData()[n]); } + +void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnNullable & src_concrete = assert_cast(src); + getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length); + getNullMapColumn().insertManyFrom(src_concrete.getNullMapColumn(), position, length); +} + void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n) { getNestedColumn().insertFrom(src, n); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index b4aef8e08fa..ef4bf4fa41b 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,6 +69,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void insertFrom(const IColumn & src, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertFromNotNullable(const IColumn & src, size_t n); void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length); diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index b9128372cea..f3c7ac1bf0c 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -38,6 +38,27 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } +void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnString & src_concrete = assert_cast(src); + const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]]; + const size_t src_buf_size + = src_concrete.offsets[position] - src_concrete.offsets[position - 1]; /// -1th index is Ok, see PaddedPODArray. + + const size_t old_size = chars.size(); + const size_t new_size = old_size + src_buf_size * length; + chars.resize(new_size); + + const size_t old_rows = offsets.size(); + offsets.resize(old_rows + length); + + for (size_t current_offset = old_size; current_offset < new_size; current_offset += src_buf_size) + memcpySmallAllowReadWriteOverflow15(&chars[current_offset], src_buf, src_buf_size); + + for (size_t i = 0, current_offset = old_size + src_buf_size; i < length; ++i, current_offset += src_buf_size) + offsets[old_rows + i] = current_offset; +} + MutableColumnPtr ColumnString::cloneResized(size_t to_size) const { diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 04aa1849187..2d1d69ced73 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -160,6 +160,8 @@ public: } } + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void insertData(const char * pos, size_t length) override { const size_t old_size = chars.size(); From a109952960acac12790cffde030062ec60208994 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 5 Mar 2024 22:08:36 +0800 Subject: [PATCH 2/9] dev columnstring --- src/Columns/ColumnArray.cpp | 83 +++++++++++++++++++++++++++++++ src/Columns/ColumnArray.h | 9 ++++ src/Columns/ColumnConst.h | 2 + src/Columns/ColumnFixedString.cpp | 14 ++++++ src/Columns/ColumnFixedString.h | 2 + 5 files changed, 110 insertions(+) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 7b268b80116..b620da81ae8 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -347,6 +347,89 @@ void ColumnArray::insertFrom(const IColumn & src_, size_t n) getOffsets().push_back(getOffsets().back() + size); } +template +void ColumnArray::insertManyFromNumber(const ColumnArray & src, size_t position, size_t length) +{ + using ColVecType = ColumnVectorOrDecimal; + size_t src_size = src.sizeAt(position); + size_t src_offset = src.offsetAt(position); + + const typename ColVecType::Container & src_data = typeid_cast(src.getData()).getData(); + typename ColVecType::Container & data_ref = typeid_cast(getData()).getData(); + size_t old_size = data_ref.size(); + size_t new_size = old_size + src_size * length; + data_ref.resize(new_size); + for (size_t i = 0, offset = old_size; i < length; ++i, offset += src_size) + memcpy(&data_ref[offset], &src_data[src_offset], src_size * sizeof(T)); +} + +void ColumnArray::insertManyFromString(const ColumnArray & src, size_t position, size_t length) +{ + size_t src_size = src.sizeAt(position); + size_t src_offset = src.offsetAt(position); + + const auto & src_string = typeid_cast(src.getData()); + const auto & src_chars = src_string.getChars(); + const auto & src_string_offsets = src_string.getOffsets(); + auto & dst_string = typeid_cast(getData()); + auto & dst_chars = dst_string.getChars(); + auto & dst_string_offsets = dst_string.getOffsets(); + + /// Each row may have multiple strings, copy them to dst_chars and update dst_offsets + size_t old_size = dst_string_offsets.size(); + size_t new_size = old_size + src_size * length; + dst_string_offsets.resize(new_size); + size_t dst_string_offset = dst_chars.size(); + for (size_t i = 0; i < length; ++i) + { + for (size_t j = 0; j < src_size; ++j) + { + size_t nested_offset = src_string_offsets[src_offset + j - 1]; + size_t nested_length = src_string_offsets[src_offset + j] - nested_offset; + + dst_string_offset += nested_length; + dst_string_offsets[old_size + i * src_size + j] = dst_string_offset; + } + } + + size_t chars_to_copy = src_string_offsets[src_offset + src_size - 1] - src_string_offsets[src_offset - 1]; + dst_chars.resize(dst_chars.size() + chars_to_copy * length); + for (size_t dst_offset = old_size; dst_offset < new_size; dst_offset += src_size) + memcpy(&dst_chars[dst_string_offsets[dst_offset - 1]], &src_chars[src_string_offsets[src_offset - 1]], chars_to_copy); +} + +void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, size_t length) +{ + +} +void ColumnArray::insertManyFromNullable(const ColumnArray & src, size_t position, size_t length) +{ + +} +void ColumnArray::insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length) +{ + size_t src_size = src.sizeAt(position); + size_t src_offset = src.offsetAt(position); + const auto & src_data = src.getData(); + size_t new_size = data->size() + src_size * length; + data->reserve(new_size); + for (size_t i = 0; i < length; ++i) + data->insertRangeFrom(src_data, src_offset, src_size); +} + +void ColumnArray::insertManyFrom(const IColumn & src_, size_t position, size_t length) +{ + /// First fill offsets + const ColumnArray & src = assert_cast(src_); + size_t src_size = src.sizeAt(position); + auto & offsets_ref = getOffsets(); + size_t old_rows = offsets_ref.size(); + size_t new_rows = old_rows + length; + size_t old_size = offsets_ref.back(); + offsets_ref.resize(new_rows); + for (size_t i = 0, offset = old_size + src_size; i < length; ++i, offset += src_size) + offsets_ref[old_rows + i] = offset; +} void ColumnArray::insertDefault() { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 230d8830265..73d632a38b9 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -88,6 +88,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void insertFrom(const IColumn & src_, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -213,6 +214,14 @@ private: ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; + /// Specializations for insertManyFrom + template + void insertManyFromNumber(const ColumnArray & src, size_t position, size_t length); + void insertManyFromString(const ColumnArray & src, size_t position, size_t length); + void insertManyFromTuple(const ColumnArray & src, size_t position, size_t length); + void insertManyFromNullable(const ColumnArray & src, size_t position, size_t length); + void insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length); + int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const; }; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 990b7189fa3..4a3d40ca0d2 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -150,6 +150,8 @@ public: ++s; } + void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } + void insertDefault() override { ++s; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index e460c84d696..b55f68d4687 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -85,6 +85,20 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } +void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnFixedString & src_concrete = assert_cast(src); + if (n != src_concrete.getN()) + throw Exception(ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH, "Size of FixedString doesn't match"); + + const size_t old_size = chars.size(); + const size_t new_size = old_size + n * length; + chars.resize(new_size); + + for (size_t offset = old_size; offset < new_size; offset += n) + memcpySmallAllowReadWriteOverflow15(&chars[offset], &src_concrete.chars[n * position], n); +} + void ColumnFixedString::insertData(const char * pos, size_t length) { if (length > n) diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index f40e1356b27..56d42e8b34e 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -100,6 +100,8 @@ public: void insertFrom(const IColumn & src_, size_t index) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void insertData(const char * pos, size_t length) override; void insertDefault() override From 53c9d4513c4b93ed79df305bb5c36c0cfb43ef79 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 12:16:17 +0800 Subject: [PATCH 3/9] finish dev column array --- src/Columns/ColumnArray.cpp | 132 +++++++++++++++++++++++++++++++++--- src/Columns/ColumnArray.h | 3 + 2 files changed, 125 insertions(+), 10 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index b620da81ae8..aa0d5aa3e50 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int LOGICAL_ERROR; extern const int TOO_LARGE_ARRAY_SIZE; + extern const int ILLEGAL_COLUMN; } /** Obtaining array as Field can be slow for large arrays and consume vast amount of memory. @@ -363,6 +364,19 @@ void ColumnArray::insertManyFromNumber(const ColumnArray & src, size_t position, memcpy(&data_ref[offset], &src_data[src_offset], src_size * sizeof(T)); } +void ColumnArray::insertManyFromConst(const ColumnConst & src, size_t position, size_t length) +{ + const ColumnArray * src_array = typeid_cast(&src.getDataColumn()); + if (!src_array) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot insert from const column of type {} to column of type {}", + src.getDataColumn().getName(), + getName()); + + insertManyFromImpl(*src_array, 0, length, true); +} + void ColumnArray::insertManyFromString(const ColumnArray & src, size_t position, size_t length) { size_t src_size = src.sizeAt(position); @@ -400,12 +414,53 @@ void ColumnArray::insertManyFromString(const ColumnArray & src, size_t position, void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, size_t length) { + ColumnTuple & tuple = assert_cast(getData()); + const ColumnTuple & src_tuple = assert_cast(src.getData()); + /// Make temporary arrays for each components of Tuple. In the same way as for Nullable. + size_t tuple_size = tuple.tupleSize(); + size_t src_tuple_size = src_tuple.tupleSize(); + if (tuple_size == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple"); + if (tuple_size != src_tuple_size) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Nested tuple size mismatch: {} vs {}", tuple_size, src_tuple_size); + + Columns temporary_arrays(tuple_size); + Columns src_temporary_arrays(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + { + temporary_arrays[i] = ColumnArray::create(tuple.getColumn(i).assumeMutable(), getOffsetsPtr()->assumeMutable()); + src_temporary_arrays[i] = ColumnArray::create(src_tuple.getColumn(i).assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + assert_cast(*temporary_arrays[i]) + .insertManyFromImpl(assert_cast(*src_temporary_arrays[i]), position, length, false); + } + + Columns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = assert_cast(*temporary_arrays[i]).getDataPtr(); + + getDataPtr() = ColumnTuple::create(std::move(tuple_columns)); } + void ColumnArray::insertManyFromNullable(const ColumnArray & src, size_t position, size_t length) { + ColumnNullable & nullable = assert_cast(getData()); + const ColumnNullable & src_nullable = assert_cast(src.getData()); + /// Process nested column without updating array offsets + auto array_of_nested = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto src_array_of_nested = ColumnArray(src_nullable.getNestedColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + array_of_nested.insertManyFromImpl(src_array_of_nested, position, length, false); + + /// Process null map column without updating array offsets + auto array_of_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto src_array_of_null_map = ColumnArray(src_nullable.getNullMapColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + array_of_null_map.insertManyFromImpl(src_array_of_null_map, position, length, false); + + /// Update array data + getDataPtr() = ColumnNullable::create(array_of_nested.getDataPtr(), array_of_null_map.getDataPtr()); } + void ColumnArray::insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length) { size_t src_size = src.sizeAt(position); @@ -419,16 +474,73 @@ void ColumnArray::insertManyFromGeneric(const ColumnArray & src, size_t position void ColumnArray::insertManyFrom(const IColumn & src_, size_t position, size_t length) { - /// First fill offsets - const ColumnArray & src = assert_cast(src_); - size_t src_size = src.sizeAt(position); - auto & offsets_ref = getOffsets(); - size_t old_rows = offsets_ref.size(); - size_t new_rows = old_rows + length; - size_t old_size = offsets_ref.back(); - offsets_ref.resize(new_rows); - for (size_t i = 0, offset = old_size + src_size; i < length; ++i, offset += src_size) - offsets_ref[old_rows + i] = offset; + const ColumnConst * src_const = typeid_cast(&src_); + if (src_const) + return insertManyFromConst(*src_const, position, length); + + const ColumnArray * src_array = typeid_cast(&src_); + if (!src_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert from column of type {} to column of type {}", src_.getName(), getName()); + + return insertManyFromImpl(*src_array, position, length, true); +} + +void ColumnArray::insertManyFromImpl(const ColumnArray & src, size_t position, size_t length, bool update_offsets) +{ + /// First fill offsets if needed + if (update_offsets) + { + size_t src_size = src.sizeAt(position); + auto & offsets_ref = getOffsets(); + size_t old_rows = offsets_ref.size(); + size_t new_rows = old_rows + length; + size_t old_size = offsets_ref.back(); + offsets_ref.resize(new_rows); + for (size_t i = 0, offset = old_size + src_size; i < length; ++i, offset += src_size) + offsets_ref[old_rows + i] = offset; + } + + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNullable(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromTuple(src, position, length); + return insertManyFromGeneric(src, position, length); } void ColumnArray::insertDefault() diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 73d632a38b9..765f86ec552 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -215,6 +215,9 @@ private: ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; /// Specializations for insertManyFrom + void insertManyFromConst(const ColumnConst & src, size_t position, size_t length); + void insertManyFromImpl(const ColumnArray & src, size_t position, size_t length, bool update_offsets = true); + template void insertManyFromNumber(const ColumnArray & src, size_t position, size_t length); void insertManyFromString(const ColumnArray & src, size_t position, size_t length); From 3bf3c7cc708d1a564896d649a1a804b868f89d8d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 12:32:23 +0800 Subject: [PATCH 4/9] finish column map and tuple --- src/Columns/ColumnArray.cpp | 2 +- src/Columns/ColumnMap.cpp | 5 +++++ src/Columns/ColumnMap.h | 1 + src/Columns/ColumnTuple.cpp | 12 ++++++++++++ src/Columns/ColumnTuple.h | 1 + 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index aa0d5aa3e50..5b0df8e9b6b 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -364,7 +364,7 @@ void ColumnArray::insertManyFromNumber(const ColumnArray & src, size_t position, memcpy(&data_ref[offset], &src_data[src_offset], src_size * sizeof(T)); } -void ColumnArray::insertManyFromConst(const ColumnConst & src, size_t position, size_t length) +void ColumnArray::insertManyFromConst(const ColumnConst & src, size_t /*position*/, size_t length) { const ColumnArray * src_array = typeid_cast(&src.getDataColumn()); if (!src_array) diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 995f3103484..57e8ba685b4 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -158,6 +158,11 @@ void ColumnMap::insertFrom(const IColumn & src, size_t n) nested->insertFrom(assert_cast(src).getNestedColumn(), n); } +void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); +} + void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) { nested->insertRangeFrom( diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 17cd86a3788..60aa69e7bf6 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -67,6 +67,7 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; void insertFrom(const IColumn & src_, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 17cc58d92f5..062bdadf9d2 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -185,6 +185,18 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } +void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnTuple & src_tuple = assert_cast(src); + + const size_t tuple_size = columns.size(); + if (src_tuple.columns.size() != tuple_size) + throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple"); + + for (size_t i = 0; i < tuple_size; ++i) + columns[i]->insertManyFrom(*src_tuple.columns[i], position, length); +} + void ColumnTuple::insertDefault() { for (auto & column : columns) diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 610416b8b11..5b626155754 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -60,6 +60,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void insertFrom(const IColumn & src_, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; From 3005bff23100539dbb71f9623dc3aed9c34a87f6 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 14:43:33 +0800 Subject: [PATCH 5/9] fix building --- src/Columns/ColumnArray.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 5b0df8e9b6b..389b3e97820 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -425,7 +425,7 @@ void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, if (tuple_size != src_tuple_size) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Nested tuple size mismatch: {} vs {}", tuple_size, src_tuple_size); - Columns temporary_arrays(tuple_size); + MutableColumns temporary_arrays(tuple_size); Columns src_temporary_arrays(tuple_size); for (size_t i = 0; i < tuple_size; ++i) { From 8e413da8f156ab03c875b9525044265cffcc5b83 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 17:32:08 +0800 Subject: [PATCH 6/9] apply opts for string nested in array --- src/Columns/ColumnArray.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 389b3e97820..44b17c89ae1 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -536,6 +536,8 @@ void ColumnArray::insertManyFromImpl(const ColumnArray & src, size_t position, s return insertManyFromNumber(src, position, length); if (typeid_cast *>(data.get())) return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromString(src, position, length); if (typeid_cast(data.get())) return insertManyFromNullable(src, position, length); if (typeid_cast(data.get())) From 930deee699be05398aac334ce9e025d084c68a30 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 7 Mar 2024 22:02:10 +0800 Subject: [PATCH 7/9] fix bugs --- src/Columns/ColumnArray.cpp | 63 ++++++++++++++++++------------------- src/Columns/ColumnArray.h | 2 +- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 44b17c89ae1..0214375122f 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -43,29 +43,34 @@ namespace ErrorCodes static constexpr size_t max_array_size_as_field = 1000000; -ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column) +ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column, bool check_offsets) : data(std::move(nested_column)), offsets(std::move(offsets_column)) { - const ColumnOffsets * offsets_concrete = typeid_cast(offsets.get()); - - if (!offsets_concrete) - throw Exception(ErrorCodes::LOGICAL_ERROR, "offsets_column must be a ColumnUInt64"); - - if (!offsets_concrete->empty() && data && !data->empty()) + if (check_offsets) { - Offset last_offset = offsets_concrete->getData().back(); + const ColumnOffsets * offsets_concrete = typeid_cast(offsets.get()); - /// This will also prevent possible overflow in offset. - if (data->size() != last_offset) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "offsets_column has data inconsistent with nested_column. Data size: {}, last offset: {}", - data->size(), last_offset); + if (!offsets_concrete) + throw Exception(ErrorCodes::LOGICAL_ERROR, "offsets_column must be a ColumnUInt64"); + + if (!offsets_concrete->empty() && data && !data->empty()) + { + Offset last_offset = offsets_concrete->getData().back(); + + /// This will also prevent possible overflow in offset. + if (data->size() != last_offset) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "offsets_column has data inconsistent with nested_column. Data size: {}, last offset: {}", + data->size(), + last_offset); + } + + /** NOTE + * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate). + * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution. + */ } - - /** NOTE - * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate). - * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution. - */ } ColumnArray::ColumnArray(MutableColumnPtr && nested_column) @@ -425,20 +430,14 @@ void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, if (tuple_size != src_tuple_size) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Nested tuple size mismatch: {} vs {}", tuple_size, src_tuple_size); - MutableColumns temporary_arrays(tuple_size); - Columns src_temporary_arrays(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - { - temporary_arrays[i] = ColumnArray::create(tuple.getColumn(i).assumeMutable(), getOffsetsPtr()->assumeMutable()); - src_temporary_arrays[i] = ColumnArray::create(src_tuple.getColumn(i).assumeMutable(), src.getOffsetsPtr()->assumeMutable()); - assert_cast(*temporary_arrays[i]) - .insertManyFromImpl(assert_cast(*src_temporary_arrays[i]), position, length, false); - } - Columns tuple_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) - tuple_columns[i] = assert_cast(*temporary_arrays[i]).getDataPtr(); - + { + auto array_of_element = ColumnArray(tuple.getColumn(i).assumeMutable(), getOffsetsPtr()->assumeMutable(), false); + auto src_array_of_element = ColumnArray(src_tuple.getColumn(i).assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + array_of_element.insertManyFromImpl(src_array_of_element, position, length, false); + tuple_columns[i] = array_of_element.getDataPtr(); + } getDataPtr() = ColumnTuple::create(std::move(tuple_columns)); } @@ -448,12 +447,12 @@ void ColumnArray::insertManyFromNullable(const ColumnArray & src, size_t positio const ColumnNullable & src_nullable = assert_cast(src.getData()); /// Process nested column without updating array offsets - auto array_of_nested = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto array_of_nested = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable(), false); auto src_array_of_nested = ColumnArray(src_nullable.getNestedColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); array_of_nested.insertManyFromImpl(src_array_of_nested, position, length, false); /// Process null map column without updating array offsets - auto array_of_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto array_of_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable(), false); auto src_array_of_null_map = ColumnArray(src_nullable.getNullMapColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); array_of_null_map.insertManyFromImpl(src_array_of_null_map, position, length, false); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 765f86ec552..8c4d103e7d0 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -21,7 +21,7 @@ private: friend class COWHelper, ColumnArray>; /** Create an array column with specified values and offsets. */ - ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column); + ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column, bool check_offsets = true); /** Create an empty column of arrays with the type of values as in the column `nested_column` */ explicit ColumnArray(MutableColumnPtr && nested_column); From d2b8afb98b39067d057ce0159d24d5879d284e44 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 14 Mar 2024 18:26:13 +0800 Subject: [PATCH 8/9] add benchmarks for column::insertmanyfrom --- src/Columns/CMakeLists.txt | 4 + src/Columns/benchmarks/CMakeLists.txt | 4 + .../benchmark_column_insert_many_from.cpp | 102 ++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 src/Columns/benchmarks/CMakeLists.txt create mode 100644 src/Columns/benchmarks/benchmark_column_insert_many_from.cpp diff --git a/src/Columns/CMakeLists.txt b/src/Columns/CMakeLists.txt index f676f415eea..1febe4f71d7 100644 --- a/src/Columns/CMakeLists.txt +++ b/src/Columns/CMakeLists.txt @@ -1,3 +1,7 @@ if (ENABLE_EXAMPLES) add_subdirectory (examples) endif () + +if (ENABLE_BENCHMARKS) + add_subdirectory(benchmarks) +endif() diff --git a/src/Columns/benchmarks/CMakeLists.txt b/src/Columns/benchmarks/CMakeLists.txt new file mode 100644 index 00000000000..47f5dfe4c59 --- /dev/null +++ b/src/Columns/benchmarks/CMakeLists.txt @@ -0,0 +1,4 @@ +clickhouse_add_executable(column_insert_many_from benchmark_column_insert_many_from.cpp) +target_link_libraries (column_insert_many_from PRIVATE + ch_contrib::gbenchmark_all + dbms) \ No newline at end of file diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp new file mode 100644 index 00000000000..325cf5559cd --- /dev/null +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +static constexpr size_t ROWS = 65536; + +static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) +{ + const auto * type_array = typeid_cast(type.get()); + if (type_array) + { + auto data_col = mockColumn(type_array->getNestedType(), rows); + auto offset_col = ColumnArray::ColumnOffsets::create(rows); + auto & offsets = offset_col->getData(); + for (size_t i = 0; i < data_col->size(); ++i) + offsets[i] = offsets[i - 1] + (rand() % 10); + auto new_data_col = data_col->replicate(offsets); + + return ColumnArray::create(new_data_col, std::move(offset_col)); + } + + auto type_not_nullable = removeNullable(type); + auto column = type->createColumn(); + for (size_t i = 0; i < rows; ++i) + { + if (i % 100) + column->insertDefault(); + else if (isInt(type_not_nullable)) + column->insert(i); + else if (isFloat(type_not_nullable)) + { + double d = i * 1.0; + column->insert(d); + } + else if (isString(type_not_nullable)) + { + String s = "helloworld"; + column->insert(s); + } + else + column->insertDefault(); + } + return std::move(column); +} + + +static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) +{ + size_t size = src.size(); + dst.insertManyFrom(src, size / 2, size); +} + + +template +static void BM_insertManyFrom(benchmark::State & state) +{ + auto type = DataTypeFactory::instance().get(str_type); + auto src = mockColumn(type, ROWS); + + for (auto _ : state) + { + state.PauseTiming(); + auto dst = type->createColumn(); + dst->reserve(ROWS); + state.ResumeTiming(); + + insertManyFrom(*dst, *src); + benchmark::DoNotOptimize(dst); + } +} + +static const String type_int64 = "Int64"; +static const String type_nullable_int64 = "Nullable(Int64)"; +static const String type_string = "String"; +static const String type_nullable_string = "Nullable(String)"; +static const String type_decimal = "Decimal128(3)"; +static const String type_nullable_decimal = "Nullable(Decimal128(3))"; + +static const String type_array_int64 = "Array(Int64)"; +static const String type_array_nullable_int64 = "Array(Nullable(Int64))"; +static const String type_array_string = "Array(String)"; +static const String type_array_nullable_string = "Array(Nullable(String))"; + +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_int64); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_nullable_int64); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_string); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_nullable_string); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_decimal); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_nullable_decimal); + +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_int64); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_nullable_int64); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_string); +BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_nullable_string); From 00533f3df634c3a96e78fa0732c9375f257ffb5b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 15 Mar 2024 19:43:44 +0800 Subject: [PATCH 9/9] revert opts in column array --- src/Columns/ColumnArray.cpp | 232 +++--------------------------------- src/Columns/ColumnArray.h | 14 +-- 2 files changed, 19 insertions(+), 227 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 0214375122f..7b268b80116 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -31,7 +31,6 @@ namespace ErrorCodes extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int LOGICAL_ERROR; extern const int TOO_LARGE_ARRAY_SIZE; - extern const int ILLEGAL_COLUMN; } /** Obtaining array as Field can be slow for large arrays and consume vast amount of memory. @@ -43,34 +42,29 @@ namespace ErrorCodes static constexpr size_t max_array_size_as_field = 1000000; -ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column, bool check_offsets) +ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column) : data(std::move(nested_column)), offsets(std::move(offsets_column)) { - if (check_offsets) + const ColumnOffsets * offsets_concrete = typeid_cast(offsets.get()); + + if (!offsets_concrete) + throw Exception(ErrorCodes::LOGICAL_ERROR, "offsets_column must be a ColumnUInt64"); + + if (!offsets_concrete->empty() && data && !data->empty()) { - const ColumnOffsets * offsets_concrete = typeid_cast(offsets.get()); + Offset last_offset = offsets_concrete->getData().back(); - if (!offsets_concrete) - throw Exception(ErrorCodes::LOGICAL_ERROR, "offsets_column must be a ColumnUInt64"); - - if (!offsets_concrete->empty() && data && !data->empty()) - { - Offset last_offset = offsets_concrete->getData().back(); - - /// This will also prevent possible overflow in offset. - if (data->size() != last_offset) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "offsets_column has data inconsistent with nested_column. Data size: {}, last offset: {}", - data->size(), - last_offset); - } - - /** NOTE - * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate). - * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution. - */ + /// This will also prevent possible overflow in offset. + if (data->size() != last_offset) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "offsets_column has data inconsistent with nested_column. Data size: {}, last offset: {}", + data->size(), last_offset); } + + /** NOTE + * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate). + * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution. + */ } ColumnArray::ColumnArray(MutableColumnPtr && nested_column) @@ -353,196 +347,6 @@ void ColumnArray::insertFrom(const IColumn & src_, size_t n) getOffsets().push_back(getOffsets().back() + size); } -template -void ColumnArray::insertManyFromNumber(const ColumnArray & src, size_t position, size_t length) -{ - using ColVecType = ColumnVectorOrDecimal; - size_t src_size = src.sizeAt(position); - size_t src_offset = src.offsetAt(position); - - const typename ColVecType::Container & src_data = typeid_cast(src.getData()).getData(); - typename ColVecType::Container & data_ref = typeid_cast(getData()).getData(); - size_t old_size = data_ref.size(); - size_t new_size = old_size + src_size * length; - data_ref.resize(new_size); - for (size_t i = 0, offset = old_size; i < length; ++i, offset += src_size) - memcpy(&data_ref[offset], &src_data[src_offset], src_size * sizeof(T)); -} - -void ColumnArray::insertManyFromConst(const ColumnConst & src, size_t /*position*/, size_t length) -{ - const ColumnArray * src_array = typeid_cast(&src.getDataColumn()); - if (!src_array) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Cannot insert from const column of type {} to column of type {}", - src.getDataColumn().getName(), - getName()); - - insertManyFromImpl(*src_array, 0, length, true); -} - -void ColumnArray::insertManyFromString(const ColumnArray & src, size_t position, size_t length) -{ - size_t src_size = src.sizeAt(position); - size_t src_offset = src.offsetAt(position); - - const auto & src_string = typeid_cast(src.getData()); - const auto & src_chars = src_string.getChars(); - const auto & src_string_offsets = src_string.getOffsets(); - auto & dst_string = typeid_cast(getData()); - auto & dst_chars = dst_string.getChars(); - auto & dst_string_offsets = dst_string.getOffsets(); - - /// Each row may have multiple strings, copy them to dst_chars and update dst_offsets - size_t old_size = dst_string_offsets.size(); - size_t new_size = old_size + src_size * length; - dst_string_offsets.resize(new_size); - size_t dst_string_offset = dst_chars.size(); - for (size_t i = 0; i < length; ++i) - { - for (size_t j = 0; j < src_size; ++j) - { - size_t nested_offset = src_string_offsets[src_offset + j - 1]; - size_t nested_length = src_string_offsets[src_offset + j] - nested_offset; - - dst_string_offset += nested_length; - dst_string_offsets[old_size + i * src_size + j] = dst_string_offset; - } - } - - size_t chars_to_copy = src_string_offsets[src_offset + src_size - 1] - src_string_offsets[src_offset - 1]; - dst_chars.resize(dst_chars.size() + chars_to_copy * length); - for (size_t dst_offset = old_size; dst_offset < new_size; dst_offset += src_size) - memcpy(&dst_chars[dst_string_offsets[dst_offset - 1]], &src_chars[src_string_offsets[src_offset - 1]], chars_to_copy); -} - -void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, size_t length) -{ - ColumnTuple & tuple = assert_cast(getData()); - const ColumnTuple & src_tuple = assert_cast(src.getData()); - - /// Make temporary arrays for each components of Tuple. In the same way as for Nullable. - size_t tuple_size = tuple.tupleSize(); - size_t src_tuple_size = src_tuple.tupleSize(); - if (tuple_size == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple"); - if (tuple_size != src_tuple_size) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Nested tuple size mismatch: {} vs {}", tuple_size, src_tuple_size); - - Columns tuple_columns(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - { - auto array_of_element = ColumnArray(tuple.getColumn(i).assumeMutable(), getOffsetsPtr()->assumeMutable(), false); - auto src_array_of_element = ColumnArray(src_tuple.getColumn(i).assumeMutable(), src.getOffsetsPtr()->assumeMutable()); - array_of_element.insertManyFromImpl(src_array_of_element, position, length, false); - tuple_columns[i] = array_of_element.getDataPtr(); - } - getDataPtr() = ColumnTuple::create(std::move(tuple_columns)); -} - -void ColumnArray::insertManyFromNullable(const ColumnArray & src, size_t position, size_t length) -{ - ColumnNullable & nullable = assert_cast(getData()); - const ColumnNullable & src_nullable = assert_cast(src.getData()); - - /// Process nested column without updating array offsets - auto array_of_nested = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable(), false); - auto src_array_of_nested = ColumnArray(src_nullable.getNestedColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); - array_of_nested.insertManyFromImpl(src_array_of_nested, position, length, false); - - /// Process null map column without updating array offsets - auto array_of_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable(), false); - auto src_array_of_null_map = ColumnArray(src_nullable.getNullMapColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); - array_of_null_map.insertManyFromImpl(src_array_of_null_map, position, length, false); - - /// Update array data - getDataPtr() = ColumnNullable::create(array_of_nested.getDataPtr(), array_of_null_map.getDataPtr()); -} - -void ColumnArray::insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length) -{ - size_t src_size = src.sizeAt(position); - size_t src_offset = src.offsetAt(position); - const auto & src_data = src.getData(); - size_t new_size = data->size() + src_size * length; - data->reserve(new_size); - for (size_t i = 0; i < length; ++i) - data->insertRangeFrom(src_data, src_offset, src_size); -} - -void ColumnArray::insertManyFrom(const IColumn & src_, size_t position, size_t length) -{ - const ColumnConst * src_const = typeid_cast(&src_); - if (src_const) - return insertManyFromConst(*src_const, position, length); - - const ColumnArray * src_array = typeid_cast(&src_); - if (!src_array) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert from column of type {} to column of type {}", src_.getName(), getName()); - - return insertManyFromImpl(*src_array, position, length, true); -} - -void ColumnArray::insertManyFromImpl(const ColumnArray & src, size_t position, size_t length, bool update_offsets) -{ - /// First fill offsets if needed - if (update_offsets) - { - size_t src_size = src.sizeAt(position); - auto & offsets_ref = getOffsets(); - size_t old_rows = offsets_ref.size(); - size_t new_rows = old_rows + length; - size_t old_size = offsets_ref.back(); - offsets_ref.resize(new_rows); - for (size_t i = 0, offset = old_size + src_size; i < length; ++i, offset += src_size) - offsets_ref[old_rows + i] = offset; - } - - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast *>(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast *>(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast *>(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast *>(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast *>(data.get())) - return insertManyFromNumber(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromString(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromNullable(src, position, length); - if (typeid_cast(data.get())) - return insertManyFromTuple(src, position, length); - return insertManyFromGeneric(src, position, length); -} void ColumnArray::insertDefault() { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 8c4d103e7d0..230d8830265 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -21,7 +21,7 @@ private: friend class COWHelper, ColumnArray>; /** Create an array column with specified values and offsets. */ - ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column, bool check_offsets = true); + ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column); /** Create an empty column of arrays with the type of values as in the column `nested_column` */ explicit ColumnArray(MutableColumnPtr && nested_column); @@ -88,7 +88,6 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void insertFrom(const IColumn & src_, size_t n) override; - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -214,17 +213,6 @@ private: ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; - /// Specializations for insertManyFrom - void insertManyFromConst(const ColumnConst & src, size_t position, size_t length); - void insertManyFromImpl(const ColumnArray & src, size_t position, size_t length, bool update_offsets = true); - - template - void insertManyFromNumber(const ColumnArray & src, size_t position, size_t length); - void insertManyFromString(const ColumnArray & src, size_t position, size_t length); - void insertManyFromTuple(const ColumnArray & src, size_t position, size_t length); - void insertManyFromNullable(const ColumnArray & src, size_t position, size_t length); - void insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length); - int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const; };