Merge pull request #60846 from bigo-sg/opt_insertmanyfrom

Column insertManyFrom Specializations
This commit is contained in:
Kruglov Pavel 2024-03-21 14:29:23 +01:00 committed by GitHub
commit f941f52a7f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 186 additions and 0 deletions

View File

@ -1,3 +1,7 @@
# Optional sub-directories; each one is only added when its build option is enabled.
if(ENABLE_EXAMPLES)
    add_subdirectory(examples)
endif()

if(ENABLE_BENCHMARKS)
    add_subdirectory(benchmarks)
endif()

View File

@ -150,6 +150,8 @@ public:
++s;
}
/// Appends `length` rows. Only the row counter `s` is advanced;
/// neither the source column nor the position is read.
void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override
{
    s += length;
}
void insertDefault() override
{
++s;

View File

@ -56,6 +56,13 @@ public:
/// Forwards to `data.shrink_to_fit()`.
void shrinkToFit() override
{
    data.shrink_to_fit();
}
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
void insertManyFrom(const IColumn & src, size_t position, size_t length) override
{
ValueType v = assert_cast<const Self &>(src).getData()[position];
data.resize_fill(data.size() + length, v);
}
void insertData(const char * src, size_t /*length*/) override;
/// Appends a single value-initialized `T`.
void insertDefault() override
{
    data.push_back(T());
}
/// Grows `data` by `length` elements via `resize_fill` without an explicit fill value.
void insertManyDefaults(size_t length) override
{
    const size_t grown_size = data.size() + length;
    data.resize_fill(grown_size);
}

View File

@ -85,6 +85,20 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
}
void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length)
{
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
if (n != src_concrete.getN())
throw Exception(ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH, "Size of FixedString doesn't match");
const size_t old_size = chars.size();
const size_t new_size = old_size + n * length;
chars.resize(new_size);
for (size_t offset = old_size; offset < new_size; offset += n)
memcpySmallAllowReadWriteOverflow15(&chars[offset], &src_concrete.chars[n * position], n);
}
void ColumnFixedString::insertData(const char * pos, size_t length)
{
if (length > n)

View File

@ -100,6 +100,8 @@ public:
void insertFrom(const IColumn & src_, size_t index) override;
/// Appends `length` copies of the fixed-size string at row `position` of `src`.
/// Throws SIZE_OF_FIXED_STRING_DOESNT_MATCH if `src` has a different N.
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertData(const char * pos, size_t length) override;
void insertDefault() override

View File

@ -158,6 +158,11 @@ void ColumnMap::insertFrom(const IColumn & src, size_t n)
nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
}
/// Appends `length` copies of row `position` of `src` by forwarding to the nested array column.
void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length)
{
    auto & nested_array = assert_cast<ColumnArray &>(*nested);
    const auto & source_map = assert_cast<const ColumnMap &>(src);

    nested_array.insertManyFrom(source_map.getNestedColumn(), position, length);
}
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
nested->insertRangeFrom(

View File

@ -67,6 +67,7 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertFrom(const IColumn & src_, size_t n) override;
/// Appends `length` copies of row `position` of `src`; the work is forwarded to the nested array column.
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;

View File

@ -231,6 +231,14 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n)
getNullMapData().push_back(src_concrete.getNullMapData()[n]);
}
void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length)
{
const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length);
getNullMapColumn().insertManyFrom(src_concrete.getNullMapColumn(), position, length);
}
void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n)
{
getNestedColumn().insertFrom(src, n);

View File

@ -69,6 +69,7 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertFrom(const IColumn & src, size_t n) override;
/// Appends `length` copies of row `position` of `src` to both the nested column and the null map.
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertFromNotNullable(const IColumn & src, size_t n);
void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length);

View File

@ -38,6 +38,27 @@ ColumnString::ColumnString(const ColumnString & src)
last_offset, chars.size());
}
/// Appends `length` copies of the string stored at row `position` of `src`.
///
/// The bytes of the source row (offsets[position - 1] .. offsets[position]) are copied
/// `length` times to the end of `chars`, and `length` new end offsets are appended to `offsets`.
void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length)
{
    const ColumnString & src_concrete = assert_cast<const ColumnString &>(src);

    /// Remember the source row as (offset, size) instead of a raw pointer: if `src` is this very
    /// column, the resize below may reallocate `chars` and a pointer taken now would dangle.
    const size_t src_offset = src_concrete.offsets[position - 1]; /// -1th index is Ok, see PaddedPODArray.
    const size_t src_buf_size = src_concrete.offsets[position] - src_offset;

    const size_t old_size = chars.size();
    const size_t new_size = old_size + src_buf_size * length;
    chars.resize(new_size);

    const size_t old_rows = offsets.size();
    offsets.resize(old_rows + length);

    /// Resolve the source address only after the resize so that it is valid in the self-insert case too.
    const UInt8 * src_buf = &src_concrete.chars[src_offset];

    /// Copy the row's bytes into each appended slot.
    for (size_t current_offset = old_size; current_offset < new_size; current_offset += src_buf_size)
        memcpySmallAllowReadWriteOverflow15(&chars[current_offset], src_buf, src_buf_size);

    /// Each new offset is the end position of the corresponding copy.
    for (size_t i = 0, current_offset = old_size + src_buf_size; i < length; ++i, current_offset += src_buf_size)
        offsets[old_rows + i] = current_offset;
}
MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
{

View File

@ -160,6 +160,8 @@ public:
}
}
/// Appends `length` copies of the string at row `position` of `src`, extending both `chars` and `offsets`.
/// Defined out of line.
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertData(const char * pos, size_t length) override
{
const size_t old_size = chars.size();

View File

@ -185,6 +185,18 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
columns[i]->insertFrom(*src.columns[i], n);
}
void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length)
{
const ColumnTuple & src_tuple = assert_cast<const ColumnTuple &>(src);
const size_t tuple_size = columns.size();
if (src_tuple.columns.size() != tuple_size)
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
for (size_t i = 0; i < tuple_size; ++i)
columns[i]->insertManyFrom(*src_tuple.columns[i], position, length);
}
void ColumnTuple::insertDefault()
{
for (auto & column : columns)

View File

@ -60,6 +60,7 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
void insertFrom(const IColumn & src_, size_t n) override;
/// Appends `length` copies of row `position` of `src` to every element column.
/// Throws CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE if the tuple sizes differ.
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;

View File

@ -0,0 +1,4 @@
# Benchmark executable built from benchmark_column_insert_many_from.cpp.
clickhouse_add_executable(column_insert_many_from benchmark_column_insert_many_from.cpp)
target_link_libraries(column_insert_many_from
    PRIVATE
        ch_contrib::gbenchmark_all
        dbms
)

View File

@ -0,0 +1,102 @@
#include <cstddef>
#include <Columns/IColumn.h>
#include <Core/Block.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/IDataType.h>
#include <base/types.h>
#include <benchmark/benchmark.h>
using namespace DB;
/// Number of rows in the mocked source column; also the capacity reserved for each destination column.
static constexpr size_t ROWS = 65536;
/// Builds a test column of the given `type`.
///
/// For Array types the nested column is mocked recursively, then replicated according to
/// offsets that grow by `rand() % 10` per row. For every other type a column of `rows` rows is
/// created: rows whose index is a multiple of 100 receive a type-specific value (the row index
/// for integers and floats, "helloworld" for strings), all remaining rows receive the default.
static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows)
{
    if (const auto * array_type = typeid_cast<const DataTypeArray *>(type.get()))
    {
        auto nested_values = mockColumn(array_type->getNestedType(), rows);

        auto offsets_column = ColumnArray::ColumnOffsets::create(rows);
        auto & offsets = offsets_column->getData();
        /// For the first row this reads index -1 of the offsets array.
        for (size_t row = 0; row < nested_values->size(); ++row)
            offsets[row] = offsets[row - 1] + (rand() % 10);

        auto replicated_values = nested_values->replicate(offsets);
        return ColumnArray::create(replicated_values, std::move(offsets_column));
    }

    auto bare_type = removeNullable(type);
    const bool is_int_type = isInt(bare_type);
    const bool is_float_type = isFloat(bare_type);
    const bool is_string_type = isString(bare_type);

    auto column = type->createColumn();
    for (size_t row = 0; row < rows; ++row)
    {
        /// Only every 100th row carries a non-default value.
        if (row % 100 != 0)
        {
            column->insertDefault();
            continue;
        }

        if (is_int_type)
        {
            column->insert(row);
        }
        else if (is_float_type)
        {
            double d = row * 1.0;
            column->insert(d);
        }
        else if (is_string_type)
        {
            String s = "helloworld";
            column->insert(s);
        }
        else
        {
            column->insertDefault();
        }
    }

    return std::move(column);
}
/// Operation under measurement: appends the middle row of `src` to `dst`, repeated `src.size()` times.
static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src)
{
    const size_t row_count = src.size();
    const size_t middle_row = row_count / 2;
    dst.insertManyFrom(src, middle_row, row_count);
}
/// Benchmark body for one data type, selected by the type-name string passed as template argument.
/// A source column with ROWS rows is mocked once; every iteration inserts into a fresh destination.
template <const std::string & str_type>
static void BM_insertManyFrom(benchmark::State & state)
{
    const auto data_type = DataTypeFactory::instance().get(str_type);
    const auto source = mockColumn(data_type, ROWS);

    for (auto _ : state)
    {
        /// Creating and reserving the destination happens while timing is paused.
        state.PauseTiming();
        auto target = data_type->createColumn();
        target->reserve(ROWS);
        state.ResumeTiming();

        insertManyFrom(*target, *source);
        benchmark::DoNotOptimize(target);
    }
}
/// Type names used to instantiate BM_insertManyFrom. They are namespace-scope objects because
/// the benchmark takes its type name as a `const std::string &` template argument.
static const String type_int64 = "Int64";
static const String type_nullable_int64 = "Nullable(Int64)";
static const String type_string = "String";
static const String type_nullable_string = "Nullable(String)";
static const String type_decimal = "Decimal128(3)";
static const String type_nullable_decimal = "Nullable(Decimal128(3))";
static const String type_array_int64 = "Array(Int64)";
static const String type_array_nullable_int64 = "Array(Nullable(Int64))";
static const String type_array_string = "Array(String)";
static const String type_array_nullable_string = "Array(Nullable(String))";
/// One benchmark registration per type name declared above.
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_int64);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_nullable_int64);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_string);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_nullable_string);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_decimal);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_nullable_decimal);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_int64);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_nullable_int64);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_string);
BENCHMARK_TEMPLATE(BM_insertManyFrom, type_array_nullable_string);