Remove memory allocation optimizations in FunctionTokens

This commit is contained in:
taiyang-li 2024-04-16 10:29:25 +08:00
parent 34e07f6596
commit 8c0d2cc0fc
12 changed files with 25 additions and 74 deletions

View File

@ -58,7 +58,6 @@ private:
public: public:
static constexpr auto name = Generator::name; static constexpr auto name = Generator::name;
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); } static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
explicit FunctionTokens<Generator>(ContextPtr context) explicit FunctionTokens<Generator>(ContextPtr context)
@ -107,13 +106,9 @@ public:
const ColumnString::Chars & src_chars = col_str->getChars(); const ColumnString::Chars & src_chars = col_str->getChars();
const ColumnString::Offsets & src_offsets = col_str->getOffsets(); const ColumnString::Offsets & src_offsets = col_str->getOffsets();
res_offsets.resize_exact(src_offsets.size()); res_offsets.reserve(src_offsets.size());
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random. res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random.
std::optional<size_t> res_chars_reserve_size = generator.getResultReserveSize(); res_strings_chars.reserve(src_chars.size());
if (res_chars_reserve_size.has_value())
res_strings_chars.reserve_exact(*res_chars_reserve_size);
else
res_strings_chars.reserve(src_chars.size());
Pos token_begin = nullptr; Pos token_begin = nullptr;
Pos token_end = nullptr; Pos token_end = nullptr;
@ -122,40 +117,32 @@ public:
ColumnString::Offset current_src_offset = 0; ColumnString::Offset current_src_offset = 0;
ColumnArray::Offset current_dst_offset = 0; ColumnArray::Offset current_dst_offset = 0;
ColumnString::Offset current_dst_strings_offset = 0; ColumnString::Offset current_dst_strings_offset = 0;
for (size_t i = 0; i < size; ++i)
{
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
current_src_offset = src_offsets[i];
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
#define PROCESS_WITH_CUSTOM_RESIZE(RESIZE_METHOD) \ generator.set(pos, end);
for (size_t i = 0; i < size; ++i) \ size_t j = 0;
{ \ while (generator.get(token_begin, token_end))
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]); \ {
current_src_offset = src_offsets[i]; \ size_t token_size = token_end - token_begin;
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1; \
generator.set(pos, end); \ res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
size_t j = 0; \ memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
while (generator.get(token_begin, token_end)) \ res_strings_chars[current_dst_strings_offset + token_size] = 0;
{ \
size_t token_size = token_end - token_begin; \ current_dst_strings_offset += token_size + 1;
res_strings_chars.RESIZE_METHOD(res_strings_chars.size() + token_size + 1); \ res_strings_offsets.push_back(current_dst_strings_offset);
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size); \ ++j;
res_strings_chars[current_dst_strings_offset + token_size] = 0; \ }
current_dst_strings_offset += token_size + 1; \
res_strings_offsets.push_back(current_dst_strings_offset); \ current_dst_offset += j;
++j; \ res_offsets.push_back(current_dst_offset);
} \
current_dst_offset += j; \
res_offsets[i] = current_dst_offset; \
} }
if (res_chars_reserve_size.has_value())
{
/// If res_chars_reserve_size has a value, then we are sure that the actual size of res_strings_chars doesn't exceed *res_chars_reserve_size.
PROCESS_WITH_CUSTOM_RESIZE(resize_assume_reserved)
}
else
{
PROCESS_WITH_CUSTOM_RESIZE(resize)
}
return col_res; return col_res;
#undef PROCESS_WITH_CUSTOM_RESIZE
} }
else if (col_str_const) else if (col_str_const)
{ {

View File

@ -39,8 +39,6 @@ public:
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -37,8 +37,6 @@ public:
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substring_behavior*/) {} void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substring_behavior*/) {}
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -37,8 +37,6 @@ public:
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -36,8 +36,6 @@ public:
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {} void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
static constexpr auto strings_argument_position = 0uz; static constexpr auto strings_argument_position = 0uz;
/// Called for each next string. /// Called for each next string.

View File

@ -47,8 +47,6 @@ public:
max_splits = extractMaxSplits(arguments, 1); max_splits = extractMaxSplits(arguments, 1);
} }
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -78,8 +78,6 @@ public:
matches.resize(capture + 1); matches.resize(capture + 1);
} }
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -34,7 +34,6 @@ private:
std::optional<size_t> max_splits; std::optional<size_t> max_splits;
size_t splits; size_t splits;
bool max_substrings_includes_remaining_string; bool max_substrings_includes_remaining_string;
std::optional<size_t> result_reserve_size;
public: public:
static constexpr auto name = "splitByChar"; static constexpr auto name = "splitByChar";
@ -67,21 +66,6 @@ public:
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_; max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2); max_splits = extractMaxSplits(arguments, 2);
const ColumnString * col_str = checkAndGetColumn<ColumnString>(arguments[strings_argument_position].column.get());
/// There is another possibility that the input column is ColumnConst. We ignore it because there is no need to get reserve size under such condition.
if (col_str)
{
const ColumnString::Chars & src_chars = col_str->getChars();
/// Consider use case: splitByChar(' ', 'a b c'), where input chars is "a b c\0", output chars is "a\0", "b\0", "c\0".
/// The size of the output chars should never exceed the size of the input chars.
result_reserve_size = src_chars.size();
}
}
std::optional<size_t> getResultReserveSize() const
{
return result_reserve_size;
} }
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
@ -93,7 +77,7 @@ public:
bool get(Pos & token_begin, Pos & token_end) bool get(Pos & token_begin, Pos & token_end)
{ {
if (!pos) [[unlikely]] if (!pos)
return false; return false;
token_begin = pos; token_begin = pos;

View File

@ -57,8 +57,6 @@ public:
max_splits = extractMaxSplits(arguments, 1); max_splits = extractMaxSplits(arguments, 1);
} }
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -70,8 +70,6 @@ public:
max_splits = extractMaxSplits(arguments, 2); max_splits = extractMaxSplits(arguments, 2);
} }
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -62,8 +62,6 @@ public:
max_splits = extractMaxSplits(arguments, 2); max_splits = extractMaxSplits(arguments, 2);
} }
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {

View File

@ -45,8 +45,6 @@ public:
max_splits = extractMaxSplits(arguments, 1); max_splits = extractMaxSplits(arguments, 1);
} }
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
/// Called for each next string. /// Called for each next string.
void set(Pos pos_, Pos end_) void set(Pos pos_, Pos end_)
{ {