mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
remove opts of memory allocation in function tokens
This commit is contained in:
parent
34e07f6596
commit
8c0d2cc0fc
@ -58,7 +58,6 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
static constexpr auto name = Generator::name;
|
static constexpr auto name = Generator::name;
|
||||||
|
|
||||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
|
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
|
||||||
|
|
||||||
explicit FunctionTokens<Generator>(ContextPtr context)
|
explicit FunctionTokens<Generator>(ContextPtr context)
|
||||||
@ -107,13 +106,9 @@ public:
|
|||||||
const ColumnString::Chars & src_chars = col_str->getChars();
|
const ColumnString::Chars & src_chars = col_str->getChars();
|
||||||
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
|
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
|
||||||
|
|
||||||
res_offsets.resize_exact(src_offsets.size());
|
res_offsets.reserve(src_offsets.size());
|
||||||
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random.
|
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random.
|
||||||
std::optional<size_t> res_chars_reserve_size = generator.getResultReserveSize();
|
res_strings_chars.reserve(src_chars.size());
|
||||||
if (res_chars_reserve_size.has_value())
|
|
||||||
res_strings_chars.reserve_exact(*res_chars_reserve_size);
|
|
||||||
else
|
|
||||||
res_strings_chars.reserve(src_chars.size());
|
|
||||||
|
|
||||||
Pos token_begin = nullptr;
|
Pos token_begin = nullptr;
|
||||||
Pos token_end = nullptr;
|
Pos token_end = nullptr;
|
||||||
@ -122,40 +117,32 @@ public:
|
|||||||
ColumnString::Offset current_src_offset = 0;
|
ColumnString::Offset current_src_offset = 0;
|
||||||
ColumnArray::Offset current_dst_offset = 0;
|
ColumnArray::Offset current_dst_offset = 0;
|
||||||
ColumnString::Offset current_dst_strings_offset = 0;
|
ColumnString::Offset current_dst_strings_offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
|
||||||
|
current_src_offset = src_offsets[i];
|
||||||
|
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
|
||||||
|
|
||||||
#define PROCESS_WITH_CUSTOM_RESIZE(RESIZE_METHOD) \
|
generator.set(pos, end);
|
||||||
for (size_t i = 0; i < size; ++i) \
|
size_t j = 0;
|
||||||
{ \
|
while (generator.get(token_begin, token_end))
|
||||||
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]); \
|
{
|
||||||
current_src_offset = src_offsets[i]; \
|
size_t token_size = token_end - token_begin;
|
||||||
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1; \
|
|
||||||
generator.set(pos, end); \
|
res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
|
||||||
size_t j = 0; \
|
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
|
||||||
while (generator.get(token_begin, token_end)) \
|
res_strings_chars[current_dst_strings_offset + token_size] = 0;
|
||||||
{ \
|
|
||||||
size_t token_size = token_end - token_begin; \
|
current_dst_strings_offset += token_size + 1;
|
||||||
res_strings_chars.RESIZE_METHOD(res_strings_chars.size() + token_size + 1); \
|
res_strings_offsets.push_back(current_dst_strings_offset);
|
||||||
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size); \
|
++j;
|
||||||
res_strings_chars[current_dst_strings_offset + token_size] = 0; \
|
}
|
||||||
current_dst_strings_offset += token_size + 1; \
|
|
||||||
res_strings_offsets.push_back(current_dst_strings_offset); \
|
current_dst_offset += j;
|
||||||
++j; \
|
res_offsets.push_back(current_dst_offset);
|
||||||
} \
|
|
||||||
current_dst_offset += j; \
|
|
||||||
res_offsets[i] = current_dst_offset; \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (res_chars_reserve_size.has_value())
|
|
||||||
{
|
|
||||||
/// If res_chars_reserve_size is not -1, then we are sure that the actual size of res_strings_chars doesn't exceed res_chars_reserve_size.
|
|
||||||
PROCESS_WITH_CUSTOM_RESIZE(resize_assume_reserved)
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
PROCESS_WITH_CUSTOM_RESIZE(resize)
|
|
||||||
}
|
|
||||||
return col_res;
|
return col_res;
|
||||||
#undef PROCESS_WITH_CUSTOM_RESIZE
|
|
||||||
}
|
}
|
||||||
else if (col_str_const)
|
else if (col_str_const)
|
||||||
{
|
{
|
||||||
|
@ -39,8 +39,6 @@ public:
|
|||||||
|
|
||||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -37,8 +37,6 @@ public:
|
|||||||
|
|
||||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substring_behavior*/) {}
|
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substring_behavior*/) {}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -37,8 +37,6 @@ public:
|
|||||||
|
|
||||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -36,8 +36,6 @@ public:
|
|||||||
|
|
||||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
static constexpr auto strings_argument_position = 0uz;
|
static constexpr auto strings_argument_position = 0uz;
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
|
@ -47,8 +47,6 @@ public:
|
|||||||
max_splits = extractMaxSplits(arguments, 1);
|
max_splits = extractMaxSplits(arguments, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -78,8 +78,6 @@ public:
|
|||||||
matches.resize(capture + 1);
|
matches.resize(capture + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -34,7 +34,6 @@ private:
|
|||||||
std::optional<size_t> max_splits;
|
std::optional<size_t> max_splits;
|
||||||
size_t splits;
|
size_t splits;
|
||||||
bool max_substrings_includes_remaining_string;
|
bool max_substrings_includes_remaining_string;
|
||||||
std::optional<size_t> result_reserve_size;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static constexpr auto name = "splitByChar";
|
static constexpr auto name = "splitByChar";
|
||||||
@ -67,21 +66,6 @@ public:
|
|||||||
|
|
||||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||||
max_splits = extractMaxSplits(arguments, 2);
|
max_splits = extractMaxSplits(arguments, 2);
|
||||||
|
|
||||||
const ColumnString * col_str = checkAndGetColumn<ColumnString>(arguments[strings_argument_position].column.get());
|
|
||||||
/// There is another possibility that the input column is ColumnConst. We ignore it because there is no need to get reserve size under such condition.
|
|
||||||
if (col_str)
|
|
||||||
{
|
|
||||||
const ColumnString::Chars & src_chars = col_str->getChars();
|
|
||||||
/// Consider use case: splitByChar(' ', 'a b c'), where input chars is "a b c\0", output chars is "a\0", "b\0", "c\0".
|
|
||||||
/// The size of output chars should never exceeds input chars
|
|
||||||
result_reserve_size = src_chars.size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const
|
|
||||||
{
|
|
||||||
return result_reserve_size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
@ -93,7 +77,7 @@ public:
|
|||||||
|
|
||||||
bool get(Pos & token_begin, Pos & token_end)
|
bool get(Pos & token_begin, Pos & token_end)
|
||||||
{
|
{
|
||||||
if (!pos) [[unlikely]]
|
if (!pos)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
token_begin = pos;
|
token_begin = pos;
|
||||||
|
@ -57,8 +57,6 @@ public:
|
|||||||
max_splits = extractMaxSplits(arguments, 1);
|
max_splits = extractMaxSplits(arguments, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -70,8 +70,6 @@ public:
|
|||||||
max_splits = extractMaxSplits(arguments, 2);
|
max_splits = extractMaxSplits(arguments, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -62,8 +62,6 @@ public:
|
|||||||
max_splits = extractMaxSplits(arguments, 2);
|
max_splits = extractMaxSplits(arguments, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
@ -45,8 +45,6 @@ public:
|
|||||||
max_splits = extractMaxSplits(arguments, 1);
|
max_splits = extractMaxSplits(arguments, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
|
||||||
|
|
||||||
/// Called for each next string.
|
/// Called for each next string.
|
||||||
void set(Pos pos_, Pos end_)
|
void set(Pos pos_, Pos end_)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user