mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
remove opts of memory allocation in function tokens
This commit is contained in:
parent
34e07f6596
commit
8c0d2cc0fc
@ -58,7 +58,6 @@ private:
|
||||
|
||||
public:
|
||||
static constexpr auto name = Generator::name;
|
||||
|
||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
|
||||
|
||||
explicit FunctionTokens<Generator>(ContextPtr context)
|
||||
@ -107,12 +106,8 @@ public:
|
||||
const ColumnString::Chars & src_chars = col_str->getChars();
|
||||
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
|
||||
|
||||
res_offsets.resize_exact(src_offsets.size());
|
||||
res_offsets.reserve(src_offsets.size());
|
||||
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random.
|
||||
std::optional<size_t> res_chars_reserve_size = generator.getResultReserveSize();
|
||||
if (res_chars_reserve_size.has_value())
|
||||
res_strings_chars.reserve_exact(*res_chars_reserve_size);
|
||||
else
|
||||
res_strings_chars.reserve(src_chars.size());
|
||||
|
||||
Pos token_begin = nullptr;
|
||||
@ -122,40 +117,32 @@ public:
|
||||
ColumnString::Offset current_src_offset = 0;
|
||||
ColumnArray::Offset current_dst_offset = 0;
|
||||
ColumnString::Offset current_dst_strings_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
|
||||
current_src_offset = src_offsets[i];
|
||||
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
|
||||
|
||||
#define PROCESS_WITH_CUSTOM_RESIZE(RESIZE_METHOD) \
|
||||
for (size_t i = 0; i < size; ++i) \
|
||||
{ \
|
||||
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]); \
|
||||
current_src_offset = src_offsets[i]; \
|
||||
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1; \
|
||||
generator.set(pos, end); \
|
||||
size_t j = 0; \
|
||||
while (generator.get(token_begin, token_end)) \
|
||||
{ \
|
||||
size_t token_size = token_end - token_begin; \
|
||||
res_strings_chars.RESIZE_METHOD(res_strings_chars.size() + token_size + 1); \
|
||||
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size); \
|
||||
res_strings_chars[current_dst_strings_offset + token_size] = 0; \
|
||||
current_dst_strings_offset += token_size + 1; \
|
||||
res_strings_offsets.push_back(current_dst_strings_offset); \
|
||||
++j; \
|
||||
} \
|
||||
current_dst_offset += j; \
|
||||
res_offsets[i] = current_dst_offset; \
|
||||
generator.set(pos, end);
|
||||
size_t j = 0;
|
||||
while (generator.get(token_begin, token_end))
|
||||
{
|
||||
size_t token_size = token_end - token_begin;
|
||||
|
||||
res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
|
||||
res_strings_chars[current_dst_strings_offset + token_size] = 0;
|
||||
|
||||
current_dst_strings_offset += token_size + 1;
|
||||
res_strings_offsets.push_back(current_dst_strings_offset);
|
||||
++j;
|
||||
}
|
||||
|
||||
if (res_chars_reserve_size.has_value())
|
||||
{
|
||||
/// If res_chars_reserve_size has a value, then we are sure that the actual size of res_strings_chars doesn't exceed *res_chars_reserve_size.
|
||||
PROCESS_WITH_CUSTOM_RESIZE(resize_assume_reserved)
|
||||
}
|
||||
else
|
||||
{
|
||||
PROCESS_WITH_CUSTOM_RESIZE(resize)
|
||||
current_dst_offset += j;
|
||||
res_offsets.push_back(current_dst_offset);
|
||||
}
|
||||
|
||||
return col_res;
|
||||
#undef PROCESS_WITH_CUSTOM_RESIZE
|
||||
}
|
||||
else if (col_str_const)
|
||||
{
|
||||
|
@ -39,8 +39,6 @@ public:
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -37,8 +37,6 @@ public:
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substring_behavior*/) {}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -37,8 +37,6 @@ public:
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -36,8 +36,6 @@ public:
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
/// Called for each next string.
|
||||
|
@ -47,8 +47,6 @@ public:
|
||||
max_splits = extractMaxSplits(arguments, 1);
|
||||
}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -78,8 +78,6 @@ public:
|
||||
matches.resize(capture + 1);
|
||||
}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -34,7 +34,6 @@ private:
|
||||
std::optional<size_t> max_splits;
|
||||
size_t splits;
|
||||
bool max_substrings_includes_remaining_string;
|
||||
std::optional<size_t> result_reserve_size;
|
||||
|
||||
public:
|
||||
static constexpr auto name = "splitByChar";
|
||||
@ -67,21 +66,6 @@ public:
|
||||
|
||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||
max_splits = extractMaxSplits(arguments, 2);
|
||||
|
||||
const ColumnString * col_str = checkAndGetColumn<ColumnString>(arguments[strings_argument_position].column.get());
|
||||
/// The input column may also be a ColumnConst. We ignore that case because there is no need to compute a reserve size for it.
|
||||
if (col_str)
|
||||
{
|
||||
const ColumnString::Chars & src_chars = col_str->getChars();
|
||||
/// Consider the use case splitByChar(' ', 'a b c'): the input chars are "a b c\0" and the output chars are "a\0", "b\0", "c\0".
|
||||
/// The size of the output chars should never exceed the size of the input chars.
|
||||
result_reserve_size = src_chars.size();
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const
|
||||
{
|
||||
return result_reserve_size;
|
||||
}
|
||||
|
||||
void set(Pos pos_, Pos end_)
|
||||
@ -93,7 +77,7 @@ public:
|
||||
|
||||
bool get(Pos & token_begin, Pos & token_end)
|
||||
{
|
||||
if (!pos) [[unlikely]]
|
||||
if (!pos)
|
||||
return false;
|
||||
|
||||
token_begin = pos;
|
||||
|
@ -57,8 +57,6 @@ public:
|
||||
max_splits = extractMaxSplits(arguments, 1);
|
||||
}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -70,8 +70,6 @@ public:
|
||||
max_splits = extractMaxSplits(arguments, 2);
|
||||
}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -62,8 +62,6 @@ public:
|
||||
max_splits = extractMaxSplits(arguments, 2);
|
||||
}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
@ -45,8 +45,6 @@ public:
|
||||
max_splits = extractMaxSplits(arguments, 1);
|
||||
}
|
||||
|
||||
std::optional<size_t> getResultReserveSize() const { return std::nullopt; }
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user