Merge pull request #66897 from rschu1ze/funcs-g-r-input_rows_count

Functions [h-r]*: Iterate over input_rows_count where appropriate
This commit is contained in:
Robert Schulze 2024-07-25 08:54:47 +00:00 committed by GitHub
commit 6fab321e9f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
42 changed files with 169 additions and 159 deletions

View File

@ -59,19 +59,19 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
{
auto col_res = ColumnString::create();
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count);
return col_res;
}
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column.get()))
{
auto col_res = ColumnFixedString::create(col_fixed->getN());
Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars());
Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars(), input_rows_count);
return col_res;
}
else

View File

@ -8,17 +8,19 @@ namespace DB
template <char not_case_lower_bound, char not_case_upper_bound>
struct LowerUpperImpl
{
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t /*input_rows_count*/)
{
res_data.resize_exact(data.size());
res_offsets.assign(offsets);
array(data.data(), data.data() + data.size(), res_data.data());
}
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data, size_t /*input_rows_count*/)
{
res_data.resize_exact(data.size());
array(data.data(), data.data() + data.size(), res_data.data());

View File

@ -90,7 +90,8 @@ struct LowerUpperUTF8Impl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (data.empty())
return;
@ -98,7 +99,7 @@ struct LowerUpperUTF8Impl
bool all_ascii = isAllASCII(data.data(), data.size());
if (all_ascii)
{
LowerUpperImpl<not_case_lower_bound, not_case_upper_bound>::vector(data, offsets, res_data, res_offsets);
LowerUpperImpl<not_case_lower_bound, not_case_upper_bound>::vector(data, offsets, res_data, res_offsets, input_rows_count);
return;
}
@ -107,7 +108,7 @@ struct LowerUpperUTF8Impl
array(data.data(), data.data() + data.size(), offsets, res_data.data());
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument");
}

View File

@ -62,12 +62,13 @@ using Pos = const char *;
template <typename Extractor>
struct ExtractSubstringImpl
{
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
static void vector(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
size_t size = offsets.size();
res_offsets.resize(size);
res_data.reserve(size * Extractor::getReserveLengthForElement());
res_offsets.resize(input_rows_count);
res_data.reserve(input_rows_count * Extractor::getReserveLengthForElement());
size_t prev_offset = 0;
size_t res_offset = 0;
@ -76,7 +77,7 @@ struct ExtractSubstringImpl
Pos start;
size_t length;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Extractor::execute(reinterpret_cast<const char *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);
@ -99,7 +100,7 @@ struct ExtractSubstringImpl
res_data.assign(start, length);
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
@ -111,12 +112,13 @@ struct ExtractSubstringImpl
template <typename Extractor>
struct CutSubstringImpl
{
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
static void vector(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
res_data.reserve(data.size());
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
size_t res_offset = 0;
@ -125,7 +127,7 @@ struct CutSubstringImpl
Pos start;
size_t length;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * current = reinterpret_cast<const char *>(&data[prev_offset]);
Extractor::execute(current, offsets[i] - prev_offset - 1, start, length);
@ -154,7 +156,7 @@ struct CutSubstringImpl
res_data.append(start + length, data.data() + data.size());
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}

View File

@ -1,8 +1,8 @@
#pragma once
#include <base/find_symbols.h>
#include "domain.h"
#include "tldLookup.h"
#include <Functions/URL/domain.h>
#include <Functions/URL/tldLookup.h>
#include <Common/TLDListsHolder.h> /// TLDType
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "fragment.h"
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/fragment.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "queryString.h"
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/queryString.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "queryStringAndFragment.h"
#include <Functions/FunctionStringToString.h>
#include <Functions/URL/queryStringAndFragment.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "ExtractFirstSignificantSubdomain.h"
#include <Functions/URL/ExtractFirstSignificantSubdomain.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "ExtractFirstSignificantSubdomain.h"
#include "FirstSignificantSubdomainCustomImpl.h"
#include <Functions/URL/ExtractFirstSignificantSubdomain.h>
#include <Functions/URL/FirstSignificantSubdomainCustomImpl.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "protocol.h"
#include <Functions/URL/protocol.h>
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#include <base/hex.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <base/find_symbols.h>
#include <base/hex.h>
namespace DB
@ -121,8 +121,10 @@ enum URLCodeStrategy
template <URLCodeStrategy code_strategy, bool space_as_plus>
struct CodeURLComponentImpl
{
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
static void vector(
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (code_strategy == encode)
{
@ -134,13 +136,12 @@ struct CodeURLComponentImpl
res_data.resize(data.size());
}
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
size_t res_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
size_t src_size = offsets[i] - prev_offset;
@ -165,7 +166,7 @@ struct CodeURLComponentImpl
res_data.resize(res_offset);
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
}

View File

@ -1,5 +1,4 @@
#include "domain.h"
#include <Functions/URL/domain.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>

View File

@ -1,9 +1,10 @@
#pragma once
#include "protocol.h"
#include <base/find_symbols.h>
#include <cstring>
#include <Common/StringUtils.h>
#include <Functions/URL/protocol.h>
#include <base/find_symbols.h>
#include <cstring>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "domain.h"
#include <Functions/URL/domain.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "ExtractFirstSignificantSubdomain.h"
#include <Functions/URL/ExtractFirstSignificantSubdomain.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include "ExtractFirstSignificantSubdomain.h"
#include "FirstSignificantSubdomainCustomImpl.h"
#include <Functions/URL/ExtractFirstSignificantSubdomain.h>
#include <Functions/URL/FirstSignificantSubdomainCustomImpl.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "fragment.h"
#include <Functions/URL/fragment.h>
namespace DB
{

View File

@ -1,7 +1,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include "path.h"
#include <Functions/URL/path.h>
#include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include "path.h"
#include <Functions/URL/path.h>
#include <base/find_symbols.h>
namespace DB

View File

@ -5,7 +5,7 @@
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include "domain.h"
#include <Functions/URL/domain.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "protocol.h"
#include <Functions/URL/protocol.h>
namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "queryString.h"
#include <Functions/URL/queryString.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "queryStringAndFragment.h"
#include <Functions/URL/queryStringAndFragment.h>
namespace DB
{

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include "domain.h"
#include <Functions/URL/domain.h>
namespace DB
{

View File

@ -28,20 +28,20 @@ namespace
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
/// The result is never larger than the source,
/// because every entity decodes to a shorter byte sequence.
/// Example: &#xx... &#xx... decodes to a UTF-8 byte sequence of at most 4 bytes.
res_data.resize(data.size());
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
size_t res_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
size_t src_size = offsets[i] - prev_offset;
@ -55,7 +55,7 @@ namespace
res_data.resize(res_offset);
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function decodeHTMLComponent cannot work with FixedString argument");
}
@ -64,7 +64,6 @@ namespace
static const int max_legal_unicode_value = 0x10FFFF;
static const int max_decimal_length_of_unicode_point = 7; /// 1114111
static size_t execute(const char * src, size_t src_size, char * dst)
{
const char * src_pos = src;

View File

@ -27,20 +27,20 @@ namespace
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
/// The result is never larger than the source,
/// because every entity decodes to a shorter byte sequence.
/// Example: &#xx... &#xx... decodes to a UTF-8 byte sequence of at most 4 bytes.
res_data.resize(data.size());
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
size_t res_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
size_t src_size = offsets[i] - prev_offset;
@ -54,7 +54,7 @@ namespace
res_data.resize(res_offset);
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function decodeXMLComponent cannot work with FixedString argument");
}

View File

@ -25,17 +25,17 @@ namespace
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
/// 6 is the maximum size amplification (the maximum length of encoded entity: &quot;)
res_data.resize(data.size() * 6);
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
size_t res_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * src_data = reinterpret_cast<const char *>(&data[prev_offset]);
size_t src_size = offsets[i] - prev_offset;
@ -49,7 +49,7 @@ namespace
res_data.resize(res_offset);
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function encodeXML cannot work with FixedString argument");
}

View File

@ -44,15 +44,15 @@ struct IdnaEncode
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
res_offsets.reserve(input_rows_count);
size_t prev_offset = 0;
std::string ascii;
for (size_t row = 0; row < rows; ++row)
for (size_t row = 0; row < input_rows_count; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
@ -85,7 +85,7 @@ struct IdnaEncode
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
@ -99,15 +99,15 @@ struct IdnaDecode
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
res_offsets.reserve(input_rows_count);
size_t prev_offset = 0;
std::string unicode;
for (size_t row = 0; row < rows; ++row)
for (size_t row = 0; row < input_rows_count; ++row)
{
const char * ascii = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t ascii_length = offsets[row] - prev_offset - 1;
@ -124,7 +124,7 @@ struct IdnaDecode
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}

View File

@ -9,10 +9,12 @@ namespace
struct InitcapImpl
{
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t /*input_rows_count*/)
{
if (data.empty())
return;
@ -21,7 +23,7 @@ struct InitcapImpl
array(data.data(), data.data() + data.size(), res_data.data());
}
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data, size_t)
{
res_data.resize(data.size());
array(data.data(), data.data() + data.size(), res_data.data());

View File

@ -22,7 +22,8 @@ struct InitcapUTF8Impl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t /*input_rows_count*/)
{
if (data.empty())
return;
@ -31,7 +32,7 @@ struct InitcapUTF8Impl
array(data.data(), data.data() + data.size(), offsets, res_data.data());
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument");
}

View File

@ -19,17 +19,19 @@ template <bool keep_names>
struct Impl
{
static constexpr auto name = keep_names ? "normalizeQueryKeepNames" : "normalizeQuery";
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
res_data.reserve(data.size());
ColumnString::Offset prev_src_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
ColumnString::Offset curr_src_offset = offsets[i];
@ -43,7 +45,7 @@ struct Impl
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function normalizeQuery to fixed string.");
}

View File

@ -84,7 +84,8 @@ struct NormalizeUTF8Impl
static void vector(const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
UErrorCode err = U_ZERO_ERROR;
@ -92,8 +93,7 @@ struct NormalizeUTF8Impl
if (U_FAILURE(err))
throw Exception(ErrorCodes::CANNOT_NORMALIZE_STRING, "Normalization failed (getNormalizer): {}", u_errorName(err));
size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
res_data.reserve(data.size() * 2);
@ -103,7 +103,7 @@ struct NormalizeUTF8Impl
PODArray<UChar> from_uchars;
PODArray<UChar> to_uchars;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t from_size = offsets[i] - current_from_offset - 1;
@ -157,7 +157,7 @@ struct NormalizeUTF8Impl
res_data.resize(current_to_offset);
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function normalizeUTF8 to fixed string.");
}

View File

@ -27,13 +27,13 @@ struct Impl
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
PaddedPODArray<UInt64> & res_data)
PaddedPODArray<UInt64> & res_data,
size_t input_rows_count)
{
size_t size = offsets.size();
res_data.resize(size);
res_data.resize(input_rows_count);
ColumnString::Offset prev_src_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
ColumnString::Offset curr_src_offset = offsets[i];
res_data[i] = normalizedQueryHash(
@ -77,15 +77,15 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
{
auto col_res = ColumnUInt64::create();
typename ColumnUInt64::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
Impl<keep_names>::vector(col->getChars(), col->getOffsets(), vec_res);
vec_res.resize(input_rows_count);
Impl<keep_names>::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count);
return col_res;
}
else

View File

@ -91,8 +91,6 @@ private:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto size = input_rows_count;
/// Prepare array of ellipses.
size_t ellipses_count = (arguments.size() - 2) / 4;
std::vector<Ellipse> ellipses(ellipses_count);
@ -141,13 +139,11 @@ private:
auto dst = ColumnVector<UInt8>::create();
auto & dst_data = dst->getData();
dst_data.resize(size);
dst_data.resize(input_rows_count);
size_t start_index = 0;
for (const auto row : collections::range(0, size))
{
for (size_t row = 0; row < input_rows_count; ++row)
dst_data[row] = isPointInEllipses(col_vec_x->getData()[row], col_vec_y->getData()[row], ellipses.data(), ellipses_count, start_index);
}
return dst;
}
@ -157,7 +153,7 @@ private:
const auto * col_const_y = assert_cast<const ColumnConst *> (col_y);
size_t start_index = 0;
UInt8 res = isPointInEllipses(col_const_x->getValue<Float64>(), col_const_y->getValue<Float64>(), ellipses.data(), ellipses_count, start_index);
return DataTypeUInt8().createColumnConst(size, res);
return DataTypeUInt8().createColumnConst(input_rows_count, res);
}
else
{

View File

@ -6,11 +6,11 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wnewline-eof"
# include <ada/idna/punycode.h>
# include <ada/idna/unicode_transcoding.h>
# pragma clang diagnostic pop
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnewline-eof"
#include <ada/idna/punycode.h>
#include <ada/idna/unicode_transcoding.h>
#pragma clang diagnostic pop
namespace DB
{
@ -38,16 +38,16 @@ struct PunycodeEncode
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
res_offsets.reserve(input_rows_count);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_puny;
for (size_t row = 0; row < rows; ++row)
for (size_t row = 0; row < input_rows_count; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
@ -72,7 +72,7 @@ struct PunycodeEncode
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}
@ -86,16 +86,16 @@ struct PunycodeDecode
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const size_t rows = offsets.size();
res_data.reserve(data.size()); /// just a guess, assuming the input is all-ASCII
res_offsets.reserve(rows);
res_offsets.reserve(input_rows_count);
size_t prev_offset = 0;
std::u32string value_utf32;
std::string value_utf8;
for (size_t row = 0; row < rows; ++row)
for (size_t row = 0; row < input_rows_count; ++row)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[row] - prev_offset - 1;
@ -129,7 +129,7 @@ struct PunycodeDecode
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Arguments of type FixedString are not allowed");
}

View File

@ -55,19 +55,19 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
{
auto col_res = ColumnString::create();
ReverseImpl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
ReverseImpl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count);
return col_res;
}
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column.get()))
{
auto col_res = ColumnFixedString::create(col_fixed->getN());
ReverseImpl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars());
ReverseImpl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars(), input_rows_count);
return col_res;
}
else

View File

@ -9,17 +9,18 @@ namespace DB
*/
struct ReverseImpl
{
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
res_data.resize_exact(data.size());
res_offsets.assign(offsets);
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
for (size_t j = prev_offset; j < offsets[i] - 1; ++j)
res_data[j] = data[offsets[i] + prev_offset - 2 - j];
@ -28,12 +29,15 @@ struct ReverseImpl
}
}
static void vectorFixed(const ColumnString::Chars & data, size_t n, ColumnString::Chars & res_data)
static void vectorFixed(
const ColumnString::Chars & data,
size_t n,
ColumnString::Chars & res_data,
size_t input_rows_count)
{
res_data.resize_exact(data.size());
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
for (size_t j = i * n; j < (i + 1) * n; ++j)
res_data[j] = data[(i * 2 + 1) * n - j - 1];
}

View File

@ -23,25 +23,25 @@ namespace
*/
struct ReverseUTF8Impl
{
static void vector(const ColumnString::Chars & data,
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
bool all_ascii = isAllASCII(data.data(), data.size());
if (all_ascii)
{
ReverseImpl::vector(data, offsets, res_data, res_offsets);
ReverseImpl::vector(data, offsets, res_data, res_offsets, input_rows_count);
return;
}
res_data.resize(data.size());
res_offsets.assign(offsets);
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
ColumnString::Offset j = prev_offset;
while (j < offsets[i] - 1)
@ -73,7 +73,7 @@ struct ReverseUTF8Impl
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function reverseUTF8 to fixed string.");
}

View File

@ -79,14 +79,14 @@ struct SoundexImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const size_t size = offsets.size();
res_data.resize(size * (length + 1));
res_offsets.resize(size);
res_data.resize(input_rows_count * (length + 1));
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t value_length = offsets[i] - prev_offset - 1;
@ -98,7 +98,7 @@ struct SoundexImpl
}
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by soundex function");
}

View File

@ -128,16 +128,16 @@ struct ToValidUTF8Impl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const size_t offsets_size = offsets.size();
/// It can be larger than that, but we believe it is unlikely to happen.
res_data.resize(data.size());
res_offsets.resize(offsets_size);
res_offsets.resize(input_rows_count);
size_t prev_offset = 0;
WriteBufferFromVector<ColumnString::Chars> write_buffer(res_data);
for (size_t i = 0; i < offsets_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * haystack_data = reinterpret_cast<const char *>(&data[prev_offset]);
const size_t haystack_size = offsets[i] - prev_offset - 1;
@ -149,7 +149,7 @@ struct ToValidUTF8Impl
write_buffer.finalize();
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by toValidUTF8 function");
}

View File

@ -43,10 +43,10 @@ public:
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
size_t size = offsets.size();
res_offsets.resize_exact(size);
res_offsets.resize_exact(input_rows_count);
res_data.reserve_exact(data.size());
size_t prev_offset = 0;
@ -55,7 +55,7 @@ public:
const UInt8 * start;
size_t length;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
execute(reinterpret_cast<const UInt8 *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);
@ -69,7 +69,7 @@ public:
}
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Functions trimLeft, trimRight and trimBoth cannot work with FixedString argument");
}