Merge remote-tracking branch 'origin/master' into pr-local-plan

Igor Nikonov 2024-07-30 08:00:08 +00:00
commit 5ffa54bd70
88 changed files with 1123 additions and 944 deletions

View File

@ -45,7 +45,6 @@
* Add support for `cluster_for_parallel_replicas` when using custom-key parallel replicas. It allows you to use parallel replicas with a custom key on MergeTree tables (see the sketch after this list). [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)).
#### Performance Improvement
* Enable `optimize_functions_to_subcolumns` by default. [#58661](https://github.com/ClickHouse/ClickHouse/pull/58661) ([Anton Popov](https://github.com/CurtizJ)).
* Replace the integer-to-string conversion algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)).
* Sizes of hash tables created by the join (`parallel_hash` algorithm) are now collected and cached. This information is used to preallocate space in hash tables for subsequent query executions and saves time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)).
* Optimized queries that combine an `ORDER BY` on the primary key with a highly selective `WHERE` condition, by using buffering. This is controlled by the setting `read_in_order_use_buffering` (enabled by default) and can increase the memory usage of the query; see the sketch after this list. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)).
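
Two minimal usage sketches for the entries above. Only `cluster_for_parallel_replicas` and `read_in_order_use_buffering` come from the changelog itself; the companion setting names, the table, and the data are assumptions based on ClickHouse settings of that era and may differ in your version.

```sql
-- Sketch 1: custom-key parallel replicas (companion settings are assumed).
SET allow_experimental_parallel_reading_from_replicas = 1,
    max_parallel_replicas = 3,
    cluster_for_parallel_replicas = 'default',
    parallel_replicas_custom_key = 'user_id';

-- Sketch 2: buffered reading in primary-key order (hypothetical table).
CREATE TABLE events (ts DateTime, user_id UInt64, payload String)
ENGINE = MergeTree ORDER BY ts;

SELECT ts, user_id
FROM events
WHERE user_id = 42          -- highly selective condition
ORDER BY ts                 -- matches the primary key order
LIMIT 100
SETTINGS read_in_order_use_buffering = 1;  -- the default; set to 0 to save memory
```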

View File

@ -18,6 +18,16 @@ if (GLIBC_COMPATIBILITY)
message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.")
endif ()
if (SANITIZE STREQUAL thread)
# Disable the TSAN instrumentation of getauxval that conflicts with the sanitizer's re-exec under high ASLR entropy.
# See the longer comment in __auxv_init_procfs.
# In the case of TSAN we need to make sure getauxval is not instrumented, as that would introduce TSAN-internal
# calls to functions that depend on state that isn't initialized yet.
set_source_files_properties(
musl/getauxval.c
PROPERTIES COMPILE_FLAGS "-mllvm -tsan-instrument-func-entry-exit=false")
endif()
# Need to omit frame pointers to match the performance of glibc
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer")

View File

@ -75,6 +75,44 @@ unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type)
}
static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type)
{
#if defined(__x86_64__) && defined(__has_feature)
# if __has_feature(memory_sanitizer) || __has_feature(thread_sanitizer)
/// Sanitizers are not compatible with high ASLR entropy, which is the default on modern Linux distributions, and
/// to work around this limitation, TSAN and MSAN (no other sanitizers appear to do the same) re-exec the binary
/// without ASLR (see https://github.com/llvm/llvm-project/commit/0784b1eefa36d4acbb0dacd2d18796e26313b6c5).
/// The problem we face is that, in order to re-exec, the sanitizer wants to use the original pathname in the call,
/// and to get its value it uses getauxval (https://github.com/llvm/llvm-project/blob/20eff684203287828d6722fc860b9d3621429542/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L985-L988).
/// Since we provide getauxval ourselves (to minimize the version dependency on runtime glibc), we are the ones
/// being called and we fail horribly:
///
/// ==301455==ERROR: MemorySanitizer: SEGV on unknown address 0x2ffc6d721550 (pc 0x5622c1cc0073 bp 0x000000000003 sp 0x7ffc6d721530 T301455)
/// ==301455==The signal is caused by a WRITE memory access.
/// #0 0x5622c1cc0073 in __auxv_init_procfs ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:129:5
/// #1 0x5622c1cbffe9 in getauxval ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:240:12
/// #2 0x5622c0d7bfb4 in __sanitizer::ReExec() crtstuff.c
/// #3 0x5622c0df7bfc in __msan::InitShadowWithReExec(bool) crtstuff.c
/// #4 0x5622c0d95356 in __msan_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x256356) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741)
/// #5 0x5622c0dfe878 in msan.module_ctor main.cc
/// #6 0x5622c1cc156c in __libc_csu_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x118256c) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741)
/// #7 0x73dc05dd7ea3 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:343:6
/// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741)
/// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as
/// most global variables aren't initialized or available yet, so we can't initialize the auxiliary vector.
/// Normal glibc / musl getauxval doesn't have this problem since they initialize their auxv vector at the very
/// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have that option (otherwise
/// this complexity of reading "/proc/self/auxv" or using __environ would not be necessary).
/// To avoid these crashes on the re-exec call (see above how it would fail when creating `aux`, and if we used
/// __auxv_init_environ it would SIGSEGV when READing `__environ`), we capture this call for `AT_EXECFN` and
/// unconditionally return "/proc/self/exe" without any preparation. Theoretically this should be fine in
/// our case, as we don't load any libraries. That's the theory at least.
if (type == AT_EXECFN)
return (unsigned long)"/proc/self/exe";
# endif
#endif
// For debugging:
// - od -t dL /proc/self/auxv
// - LD_SHOW_AUX= ls
@ -199,7 +237,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type)
// - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ
static void * volatile getauxval_func = (void *)__auxv_init_procfs;
unsigned long getauxval(unsigned long type)
unsigned long NO_SANITIZE_THREAD getauxval(unsigned long type)
{
return ((unsigned long (*)(unsigned long))getauxval_func)(type);
}

contrib/icu vendored

@ -1 +1 @@
Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668
Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625

View File

@ -4,7 +4,9 @@ else ()
option(ENABLE_ICU "Enable ICU" 0)
endif ()
if (NOT ENABLE_ICU)
# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they generated
# the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255
if (NOT ENABLE_ICU OR ARCH_S390X)
message(STATUS "Not using ICU")
return()
endif()
@ -12,8 +14,6 @@ endif()
set(ICU_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/source")
set(ICUDATA_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icudata/")
set (CMAKE_CXX_STANDARD 17)
# These lists of sources were generated from the build log of the original ICU build system (configure + make).
set(ICUUC_SOURCES
@ -462,9 +462,9 @@ file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ")
enable_language(ASM)
if (ARCH_S390X)
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70b_dat.S" )
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75b_dat.S" )
else()
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" )
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S" )
endif()
set(ICUDATA_SOURCES

contrib/icudata vendored

@ -1 +1 @@
Subproject commit c8e717892a557b4d2852317c7d628aacc0a0e5ab
Subproject commit d345d6ac22f381c882420de9053d30ae1ff38d75

View File

@ -32,6 +32,7 @@ The supported formats are:
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✔ | ✔ |
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
| [JSONAsObject](#jsonasobject) | ✔ | ✗ |
| [JSONStrings](#jsonstrings) | ✔ | ✔ |
| [JSONColumns](#jsoncolumns) | ✔ | ✔ |
| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock) | ✔ | ✔ |
@ -822,6 +823,67 @@ Result:
└────────────────────────────┘
```
## JSONAsObject {#jsonasobject}
In this format, a single JSON object is interpreted as a single [Object('json')](/docs/en/sql-reference/data-types/json.md) value. If the input has several JSON objects (comma separated), they are interpreted as separate rows. If the input data is enclosed in square brackets, it is interpreted as an array of JSON objects.
This format can only be parsed for a table with a single field of type [Object('json')](/docs/en/sql-reference/data-types/json.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized).
**Examples**
Query:
``` sql
SET allow_experimental_object_type = 1;
CREATE TABLE json_as_object (json Object('json')) ENGINE = Memory;
INSERT INTO json_as_object (json) FORMAT JSONAsObject {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json stucture":1}
SELECT * FROM json_as_object FORMAT JSONEachRow;
```
Result:
``` response
{"json":{"any json stucture":0,"foo":{"bar":{"x":"y"},"baz":1}}}
{"json":{"any json stucture":0,"foo":{"bar":{"x":""},"baz":0}}}
{"json":{"any json stucture":1,"foo":{"bar":{"x":""},"baz":0}}}
```
**An array of JSON objects**
Query:
``` sql
SET allow_experimental_object_type = 1;
CREATE TABLE json_square_brackets (field Object('json')) ENGINE = Memory;
INSERT INTO json_square_brackets FORMAT JSONAsObject [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}];
SELECT * FROM json_square_brackets FORMAT JSONEachRow;
```
Result:
```response
{"field":{"id":1,"name":"name1"}}
{"field":{"id":2,"name":"name2"}}
```
**Columns with default values**
```sql
SET allow_experimental_object_type = 1;
CREATE TABLE json_as_object (json Object('json'), time DateTime MATERIALIZED now()) ENGINE = Memory;
INSERT INTO json_as_object (json) FORMAT JSONAsObject {"foo":{"bar":{"x":"y"},"baz":1}};
INSERT INTO json_as_object (json) FORMAT JSONAsObject {};
INSERT INTO json_as_object (json) FORMAT JSONAsObject {"any json stucture":1}
SELECT * FROM json_as_object FORMAT JSONEachRow
```
```response
{"json":{"any json structure":0,"foo":{"bar":{"x":"y"},"baz":1}},"time":"2024-07-25 17:02:45"}
{"json":{"any json structure":0,"foo":{"bar":{"x":""},"baz":0}},"time":"2024-07-25 17:02:47"}
{"json":{"any json structure":1,"foo":{"bar":{"x":""},"baz":0}},"time":"2024-07-25 17:02:50"}
```
## JSONCompact {#jsoncompact}
Differs from JSON only in that data rows are output in arrays, not in objects.
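For illustration, a minimal sketch of the difference (the row shapes in the comments are assumptions based on the format description above, with 64-bit integers quoted as strings by default):

```sql
-- JSON would emit each data row as an object:  {"n":"0","s":"0"}
-- JSONCompact emits each data row as an array: ["0","0"]
SELECT number AS n, toString(number) AS s
FROM system.numbers
LIMIT 2
FORMAT JSONCompact;
```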

View File

@ -224,7 +224,11 @@ void AccessRightsElement::replaceEmptyDatabase(const String & current_database)
String AccessRightsElement::toString() const { return toStringImpl(*this, true); }
String AccessRightsElement::toStringWithoutOptions() const { return toStringImpl(*this, false); }
String AccessRightsElement::toStringForAccessTypeSource() const
{
String result{access_flags.toKeywords().front()};
return result + " ON *.*";
}
bool AccessRightsElements::empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); }

View File

@ -89,6 +89,7 @@ struct AccessRightsElement
/// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table".
String toString() const;
String toStringWithoutOptions() const;
String toStringForAccessTypeSource() const;
};

View File

@ -38,6 +38,24 @@ namespace ErrorCodes
namespace
{
const std::vector<std::tuple<AccessFlags, std::string>> source_and_table_engines = {
{AccessType::FILE, "File"},
{AccessType::URL, "URL"},
{AccessType::REMOTE, "Distributed"},
{AccessType::MONGO, "MongoDB"},
{AccessType::REDIS, "Redis"},
{AccessType::MYSQL, "MySQL"},
{AccessType::POSTGRES, "PostgreSQL"},
{AccessType::SQLITE, "SQLite"},
{AccessType::ODBC, "ODBC"},
{AccessType::JDBC, "JDBC"},
{AccessType::HDFS, "HDFS"},
{AccessType::S3, "S3"},
{AccessType::HIVE, "Hive"},
{AccessType::AZURE, "AzureBlobStorage"}
};
AccessRights mixAccessRightsFromUserAndRoles(const User & user, const EnabledRolesInfo & roles_info)
{
AccessRights res = user.access;
@ -206,22 +224,6 @@ namespace
}
/// There is overlap between AccessType sources and table engines, so the following code avoids requiring the user to grant twice.
static const std::vector<std::tuple<AccessFlags, std::string>> source_and_table_engines = {
{AccessType::FILE, "File"},
{AccessType::URL, "URL"},
{AccessType::REMOTE, "Distributed"},
{AccessType::MONGO, "MongoDB"},
{AccessType::REDIS, "Redis"},
{AccessType::MYSQL, "MySQL"},
{AccessType::POSTGRES, "PostgreSQL"},
{AccessType::SQLITE, "SQLite"},
{AccessType::ODBC, "ODBC"},
{AccessType::JDBC, "JDBC"},
{AccessType::HDFS, "HDFS"},
{AccessType::S3, "S3"},
{AccessType::HIVE, "Hive"},
{AccessType::AZURE, "AzureBlobStorage"}
};
/// Sync SOURCE and TABLE_ENGINE, so we only need to check TABLE_ENGINE later.
if (access_control.doesTableEnginesRequireGrant())
@ -267,6 +269,11 @@ namespace
template <typename... OtherArgs>
std::string_view getDatabase(std::string_view arg1, const OtherArgs &...) { return arg1; }
std::string_view getTableEngine() { return {}; }
template <typename... OtherArgs>
std::string_view getTableEngine(std::string_view arg1, const OtherArgs &...) { return arg1; }
}
@ -620,18 +627,58 @@ bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, AccessFlag
if (!granted)
{
if (grant_option && acs->isGranted(flags, args...))
auto access_denied_no_grant = [&]<typename... FmtArgs>(AccessFlags access_flags, FmtArgs && ...fmt_args)
{
if (grant_option && acs->isGranted(access_flags, fmt_args...))
{
return access_denied(ErrorCodes::ACCESS_DENIED,
"{}: Not enough privileges. "
"The required privileges have been granted, but without grant option. "
"To execute this query, it's necessary to have the grant {} WITH GRANT OPTION",
AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions());
}
return access_denied(ErrorCodes::ACCESS_DENIED,
"{}: Not enough privileges. "
"The required privileges have been granted, but without grant option. "
"To execute this query, it's necessary to have the grant {} WITH GRANT OPTION",
AccessRightsElement{flags, args...}.toStringWithoutOptions());
"{}: Not enough privileges. To execute this query, it's necessary to have the grant {}",
AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : ""));
};
/// As we check the SOURCES through the table engine logic, an error message that mentions the table engine directly
/// would be misleading, since the SOURCE itself is not actually granted. To solve this, map the message back to the source.
if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant())
{
AccessFlags new_flags;
String table_engine_name{getTableEngine(args...)};
for (const auto & source_and_table_engine : source_and_table_engines)
{
const auto & table_engine = std::get<1>(source_and_table_engine);
if (table_engine != table_engine_name) continue;
const auto & source = std::get<0>(source_and_table_engine);
/// Set the flags from Table Engine to SOURCES so that prompts can be meaningful.
new_flags = source;
break;
}
/// This can happen if the table engine was granted on A (but not the source) and A was then revoked.
if (new_flags.isEmpty())
return access_denied_no_grant(flags, args...);
if (grant_option && acs->isGranted(flags, args...))
{
return access_denied(ErrorCodes::ACCESS_DENIED,
"{}: Not enough privileges. "
"The required privileges have been granted, but without grant option. "
"To execute this query, it's necessary to have the grant {} WITH GRANT OPTION",
AccessRightsElement{new_flags}.toStringForAccessTypeSource());
}
return access_denied(ErrorCodes::ACCESS_DENIED,
"{}: Not enough privileges. To execute this query, it's necessary to have the grant {}",
AccessRightsElement{new_flags}.toStringForAccessTypeSource() + (grant_option ? " WITH GRANT OPTION" : ""));
}
return access_denied(ErrorCodes::ACCESS_DENIED,
"{}: Not enough privileges. To execute this query, it's necessary to have the grant {}",
AccessRightsElement{flags, args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : ""));
return access_denied_no_grant(flags, args...);
}
struct PrecalculatedFlags

View File

@ -81,46 +81,43 @@ struct CRCFunctionWrapper
static constexpr auto is_fixed_to_constant = true;
using ReturnType = typename Impl::ReturnType;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res)
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = doCRC(data, prev_offset, offsets[i] - prev_offset - 1);
prev_offset = offsets[i];
}
}
static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = doCRC(data, 0, n); }
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res)
static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res, size_t)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
{
res[i] = doCRC(data, i * n, n);
}
res = doCRC(data, 0, n);
}
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res, size_t input_rows_count)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = doCRC(data, i * n, n);
}
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", std::string(Impl::name));
}
[[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", std::string(Impl::name));
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", std::string(Impl::name));
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", std::string(Impl::name));
}

View File

@ -32,13 +32,12 @@ struct WeekTransformer
{}
template <typename FromVectorType, typename ToVectorType>
void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone) const
void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone, size_t input_rows_count) const
{
using ValueType = typename ToVectorType::value_type;
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (is_extended_result)
vec_to[i] = static_cast<ValueType>(transform.executeExtendedResult(vec_from[i], week_mode, time_zone));
@ -56,7 +55,7 @@ template <typename FromDataType, typename ToDataType, bool is_extended_result =
struct CustomWeekTransformImpl
{
template <typename Transform>
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {})
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count, Transform transform = {})
{
const auto op = WeekTransformer<typename FromDataType::FieldType, typename ToDataType::FieldType, Transform, is_extended_result>{transform};
@ -77,9 +76,9 @@ struct CustomWeekTransformImpl
const auto * sources = checkAndGetColumn<DataTypeString::ColumnType>(source_col.get());
auto col_to = ToDataType::ColumnType::create();
col_to->getData().resize(sources->size());
col_to->getData().resize(input_rows_count);
for (size_t i = 0; i < sources->size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
DateTime64 dt64;
ReadBufferFromString buf(sources->getDataAt(i).toView());
@ -92,7 +91,7 @@ struct CustomWeekTransformImpl
else if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))
{
auto col_to = ToDataType::ColumnType::create();
op.vector(sources->getData(), col_to->getData(), week_mode, time_zone);
op.vector(sources->getData(), col_to->getData(), week_mode, time_zone, input_rows_count);
return col_to;
}
else

View File

@ -24,7 +24,7 @@ namespace DB
static constexpr auto millisecond_multiplier = 1'000;
static constexpr auto microsecond_multiplier = 1'000'000;
static constexpr auto nanosecond_multiplier = 1'000'000'000;
static constexpr auto nanosecond_multiplier = 1'000'000'000;
static constexpr FormatSettings::DateTimeOverflowBehavior default_date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore;
@ -2134,13 +2134,12 @@ struct Transformer
{
template <typename FromTypeVector, typename ToTypeVector>
static void vector(const FromTypeVector & vec_from, ToTypeVector & vec_to, const DateLUTImpl & time_zone, const Transform & transform,
[[maybe_unused]] ColumnUInt8::Container * vec_null_map_to)
[[maybe_unused]] ColumnUInt8::Container * vec_null_map_to, size_t input_rows_count)
{
using ValueType = typename ToTypeVector::value_type;
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (std::is_same_v<ToType, DataTypeDate> || std::is_same_v<ToType, DataTypeDateTime>)
{
@ -2178,7 +2177,7 @@ struct DateTimeTransformImpl
{
template <typename Additions = void *>
static ColumnPtr execute(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {})
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const Transform & transform = {})
{
using Op = Transformer<FromDataType, ToDataType, Transform, is_extended_result, Additions>;
@ -2200,7 +2199,7 @@ struct DateTimeTransformImpl
if (result_data_type.isDateTime() || result_data_type.isDateTime64())
{
const auto & time_zone = dynamic_cast<const TimezoneMixin &>(*result_type).getTimeZone();
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to);
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count);
}
else
{
@ -2209,15 +2208,13 @@ struct DateTimeTransformImpl
time_zone_argument_position = 2;
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0);
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to);
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count);
}
if constexpr (std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
{
if (vec_null_map_to)
{
return ColumnNullable::create(std::move(mutable_result_col), std::move(col_null_map_to));
}
}
return mutable_result_col;

View File

@ -21,11 +21,10 @@ struct EmptyImpl
/// Whether the function returns a constant value for the FixedString data type.
static constexpr auto is_fixed_to_constant = false;
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 1;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = negative ^ (offsets[i] == prev_offset);
prev_offset = offsets[i] + 1;
@ -33,42 +32,40 @@ struct EmptyImpl
}
/// Only makes sense if is_fixed_to_constant.
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/)
static void vectorFixedToConstant(const ColumnString::Chars &, size_t, UInt8 &, size_t)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "'vectorFixedToConstant method' is called");
}
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = negative ^ memoryIsZeroSmallAllowOverflow15(data.data() + i * n, n);
}
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = negative ^ (offsets[i] == prev_offset);
prev_offset = offsets[i];
}
}
static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray<UInt8> & res)
static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray<UInt8> & res, size_t)
{
for (size_t i = 0; i < n; ++i)
res[i] = negative ^ (container[i].toUnderType() == 0);
}
static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray<UInt8> & res)
static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray<UInt8> & res, size_t)
{
for (size_t i = 0; i < n; ++i)
res[i] = negative ^ (container[i].toUnderType() == 0);
}
static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray<UInt8> & res)
static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray<UInt8> & res, size_t)
{
for (size_t i = 0; i < n; ++i)
res[i] = negative ^ (container[i].toUnderType() == 0);

View File

@ -20,7 +20,7 @@ namespace DB
// includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word
struct ExtractStringImpl
{
static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end)
static const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end)
{
// skip separators
while (pos < end && isUTF8Sep(*pos))

View File

@ -46,7 +46,7 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", first_arg->getName(), getName());
for (const auto i : collections::range(1, arguments.size()))
for (size_t i = 1; i < arguments.size(); ++i)
{
const auto & pos_arg = arguments[i];
@ -57,19 +57,19 @@ public:
return std::make_shared<DataTypeUInt8>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const auto * value_col = arguments.front().column.get();
ColumnPtr res;
if (!((res = execute<UInt8>(arguments, result_type, value_col))
|| (res = execute<UInt16>(arguments, result_type, value_col))
|| (res = execute<UInt32>(arguments, result_type, value_col))
|| (res = execute<UInt64>(arguments, result_type, value_col))
|| (res = execute<Int8>(arguments, result_type, value_col))
|| (res = execute<Int16>(arguments, result_type, value_col))
|| (res = execute<Int32>(arguments, result_type, value_col))
|| (res = execute<Int64>(arguments, result_type, value_col))))
if (!((res = execute<UInt8>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<UInt16>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<UInt32>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<UInt64>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int8>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int16>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int32>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int64>(arguments, result_type, value_col, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", value_col->getName(), getName());
return res;
@ -79,28 +79,28 @@ private:
template <typename T>
ColumnPtr execute(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type,
const IColumn * const value_col_untyped) const
const IColumn * const value_col_untyped,
size_t input_rows_count) const
{
if (const auto value_col = checkAndGetColumn<ColumnVector<T>>(value_col_untyped))
{
const auto size = value_col->size();
bool is_const;
const auto const_mask = createConstMaskIfConst<T>(arguments, is_const);
const auto & val = value_col->getData();
auto out_col = ColumnVector<UInt8>::create(size);
auto out_col = ColumnVector<UInt8>::create(input_rows_count);
auto & out = out_col->getData();
if (is_const)
{
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out[i] = Impl::apply(val[i], const_mask);
}
else
{
const auto mask = createMask<T>(size, arguments);
const auto mask = createMask<T>(input_rows_count, arguments);
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out[i] = Impl::apply(val[i], mask[i]);
}
@ -108,23 +108,22 @@ private:
}
else if (const auto value_col_const = checkAndGetColumnConst<ColumnVector<T>>(value_col_untyped))
{
const auto size = value_col_const->size();
bool is_const;
const auto const_mask = createConstMaskIfConst<T>(arguments, is_const);
const auto val = value_col_const->template getValue<T>();
if (is_const)
{
return result_type->createColumnConst(size, toField(Impl::apply(val, const_mask)));
return result_type->createColumnConst(input_rows_count, toField(Impl::apply(val, const_mask)));
}
else
{
const auto mask = createMask<T>(size, arguments);
auto out_col = ColumnVector<UInt8>::create(size);
const auto mask = createMask<T>(input_rows_count, arguments);
auto out_col = ColumnVector<UInt8>::create(input_rows_count);
auto & out = out_col->getData();
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out[i] = Impl::apply(val, mask[i]);
return out_col;
@ -140,7 +139,7 @@ private:
out_is_const = true;
ValueType mask = 0;
for (const auto i : collections::range(1, arguments.size()))
for (size_t i = 1; i < arguments.size(); ++i)
{
if (auto pos_col_const = checkAndGetColumnConst<ColumnVector<ValueType>>(arguments[i].column.get()))
{
@ -166,7 +165,7 @@ private:
{
PaddedPODArray<ValueType> mask(size, ValueType{});
for (const auto i : collections::range(1, arguments.size()))
for (size_t i = 1; i < arguments.size(); ++i)
{
const auto * pos_col = arguments[i].column.get();
@ -187,7 +186,7 @@ private:
{
const auto & pos = pos_col->getData();
for (const auto i : collections::range(0, mask.size()))
for (size_t i = 0; i < mask.size(); ++i)
if (pos[i] < 8 * sizeof(ValueType))
mask[i] = mask[i] | (ValueType(1) << pos[i]);
else
@ -205,7 +204,7 @@ private:
const auto new_mask = ValueType(1) << pos;
for (const auto i : collections::range(0, mask.size()))
for (size_t i = 0; i < mask.size(); ++i)
mask[i] = mask[i] | new_mask;
return true;

View File

@ -103,14 +103,11 @@ private:
const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data);
if (!src_data_concrete)
{
return false;
}
for (size_t row = 0; row < rows; ++row)
{
out_vec[row * size_per_row + column_idx] = static_cast<char>(src_data_concrete->getInt(row));
}
return true;
}
};

View File

@ -428,19 +428,17 @@ struct Processor
{}
template <typename FromColumnType, typename ToColumnType>
void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
if constexpr (std::is_same_v<FromColumnType, ColumnString>)
{
const auto & offsets_from = col_from.getOffsets();
auto & vec_to = col_to.getData();
size_t size = offsets_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0 ; i < size; ++i)
for (size_t i = 0 ; i < input_rows_count; ++i)
{
std::string_view from = col_from.getDataAt(i).toView();
vec_to[i] = transform.execute(from, checkOverflow(delta), time_zone, utc_time_zone, scale);
@ -451,32 +449,31 @@ struct Processor
const auto & vec_from = col_from.getData();
auto & vec_to = col_to.getData();
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, utc_time_zone, scale);
}
}
template <typename FromColumnType, typename ToColumnType>
void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
castTypeToEither<
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale); return true; });
&delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale, input_rows_count); return true; });
}
template <typename FromType, typename ToColumnType>
void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
castTypeToEither<
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale); return true; });
&delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale, input_rows_count); return true; });
}
private:
@ -491,19 +488,17 @@ private:
template <typename FromColumnType, typename ToColumnType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const
const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
if constexpr (std::is_same_v<FromColumnType, ColumnString>)
{
const auto & offsets_from = col_from.getOffsets();
auto & vec_to = col_to.getData();
size_t size = offsets_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0 ; i < size; ++i)
for (size_t i = 0 ; i < input_rows_count; ++i)
{
std::string_view from = col_from.getDataAt(i).toView();
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale);
@ -514,26 +509,24 @@ private:
const auto & vec_from = col_from.getData();
auto & vec_to = col_to.getData();
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale);
}
}
template <typename FromType, typename ToColumnType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const
const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
auto & vec_to = col_to.getData();
size_t size = delta.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale);
}
};
@ -542,7 +535,7 @@ private:
template <typename FromDataType, typename ToDataType, typename Transform>
struct DateTimeAddIntervalImpl
{
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale)
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale, size_t input_rows_count)
{
using FromValueType = typename FromDataType::FieldType;
using FromColumnType = typename FromDataType::ColumnType;
@ -561,15 +554,15 @@ struct DateTimeAddIntervalImpl
if (const auto * sources = checkAndGetColumn<FromColumnType>(&source_column))
{
if (const auto * delta_const_column = typeid_cast<const ColumnConst *>(&delta_column))
processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale);
processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale, input_rows_count);
else
processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale);
processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale, input_rows_count);
}
else if (const auto * sources_const = checkAndGetColumnConst<FromColumnType>(&source_column))
{
processor.constantVector(
sources_const->template getValue<FromValueType>(),
*col_to, delta_column, time_zone, scale);
*col_to, delta_column, time_zone, scale, input_rows_count);
}
else
{
@ -708,25 +701,25 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const IDataType * from_type = arguments[0].type.get();
WhichDataType which(from_type);
if (which.isDate())
return DateTimeAddIntervalImpl<DataTypeDate, TransformResultDataType<DataTypeDate>, Transform>::execute(Transform{}, arguments, result_type, 0);
return DateTimeAddIntervalImpl<DataTypeDate, TransformResultDataType<DataTypeDate>, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count);
else if (which.isDate32())
return DateTimeAddIntervalImpl<DataTypeDate32, TransformResultDataType<DataTypeDate32>, Transform>::execute(Transform{}, arguments, result_type, 0);
return DateTimeAddIntervalImpl<DataTypeDate32, TransformResultDataType<DataTypeDate32>, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count);
else if (which.isDateTime())
return DateTimeAddIntervalImpl<DataTypeDateTime, TransformResultDataType<DataTypeDateTime>, Transform>::execute(Transform{}, arguments, result_type, 0);
return DateTimeAddIntervalImpl<DataTypeDateTime, TransformResultDataType<DataTypeDateTime>, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count);
else if (which.isDateTime64())
{
const auto * datetime64_type = assert_cast<const DataTypeDateTime64 *>(from_type);
auto from_scale = datetime64_type->getScale();
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(Transform{}, arguments, result_type, from_scale);
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(Transform{}, arguments, result_type, from_scale, input_rows_count);
}
else if (which.isString())
return DateTimeAddIntervalImpl<DataTypeString, DataTypeDateTime64, Transform>::execute(Transform{}, arguments, result_type, 3);
return DateTimeAddIntervalImpl<DataTypeString, DataTypeDateTime64, Transform>::execute(Transform{}, arguments, result_type, 3, input_rows_count);
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", arguments[0].type->getName(), getName());
}

View File

@ -54,7 +54,7 @@ private:
}
template <typename LeftType, typename RightType>
static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg)
static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg, size_t input_rows_count)
{
if (const auto right_arg_typed = checkAndGetColumn<ColumnVector<RightType>>(right_arg))
{
@ -63,12 +63,11 @@ private:
LeftType left_src_data[Impl::rows_per_iteration];
std::fill(std::begin(left_src_data), std::end(left_src_data), left_arg->template getValue<LeftType>());
const auto & right_src_data = right_arg_typed->getData();
const auto src_size = right_src_data.size();
auto & dst_data = dst->getData();
dst_data.resize(src_size);
dst_data.resize(input_rows_count);
const auto rows_remaining = src_size % Impl::rows_per_iteration;
const auto rows_size = src_size - rows_remaining;
const auto rows_remaining = input_rows_count % Impl::rows_per_iteration;
const auto rows_size = input_rows_count - rows_remaining;
for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
Impl::execute(left_src_data, &right_src_data[i], &dst_data[i]);
@ -92,7 +91,7 @@ private:
}
template <typename LeftType, typename RightType>
static ColumnPtr executeTyped(const ColumnVector<LeftType> * left_arg, const IColumn * right_arg)
static ColumnPtr executeTyped(const ColumnVector<LeftType> * left_arg, const IColumn * right_arg, size_t input_rows_count)
{
if (const auto right_arg_typed = checkAndGetColumn<ColumnVector<RightType>>(right_arg))
{
@ -100,12 +99,11 @@ private:
const auto & left_src_data = left_arg->getData();
const auto & right_src_data = right_arg_typed->getData();
const auto src_size = left_src_data.size();
auto & dst_data = dst->getData();
dst_data.resize(src_size);
dst_data.resize(input_rows_count);
const auto rows_remaining = src_size % Impl::rows_per_iteration;
const auto rows_size = src_size - rows_remaining;
const auto rows_remaining = input_rows_count % Impl::rows_per_iteration;
const auto rows_size = input_rows_count - rows_remaining;
for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
Impl::execute(&left_src_data[i], &right_src_data[i], &dst_data[i]);
@ -136,12 +134,11 @@ private:
const auto & left_src_data = left_arg->getData();
RightType right_src_data[Impl::rows_per_iteration];
std::fill(std::begin(right_src_data), std::end(right_src_data), right_arg_typed->template getValue<RightType>());
const auto src_size = left_src_data.size();
auto & dst_data = dst->getData();
dst_data.resize(src_size);
dst_data.resize(input_rows_count);
const auto rows_remaining = src_size % Impl::rows_per_iteration;
const auto rows_size = src_size - rows_remaining;
const auto rows_remaining = input_rows_count % Impl::rows_per_iteration;
const auto rows_size = input_rows_count - rows_remaining;
for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
Impl::execute(&left_src_data[i], right_src_data, &dst_data[i]);
@ -165,7 +162,7 @@ private:
return nullptr;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_left = arguments[0];
const ColumnWithTypeAndName & col_right = arguments[1];
@ -202,7 +199,7 @@ private:
if (const auto left_arg_typed = checkAndGetColumn<ColVecLeft>(left_arg))
{
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg)))
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg, input_rows_count)))
return true;
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}",
@ -210,7 +207,7 @@ private:
}
if (const auto left_arg_typed = checkAndGetColumnConst<ColVecLeft>(left_arg))
{
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg)))
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg, input_rows_count)))
return true;
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}",

View File

@ -106,42 +106,40 @@ private:
}
template <typename T, typename ReturnType>
static ColumnPtr execute(const ColumnVector<T> * col)
static ColumnPtr execute(const ColumnVector<T> * col, size_t input_rows_count)
{
const auto & src_data = col->getData();
const size_t size = src_data.size();
auto dst = ColumnVector<ReturnType>::create();
auto & dst_data = dst->getData();
dst_data.resize(size);
dst_data.resize(input_rows_count);
executeInIterations(src_data.data(), dst_data.data(), size);
executeInIterations(src_data.data(), dst_data.data(), input_rows_count);
return dst;
}
template <typename T, typename ReturnType>
static ColumnPtr execute(const ColumnDecimal<T> * col)
static ColumnPtr execute(const ColumnDecimal<T> * col, size_t input_rows_count)
{
const auto & src_data = col->getData();
const size_t size = src_data.size();
UInt32 scale = col->getScale();
auto dst = ColumnVector<ReturnType>::create();
auto & dst_data = dst->getData();
dst_data.resize(size);
dst_data.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
dst_data[i] = DecimalUtils::convertTo<ReturnType>(src_data[i], scale);
executeInIterations(dst_data.data(), dst_data.data(), size);
executeInIterations(dst_data.data(), dst_data.data(), input_rows_count);
return dst;
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col = arguments[0];
ColumnPtr res;
@ -156,7 +154,7 @@ private:
const auto col_vec = checkAndGetColumn<ColVecType>(col.column.get());
if (col_vec == nullptr)
return false;
return (res = execute<Type, ReturnType>(col_vec)) != nullptr;
return (res = execute<Type, ReturnType>(col_vec, input_rows_count)) != nullptr;
};
if (!callOnBasicType<void, true, true, true, false>(col.type->getTypeId(), call))

View File

@ -53,39 +53,37 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * in = arguments.front().column.get();
ColumnPtr res;
if (!((res = execute<UInt8>(in))
|| (res = execute<UInt16>(in))
|| (res = execute<UInt32>(in))
|| (res = execute<UInt64>(in))
|| (res = execute<Int8>(in))
|| (res = execute<Int16>(in))
|| (res = execute<Int32>(in))
|| (res = execute<Int64>(in))
|| (res = execute<Float32>(in))
|| (res = execute<Float64>(in))))
if (!((res = execute<UInt8>(in, input_rows_count))
|| (res = execute<UInt16>(in, input_rows_count))
|| (res = execute<UInt32>(in, input_rows_count))
|| (res = execute<UInt64>(in, input_rows_count))
|| (res = execute<Int8>(in, input_rows_count))
|| (res = execute<Int16>(in, input_rows_count))
|| (res = execute<Int32>(in, input_rows_count))
|| (res = execute<Int64>(in, input_rows_count))
|| (res = execute<Float32>(in, input_rows_count))
|| (res = execute<Float64>(in, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
return res;
}
template <typename T>
ColumnPtr execute(const IColumn * in_untyped) const
ColumnPtr execute(const IColumn * in_untyped, size_t input_rows_count) const
{
if (const auto in = checkAndGetColumn<ColumnVector<T>>(in_untyped))
{
const auto size = in->size();
auto out = ColumnUInt8::create(size);
auto out = ColumnUInt8::create(input_rows_count);
const auto & in_data = in->getData();
auto & out_data = out->getData();
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out_data[i] = Impl::execute(in_data[i]);
return out;

View File

@ -132,9 +132,7 @@ public:
}
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; i++)
{
types[i] = std::make_shared<DataTypeUInt64>();
}
return std::make_shared<DataTypeTuple>(types);
}
};

View File

@ -71,7 +71,7 @@ public:
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
Impl::vector(col->getChars(), col->getOffsets(), vec_res);
Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count);
return col_res;
}
@ -80,7 +80,7 @@ public:
if (Impl::is_fixed_to_constant)
{
ResultType res = 0;
Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res);
Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res, input_rows_count);
return result_type->createColumnConst(col_fixed->size(), toField(res));
}
@ -90,7 +90,7 @@ public:
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_fixed->size());
Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res);
Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res, input_rows_count);
return col_res;
}
@ -101,7 +101,7 @@ public:
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_arr->size());
Impl::array(col_arr->getOffsets(), vec_res);
Impl::array(col_arr->getOffsets(), vec_res, input_rows_count);
return col_res;
}
@ -112,7 +112,7 @@ public:
vec_res.resize(col_map->size());
const auto & col_nested = col_map->getNestedColumn();
Impl::array(col_nested.getOffsets(), vec_res);
Impl::array(col_nested.getOffsets(), vec_res, input_rows_count);
return col_res;
}
else if (const ColumnUUID * col_uuid = checkAndGetColumn<ColumnUUID>(column.get()))
@ -120,7 +120,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_uuid->size());
Impl::uuid(col_uuid->getData(), input_rows_count, vec_res);
Impl::uuid(col_uuid->getData(), input_rows_count, vec_res, input_rows_count);
return col_res;
}
else if (const ColumnIPv6 * col_ipv6 = checkAndGetColumn<ColumnIPv6>(column.get()))
@ -128,7 +128,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_ipv6->size());
Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res);
Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res, input_rows_count);
return col_res;
}
else if (const ColumnIPv4 * col_ipv4 = checkAndGetColumn<ColumnIPv4>(column.get()))
@ -136,7 +136,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_ipv4->size());
Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res);
Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res, input_rows_count);
return col_res;
}
else

View File

@ -45,7 +45,7 @@ public:
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
ColumnPtr column_haystack = arguments[0].column;
column_haystack = column_haystack->convertToFullColumnIfConst();
@ -70,7 +70,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_const->getValue<String>(),
col_replacement_const->getValue<String>(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack && col_needle_vector && col_replacement_const)
@ -79,7 +80,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
col_replacement_const->getValue<String>(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack && col_needle_const && col_replacement_vector)
@ -88,7 +90,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_const->getValue<String>(),
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack && col_needle_vector && col_replacement_vector)
@ -97,7 +100,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack_fixed && col_needle_const && col_replacement_const)
@ -106,7 +110,8 @@ public:
col_haystack_fixed->getChars(), col_haystack_fixed->getN(),
col_needle_const->getValue<String>(),
col_replacement_const->getValue<String>(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else

View File

@ -632,7 +632,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
@ -646,11 +646,10 @@ public:
const ColumnString::Chars & in_vec = col->getChars();
const ColumnString::Offsets & in_offsets = col->getOffsets();
size_t size = in_offsets.size();
out_offsets.resize(size);
out_offsets.resize(input_rows_count);
size_t max_out_len = 0;
for (size_t i = 0; i < in_offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1])
- /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1;
@ -662,7 +661,7 @@ public:
char * pos = begin;
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t new_offset = in_offsets[i];
@ -691,15 +690,14 @@ public:
const ColumnString::Chars & in_vec = col_fix_string->getChars();
const size_t n = col_fix_string->getN();
size_t size = col_fix_string->size();
out_offsets.resize(size);
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size);
out_offsets.resize(input_rows_count);
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * input_rows_count);
char * begin = reinterpret_cast<char *>(out_vec.data());
char * pos = begin;
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t new_offset = prev_offset + n;

View File

@ -60,17 +60,17 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
ColumnPtr res;
if (!((res = executeType<UInt8>(arguments))
|| (res = executeType<UInt16>(arguments))
|| (res = executeType<UInt32>(arguments))
|| (res = executeType<UInt64>(arguments))
|| (res = executeType<Int8>(arguments))
|| (res = executeType<Int16>(arguments))
|| (res = executeType<Int32>(arguments))
|| (res = executeType<Int64>(arguments))))
if (!((res = executeType<UInt8>(arguments, input_rows_count))
|| (res = executeType<UInt16>(arguments, input_rows_count))
|| (res = executeType<UInt32>(arguments, input_rows_count))
|| (res = executeType<UInt64>(arguments, input_rows_count))
|| (res = executeType<Int8>(arguments, input_rows_count))
|| (res = executeType<Int16>(arguments, input_rows_count))
|| (res = executeType<Int32>(arguments, input_rows_count))
|| (res = executeType<Int64>(arguments, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
arguments[0].column->getName(), getName());
@ -98,7 +98,7 @@ private:
}
template <typename T>
ColumnPtr executeType(const ColumnsWithTypeAndName & columns) const
ColumnPtr executeType(const ColumnsWithTypeAndName & columns, size_t input_rows_count) const
{
if (const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(columns[0].column.get()))
{
@ -107,13 +107,12 @@ private:
const typename ColumnVector<T>::Container & vec_from = col_from->getData();
ColumnString::Chars & data_to = col_to->getChars();
ColumnString::Offsets & offsets_to = col_to->getOffsets();
size_t size = vec_from.size();
data_to.resize(size * 2);
offsets_to.resize(size);
data_to.resize(input_rows_count * 2);
offsets_to.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(data_to);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
writeBitmask<T>(vec_from[i], buf_to);
writeChar(0, buf_to);
@ -244,7 +243,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
template <typename T>
ColumnPtr executeType(const IColumn * column) const
ColumnPtr executeType(const IColumn * column, size_t input_rows_count) const
{
const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(column);
if (!col_from)
@ -257,13 +256,12 @@ public:
auto & result_array_offsets_data = result_array_offsets->getData();
auto & vec_from = col_from->getData();
size_t size = vec_from.size();
result_array_offsets_data.resize(size);
result_array_values_data.reserve(size * 2);
result_array_offsets_data.resize(input_rows_count);
result_array_values_data.reserve(input_rows_count * 2);
using UnsignedType = make_unsigned_t<T>;
for (size_t row = 0; row < size; ++row)
for (size_t row = 0; row < input_rows_count; ++row)
{
UnsignedType x = static_cast<UnsignedType>(vec_from[row]);
@ -302,24 +300,24 @@ public:
return result_column;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const IColumn * in_column = arguments[0].column.get();
ColumnPtr result_column;
if (!((result_column = executeType<UInt8>(in_column))
|| (result_column = executeType<UInt16>(in_column))
|| (result_column = executeType<UInt32>(in_column))
|| (result_column = executeType<UInt64>(in_column))
|| (result_column = executeType<UInt128>(in_column))
|| (result_column = executeType<UInt256>(in_column))
|| (result_column = executeType<Int8>(in_column))
|| (result_column = executeType<Int16>(in_column))
|| (result_column = executeType<Int32>(in_column))
|| (result_column = executeType<Int64>(in_column))
|| (result_column = executeType<Int128>(in_column))
|| (result_column = executeType<Int256>(in_column))))
if (!((result_column = executeType<UInt8>(in_column, input_rows_count))
|| (result_column = executeType<UInt16>(in_column, input_rows_count))
|| (result_column = executeType<UInt32>(in_column, input_rows_count))
|| (result_column = executeType<UInt64>(in_column, input_rows_count))
|| (result_column = executeType<UInt128>(in_column, input_rows_count))
|| (result_column = executeType<UInt256>(in_column, input_rows_count))
|| (result_column = executeType<Int8>(in_column, input_rows_count))
|| (result_column = executeType<Int16>(in_column, input_rows_count))
|| (result_column = executeType<Int32>(in_column, input_rows_count))
|| (result_column = executeType<Int64>(in_column, input_rows_count))
|| (result_column = executeType<Int128>(in_column, input_rows_count))
|| (result_column = executeType<Int256>(in_column, input_rows_count))))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of first argument of function {}",

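The dispatch chains above rely on assignment-in-condition short-circuiting: the first `executeType<T>` whose type matches the concrete column returns non-null and stops the chain. A compilable toy version of that idiom (hypothetical `Column` type, stubbed `executeType`):

#include <cstddef>
#include <cstdint>
#include <memory>

struct Column { virtual ~Column() = default; };

template <typename T>
std::unique_ptr<Column> executeType(const Column *, size_t)
{
    return nullptr; // stub: a real implementation would match the concrete column type
}

std::unique_ptr<Column> dispatch(const Column * in, size_t input_rows_count)
{
    std::unique_ptr<Column> res;
    if (!((res = executeType<uint8_t>(in, input_rows_count))
          || (res = executeType<uint16_t>(in, input_rows_count))
          || (res = executeType<uint32_t>(in, input_rows_count))
          || (res = executeType<uint64_t>(in, input_rows_count))))
        return nullptr; // the real code throws ILLEGAL_COLUMN here
    return res;
}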
View File

@ -155,7 +155,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const IDataType * from_type = arguments[0].type.get();
const auto * array_type = typeid_cast<const DataTypeArray *>(from_type);
@ -165,21 +165,21 @@ public:
WhichDataType which(nested_type);
if (which.isUInt8())
return executeBitmapData<UInt8>(argument_types, arguments);
return executeBitmapData<UInt8>(argument_types, arguments, input_rows_count);
else if (which.isUInt16())
return executeBitmapData<UInt16>(argument_types, arguments);
return executeBitmapData<UInt16>(argument_types, arguments, input_rows_count);
else if (which.isUInt32())
return executeBitmapData<UInt32>(argument_types, arguments);
return executeBitmapData<UInt32>(argument_types, arguments, input_rows_count);
else if (which.isUInt64())
return executeBitmapData<UInt64>(argument_types, arguments);
return executeBitmapData<UInt64>(argument_types, arguments, input_rows_count);
else if (which.isInt8())
return executeBitmapData<Int8>(argument_types, arguments);
return executeBitmapData<Int8>(argument_types, arguments, input_rows_count);
else if (which.isInt16())
return executeBitmapData<Int16>(argument_types, arguments);
return executeBitmapData<Int16>(argument_types, arguments, input_rows_count);
else if (which.isInt32())
return executeBitmapData<Int32>(argument_types, arguments);
return executeBitmapData<Int32>(argument_types, arguments, input_rows_count);
else if (which.isInt64())
return executeBitmapData<Int64>(argument_types, arguments);
return executeBitmapData<Int64>(argument_types, arguments, input_rows_count);
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unexpected type {} of argument of function {}",
from_type->getName(), getName());
@ -187,7 +187,7 @@ public:
private:
template <typename T>
ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments) const
ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
{
// input data
const ColumnArray * array = typeid_cast<const ColumnArray *>(arguments[0].column.get());
@ -203,10 +203,10 @@ private:
AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<T>::name(), action, argument_types, params_row, properties);
auto col_to = ColumnAggregateFunction::create(bitmap_function);
col_to->reserve(offsets.size());
col_to->reserve(input_rows_count);
size_t pos = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bitmap_data

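In `executeBitmapData` above, row `i` of the input array column is the slice `[offsets[i - 1], offsets[i])` of a flattened values array. A simplified analogue with standard containers (a `std::set` stands in for the roaring bitmap state):

#include <cstddef>
#include <cstdint>
#include <set>
#include <vector>

static std::vector<std::set<uint32_t>> buildRowSets(
    const std::vector<uint32_t> & values,
    const std::vector<uint64_t> & offsets,
    size_t input_rows_count)
{
    std::vector<std::set<uint32_t>> res;
    res.reserve(input_rows_count); // mirrors col_to->reserve(input_rows_count)
    std::ptrdiff_t pos = 0;
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        std::ptrdiff_t end = static_cast<std::ptrdiff_t>(offsets[i]);
        res.emplace_back(values.begin() + pos, values.begin() + end);
        pos = end;
    }
    return res;
}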
View File

@ -23,7 +23,7 @@ namespace
constexpr size_t max_string_size = 1UL << 15;
template <typename ModelMap>
ALWAYS_INLINE inline Float64 naiveBayes(
Float64 naiveBayes(
const FrequencyHolder::EncodingMap & standard,
const ModelMap & model,
Float64 max_result)
@ -51,7 +51,7 @@ namespace
/// Count how many times each bigram occurs in the text.
template <typename ModelMap>
ALWAYS_INLINE inline void calculateStats(
void calculateStats(
const UInt8 * data,
const size_t size,
ModelMap & model)
@ -77,24 +77,25 @@ struct CharsetClassificationImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const auto & encodings_freq = FrequencyHolder::getInstance().getEncodingsFrequency();
if constexpr (detect_language)
/// 2 chars for ISO code + 1 zero byte
res_data.reserve(offsets.size() * 3);
res_data.reserve(input_rows_count * 3);
else
/// Mean charset length is 8
res_data.reserve(offsets.size() * 8);
res_data.reserve(input_rows_count * 8);
res_offsets.resize(offsets.size());
res_offsets.resize(input_rows_count);
size_t current_result_offset = 0;
double zero_frequency_log = log(zero_frequency);
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * str = data.data() + offsets[i - 1];
const size_t str_len = offsets[i] - offsets[i - 1] - 1;

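`calculateStats` above counts how often each bigram occurs, feeding the naive-Bayes scoring. A toy version over a hash map makes the idea concrete (the names and the 16-bit key are illustrative only; the real code hashes into a flat array):

#include <cstddef>
#include <cstdint>
#include <string_view>
#include <unordered_map>

static void countBigrams(std::string_view s, std::unordered_map<uint16_t, uint32_t> & model)
{
    for (size_t i = 0; i + 1 < s.size(); ++i)
    {
        uint16_t key = static_cast<uint16_t>(
            (static_cast<uint8_t>(s[i]) << 8) | static_cast<uint8_t>(s[i + 1]));
        ++model[key]; // occurrence count of this bigram
    }
}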
View File

@ -341,7 +341,7 @@ class FunctionIPv4NumToString : public IFunction
{
private:
template <typename ArgType>
ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const
ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
{
using ColumnType = ColumnVector<ArgType>;
@ -356,12 +356,12 @@ private:
ColumnString::Chars & vec_res = col_res->getChars();
ColumnString::Offsets & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(vec_in.size());
vec_res.resize(input_rows_count * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(input_rows_count);
char * begin = reinterpret_cast<char *>(vec_res.data());
char * pos = begin;
for (size_t i = 0; i < vec_in.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
DB::formatIPv4(reinterpret_cast<const unsigned char*>(&vec_in[i]), sizeof(ArgType), pos, mask_tail_octets, "xxx");
offsets_res[i] = pos - begin;
@ -532,7 +532,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -542,11 +542,11 @@ public:
auto col_res = ColumnIPv6::create();
auto & vec_res = col_res->getData();
vec_res.resize(col_in->size());
vec_res.resize(input_rows_count);
const auto & vec_in = col_in->getData();
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
mapIPv4ToIPv6(vec_in[i], reinterpret_cast<UInt8 *>(&vec_res[i].toUnderType()));
return col_res;
@ -557,7 +557,7 @@ public:
auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH);
auto & vec_res = col_res->getChars();
vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH);
vec_res.resize(input_rows_count * IPV6_BINARY_LENGTH);
const auto & vec_in = col_in->getData();
@ -742,7 +742,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
@ -751,13 +751,13 @@ public:
auto col_res = ColumnUInt64::create();
ColumnUInt64::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
vec_res.resize(input_rows_count);
const ColumnString::Chars & vec_src = col->getChars();
const ColumnString::Offsets & offsets_src = col->getOffsets();
size_t prev_offset = 0;
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t current_offset = offsets_src[i];
size_t string_size = current_offset - prev_offset - 1; /// mind the terminating zero byte
@ -1054,7 +1054,7 @@ public:
return std::make_shared<DataTypeUInt8>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnString * input_column = checkAndGetColumn<ColumnString>(arguments[0].column.get());
@ -1067,14 +1067,14 @@ public:
auto col_res = ColumnUInt8::create();
ColumnUInt8::Container & vec_res = col_res->getData();
vec_res.resize(input_column->size());
vec_res.resize(input_rows_count);
const ColumnString::Chars & vec_src = input_column->getChars();
const ColumnString::Offsets & offsets_src = input_column->getOffsets();
size_t prev_offset = 0;
UInt32 result = 0;
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
vec_res[i] = DB::parseIPv4whole(reinterpret_cast<const char *>(&vec_src[prev_offset]), reinterpret_cast<unsigned char *>(&result));
prev_offset = offsets_src[i];
@ -1110,7 +1110,7 @@ public:
return std::make_shared<DataTypeUInt8>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnString * input_column = checkAndGetColumn<ColumnString>(arguments[0].column.get());
if (!input_column)
@ -1122,14 +1122,14 @@ public:
auto col_res = ColumnUInt8::create();
ColumnUInt8::Container & vec_res = col_res->getData();
vec_res.resize(input_column->size());
vec_res.resize(input_rows_count);
const ColumnString::Chars & vec_src = input_column->getChars();
const ColumnString::Offsets & offsets_src = input_column->getOffsets();
size_t prev_offset = 0;
char buffer[IPV6_BINARY_LENGTH];
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
vec_res[i] = DB::parseIPv6whole(reinterpret_cast<const char *>(&vec_src[prev_offset]),
reinterpret_cast<const char *>(&vec_src[offsets_src[i] - 1]),

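The IPv4-to-string hunk preallocates the worst case per row ("255.255.255.255\0") and records an offset after each write. A stand-alone sketch of that loop, with `snprintf` standing in for `DB::formatIPv4` (byte-order and octet-masking details of the real function are glossed over):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static constexpr size_t IPV4_MAX_TEXT_LENGTH = 15;

static void formatIPv4Column(
    const std::vector<uint32_t> & in,
    std::vector<char> & chars,
    std::vector<uint64_t> & offsets,
    size_t input_rows_count)
{
    chars.resize(input_rows_count * (IPV4_MAX_TEXT_LENGTH + 1)); // worst case per row
    offsets.resize(input_rows_count);
    char * begin = chars.data();
    char * pos = begin;
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        uint32_t ip = in[i];
        pos += std::snprintf(pos, IPV4_MAX_TEXT_LENGTH + 1, "%u.%u.%u.%u",
                             static_cast<unsigned>((ip >> 24) & 0xFF), static_cast<unsigned>((ip >> 16) & 0xFF),
                             static_cast<unsigned>((ip >> 8) & 0xFF), static_cast<unsigned>(ip & 0xFF));
        *pos++ = '\0'; // ClickHouse strings keep a terminating zero byte
        offsets[i] = static_cast<uint64_t>(pos - begin);
    }
    chars.resize(offsets.empty() ? 0 : offsets.back()); // shrink to what was written
}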
View File

@ -177,7 +177,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -189,21 +189,20 @@ public:
"Illegal type {} of column {} argument of function {}, expected FixedString({})",
col_type_name.type->getName(), col_in->getName(), getName(), uuid_bytes_length);
const auto size = col_in->size();
const auto & vec_in = col_in->getChars();
auto col_res = ColumnString::create();
ColumnString::Chars & vec_res = col_res->getChars();
ColumnString::Offsets & offsets_res = col_res->getOffsets();
vec_res.resize(size * (uuid_text_length + 1));
offsets_res.resize(size);
vec_res.resize(input_rows_count * (uuid_text_length + 1));
offsets_res.resize(input_rows_count);
size_t src_offset = 0;
size_t dst_offset = 0;
const UUIDSerializer uuid_serializer(variant);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
uuid_serializer.deserialize(&vec_in[src_offset], &vec_res[dst_offset]);
src_offset += uuid_bytes_length;
@ -256,7 +255,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -266,17 +265,16 @@ public:
{
const auto & vec_in = col_in->getChars();
const auto & offsets_in = col_in->getOffsets();
const size_t size = offsets_in.size();
auto col_res = ColumnFixedString::create(uuid_bytes_length);
ColumnString::Chars & vec_res = col_res->getChars();
vec_res.resize(size * uuid_bytes_length);
vec_res.resize(input_rows_count * uuid_bytes_length);
size_t src_offset = 0;
size_t dst_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
/// If the string has an incorrect length, return a zero UUID.
/// If it has the correct length but does not look like a UUID, the behaviour is implementation-specific.
@ -300,18 +298,17 @@ public:
"Illegal type {} of column {} argument of function {}, expected FixedString({})",
col_type_name.type->getName(), col_in_fixed->getName(), getName(), uuid_text_length);
const auto size = col_in_fixed->size();
const auto & vec_in = col_in_fixed->getChars();
auto col_res = ColumnFixedString::create(uuid_bytes_length);
ColumnString::Chars & vec_res = col_res->getChars();
vec_res.resize(size * uuid_bytes_length);
vec_res.resize(input_rows_count * uuid_bytes_length);
size_t src_offset = 0;
size_t dst_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
uuid_serializer.serialize(&vec_in[src_offset], &vec_res[dst_offset]);
src_offset += uuid_text_length;
@ -359,7 +356,7 @@ public:
return std::make_shared<DataTypeFixedString>(uuid_bytes_length);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -370,16 +367,15 @@ public:
{
const auto & vec_in = col_in->getData();
const UUID * uuids = vec_in.data();
const size_t size = vec_in.size();
auto col_res = ColumnFixedString::create(uuid_bytes_length);
ColumnString::Chars & vec_res = col_res->getChars();
vec_res.resize(size * uuid_bytes_length);
vec_res.resize(input_rows_count * uuid_bytes_length);
size_t dst_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]);
uint64_t loBytes = DB::UUIDHelpers::getLowBytes(uuids[i]);
@ -448,7 +444,7 @@ public:
return std::make_shared<DataTypeDateTime64>(datetime_scale, timezone);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -457,12 +453,11 @@ public:
{
const auto & vec_in = col_in->getData();
const UUID * uuids = vec_in.data();
const size_t size = vec_in.size();
auto col_res = ColumnDateTime64::create(size, datetime_scale);
auto col_res = ColumnDateTime64::create(input_rows_count, datetime_scale);
auto & vec_res = col_res->getData();
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]);
const uint64_t ms = ((hiBytes & 0xf000) == 0x7000) ? (hiBytes >> 16) : 0;

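All of the UUID hunks share one property: input and output rows are fixed-width, so both buffers can be sized from the row count alone, with no need to consult `col_in->size()`. A minimal sketch of that fixed-width copy loop (hypothetical widths, not the real UUID serializer):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

static void copyFixedRows(
    const std::vector<uint8_t> & in, size_t in_width,
    std::vector<uint8_t> & out, size_t out_width,
    size_t input_rows_count)
{
    out.resize(input_rows_count * out_width); // fixed bytes per row
    size_t copy = in_width < out_width ? in_width : out_width;
    for (size_t i = 0; i < input_rows_count; ++i)
        std::memcpy(out.data() + i * out_width, in.data() + i * in_width, copy);
}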
View File

@ -151,36 +151,36 @@ struct Processor
template <typename FirstArgVectorType, typename SecondArgType>
void NO_INLINE
vectorConstant(const FirstArgVectorType & vec_first, const SecondArgType second_value,
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale,
size_t input_rows_count) const
{
size_t size = vec_first.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_first[i], second_value, scale_a, scale_b, result_scale);
}
template <typename FirstArgVectorType, typename SecondArgVectorType>
void NO_INLINE
vectorVector(const FirstArgVectorType & vec_first, const SecondArgVectorType & vec_second,
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale,
size_t input_rows_count) const
{
size_t size = vec_first.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_first[i], vec_second[i], scale_a, scale_b, result_scale);
}
template <typename FirstArgType, typename SecondArgVectorType>
void NO_INLINE
constantVector(const FirstArgType & first_value, const SecondArgVectorType & vec_second,
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale,
size_t input_rows_count) const
{
size_t size = vec_second.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(first_value, vec_second[i], scale_a, scale_b, result_scale);
}
};
@ -189,7 +189,7 @@ struct Processor
template <typename FirstArgType, typename SecondArgType, typename ResultType, typename Transform>
struct DecimalArithmeticsImpl
{
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type)
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count)
{
using FirstArgValueType = typename FirstArgType::FieldType;
using FirstArgColumnType = typename FirstArgType::ColumnType;
@ -214,13 +214,13 @@ struct DecimalArithmeticsImpl
if (first_col)
{
if (second_col_const)
op.vectorConstant(first_col->getData(), second_col_const->template getValue<SecondArgValueType>(), col_to->getData(), scale_a, scale_b, result_scale);
op.vectorConstant(first_col->getData(), second_col_const->template getValue<SecondArgValueType>(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count);
else
op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale);
op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count);
}
else if (first_col_const)
{
op.constantVector(first_col_const->template getValue<FirstArgValueType>(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale);
op.constantVector(first_col_const->template getValue<FirstArgValueType>(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count);
}
else
{
@ -293,14 +293,14 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
return resolveOverload(arguments, result_type);
return resolveOverload(arguments, result_type, input_rows_count);
}
private:
/// Long resolver to call the proper templated function.
ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{
WhichDataType which_dividend(arguments[0].type.get());
WhichDataType which_divisor(arguments[1].type.get());
@ -309,26 +309,26 @@ private:
{
using DividendType = DataTypeDecimal32;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}
else if (which_dividend.isDecimal64())
{
using DividendType = DataTypeDecimal64;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}
@ -336,13 +336,13 @@ private:
{
using DividendType = DataTypeDecimal128;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}
@ -350,13 +350,13 @@ private:
{
using DividendType = DataTypeDecimal256;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}

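The `Processor` trio above differs only in which argument is a vector; all three now size the result from `input_rows_count`. A reduced sketch of the three shapes (plain `double` in place of decimals, scales omitted):

#include <cstddef>
#include <vector>

template <typename Op>
void vectorConstant(const std::vector<double> & a, double b,
                    std::vector<double> & to, Op op, size_t input_rows_count)
{
    to.resize(input_rows_count);
    for (size_t i = 0; i < input_rows_count; ++i)
        to[i] = op(a[i], b);
}

template <typename Op>
void vectorVector(const std::vector<double> & a, const std::vector<double> & b,
                  std::vector<double> & to, Op op, size_t input_rows_count)
{
    to.resize(input_rows_count);
    for (size_t i = 0; i < input_rows_count; ++i)
        to[i] = op(a[i], b[i]);
}

template <typename Op>
void constantVector(double a, const std::vector<double> & b,
                    std::vector<double> & to, Op op, size_t input_rows_count)
{
    to.resize(input_rows_count);
    for (size_t i = 0; i < input_rows_count; ++i)
        to[i] = op(a, b[i]);
}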
View File

@ -181,7 +181,7 @@ public:
bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// The dictionary key that defines the "point of view".
std::string dict_key;
@ -205,10 +205,9 @@ public:
const typename ColumnVector<T>::Container & vec_from = col_from->getData();
typename ColumnVector<T>::Container & vec_to = col_to->getData();
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(vec_from[i], dict);
return col_to;
@ -273,7 +272,7 @@ public:
bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// The dictionary key that defines the "point of view".
std::string dict_key;
@ -303,10 +302,9 @@ public:
const typename ColumnVector<T>::Container & vec_from1 = col_vec1->getData();
const typename ColumnVector<T>::Container & vec_from2 = col_vec2->getData();
typename ColumnUInt8::Container & vec_to = col_to->getData();
size_t size = vec_from1.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(vec_from1[i], vec_from2[i], dict);
return col_to;
@ -318,10 +316,9 @@ public:
const typename ColumnVector<T>::Container & vec_from1 = col_vec1->getData();
const T const_from2 = col_const2->template getValue<T>();
typename ColumnUInt8::Container & vec_to = col_to->getData();
size_t size = vec_from1.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(vec_from1[i], const_from2, dict);
return col_to;
@ -333,10 +330,9 @@ public:
const T const_from1 = col_const1->template getValue<T>();
const typename ColumnVector<T>::Container & vec_from2 = col_vec2->getData();
typename ColumnUInt8::Container & vec_to = col_to->getData();
size_t size = vec_from2.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(const_from1, vec_from2[i], dict);
return col_to;
@ -405,7 +401,7 @@ public:
bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// The dictionary key that defines the "point of view".
std::string dict_key;
@ -432,11 +428,10 @@ public:
auto & res_values = col_values->getData();
const typename ColumnVector<T>::Container & vec_from = col_from->getData();
size_t size = vec_from.size();
res_offsets.resize(size);
res_values.reserve(size * 4);
res_offsets.resize(input_rows_count);
res_values.reserve(input_rows_count * 4);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
T cur = vec_from[i];
for (size_t depth = 0; cur && depth < DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH; ++depth)

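The last hunk expands each region id into its ancestor chain, capped at `DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH`, appending into a flattened values array plus offsets. A self-contained sketch with a plain parent array standing in for the dictionary (ids are assumed to index into `parent`; 0 terminates the chain):

#include <cstddef>
#include <cstdint>
#include <vector>

static constexpr size_t MAX_DEPTH = 1000; // stands in for DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH

static void expandHierarchy(
    const std::vector<uint32_t> & ids,
    const std::vector<uint32_t> & parent, // parent[id]; 0 terminates the chain
    std::vector<uint32_t> & res_values,
    std::vector<uint64_t> & res_offsets,
    size_t input_rows_count)
{
    res_offsets.resize(input_rows_count);
    res_values.reserve(input_rows_count * 4); // same heuristic as above
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        uint32_t cur = ids[i];
        for (size_t depth = 0; cur && depth < MAX_DEPTH; ++depth)
        {
            res_values.push_back(cur);
            cur = parent[cur]; // assumes every id indexes into parent
        }
        res_offsets[i] = res_values.size();
    }
}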
View File

@ -125,7 +125,7 @@ public:
}
String error;
for (const auto i : collections::range(0, input_rows_count))
for (size_t i = 0; i < input_rows_count; ++i)
{
if (!col_json_const)
{
@ -314,7 +314,7 @@ private:
static size_t calculateMaxSize(const ColumnString::Offsets & offsets)
{
size_t max_size = 0;
for (const auto i : collections::range(0, offsets.size()))
for (size_t i = 0; i < offsets.size(); ++i)
{
size_t size = offsets[i] - offsets[i - 1];
max_size = std::max(max_size, size);

View File

@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED;
struct FunctionDetectLanguageImpl
{
static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string)
static std::string_view codeISO(std::string_view code_string)
{
if (code_string.ends_with("-Latn"))
code_string.remove_suffix(code_string.size() - 5);
@ -63,16 +63,17 @@ struct FunctionDetectLanguageImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
/// Constant 3 is based on the fact that in general we need 2 characters for ISO code + 1 zero byte
res_data.reserve(offsets.size() * 3);
res_offsets.resize(offsets.size());
res_data.reserve(input_rows_count * 3);
res_offsets.resize(input_rows_count);
bool is_reliable;
size_t res_offset = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * str = data.data() + offsets[i - 1];
const size_t str_len = offsets[i] - offsets[i - 1] - 1;

View File

@ -48,7 +48,7 @@ using UInt8Container = ColumnUInt8::Container;
using UInt8ColumnPtrs = std::vector<const ColumnUInt8 *>;
MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, const bool make_nullable)
MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, bool make_nullable)
{
const size_t rows_count = ternary_data.size();

View File

@ -40,17 +40,18 @@ struct FunctionDetectProgrammingLanguageImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const auto & programming_freq = FrequencyHolder::getInstance().getProgrammingFrequency();
/// Constant 5 is arbitrary
res_data.reserve(offsets.size() * 5);
res_offsets.resize(offsets.size());
res_data.reserve(input_rows_count * 5);
res_offsets.resize(input_rows_count);
size_t res_offset = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * str = data.data() + offsets[i - 1];
const size_t str_len = offsets[i] - offsets[i - 1] - 1;

View File

@ -80,8 +80,7 @@ public:
auto col_to = ColumnVector<ToType>::create();
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
size_t size = input_rows_count;
vec_to.resize(size);
vec_to.resize(input_rows_count);
RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), vec_to.size() * sizeof(ToType));
return col_to;

View File

@ -37,12 +37,12 @@ struct FunctionStringDistanceImpl
const ColumnString::Offsets & haystack_offsets,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<ResultType> & res)
PaddedPODArray<ResultType> & res,
size_t input_rows_count)
{
size_t size = res.size();
const char * haystack = reinterpret_cast<const char *>(haystack_data.data());
const char * needle = reinterpret_cast<const char *>(needle_data.data());
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = Op::process(
haystack + haystack_offsets[i - 1],
@ -56,13 +56,13 @@ struct FunctionStringDistanceImpl
const String & haystack,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<ResultType> & res)
PaddedPODArray<ResultType> & res,
size_t input_rows_count)
{
const char * haystack_data = haystack.data();
size_t haystack_size = haystack.size();
const char * needle = reinterpret_cast<const char *>(needle_data.data());
size_t size = res.size();
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = Op::process(haystack_data, haystack_size,
needle + needle_offsets[i - 1], needle_offsets[i] - needle_offsets[i - 1] - 1);
@ -73,9 +73,10 @@ struct FunctionStringDistanceImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
const String & needle,
PaddedPODArray<ResultType> & res)
PaddedPODArray<ResultType> & res,
size_t input_rows_count)
{
constantVector(needle, data, offsets, res);
constantVector(needle, data, offsets, res, input_rows_count);
}
};

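The final hunk delegates `vectorConstant` to `constantVector`, which is valid only because the distance is symmetric in its two arguments. A toy symmetric op showing the same delegation (`res` is presized to the row count by the caller, as in the code above):

#include <cstddef>
#include <string>
#include <string_view>
#include <vector>

static size_t commonPrefix(std::string_view a, std::string_view b) // symmetric
{
    size_t n = a.size() < b.size() ? a.size() : b.size();
    size_t i = 0;
    while (i < n && a[i] == b[i]) ++i;
    return i;
}

static void constantVector(const std::string & haystack,
                           const std::vector<std::string> & needles,
                           std::vector<size_t> & res, size_t input_rows_count)
{
    for (size_t i = 0; i < input_rows_count; ++i)
        res[i] = commonPrefix(haystack, needles[i]);
}

static void vectorConstant(const std::vector<std::string> & data,
                           const std::string & needle,
                           std::vector<size_t> & res, size_t input_rows_count)
{
    constantVector(needle, data, res, input_rows_count); // symmetry makes the swap safe
}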
View File

@ -315,9 +315,9 @@ struct SimHashImpl
return getSimHash(finger_vec);
}
static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray<UInt64> & res)
static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * one_data = &data[offsets[i - 1]];
const size_t data_size = offsets[i] - offsets[i - 1] - 1;
@ -543,12 +543,13 @@ struct MinHashImpl
PaddedPODArray<UInt64> * res1,
PaddedPODArray<UInt64> * res2,
ColumnTuple * res1_strings,
ColumnTuple * res2_strings)
ColumnTuple * res2_strings,
size_t input_rows_count)
{
MinHeap min_heap;
MaxHeap max_heap;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * one_data = &data[offsets[i - 1]];
const size_t data_size = offsets[i] - offsets[i - 1] - 1;

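`MinHashImpl::apply` above keeps, per row, the smallest and largest shingle hashes via heaps. A toy per-string min-hash with single-byte shingles shows the heap trick (the hash constant and K are arbitrary choices for illustration):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <queue>
#include <string_view>
#include <vector>

static std::vector<uint64_t> minHashes(std::string_view s, size_t k = 4)
{
    std::priority_queue<uint64_t> heap; // max-heap holding the k smallest hashes
    for (unsigned char c : s)
    {
        uint64_t h = c * 0x9E3779B97F4A7C15ull; // toy hash
        if (heap.size() < k)
            heap.push(h);
        else if (h < heap.top())
        {
            heap.pop();
            heap.push(h);
        }
    }
    std::vector<uint64_t> out;
    while (!heap.empty()) { out.push_back(heap.top()); heap.pop(); }
    std::sort(out.begin(), out.end());
    return out;
}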
View File

@ -135,7 +135,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
@ -152,9 +152,9 @@ public:
{
auto col_res = ColumnVector<UInt64>::create();
auto & vec_res = col_res->getData();
vec_res.resize(column->size());
vec_res.resize(input_rows_count);
const ColumnString & col_str_vector = checkAndGetColumn<ColumnString>(*column);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, vec_res);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, vec_res, input_rows_count);
return col_res;
}
else if constexpr (is_arg) // Min hash arg
@ -171,7 +171,7 @@ public:
auto max_tuple = ColumnTuple::create(std::move(max_columns));
const ColumnString & col_str_vector = checkAndGetColumn<ColumnString>(*column);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get());
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get(), input_rows_count);
MutableColumns tuple_columns;
tuple_columns.emplace_back(std::move(min_tuple));
@ -184,10 +184,10 @@ public:
auto col_h2 = ColumnVector<UInt64>::create();
auto & vec_h1 = col_h1->getData();
auto & vec_h2 = col_h2->getData();
vec_h1.resize(column->size());
vec_h2.resize(column->size());
vec_h1.resize(input_rows_count);
vec_h2.resize(input_rows_count);
const ColumnString & col_str_vector = checkAndGetColumn<ColumnString>(*column);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr, input_rows_count);
MutableColumns tuple_columns;
tuple_columns.emplace_back(std::move(col_h1));
tuple_columns.emplace_back(std::move(col_h2));

View File

@ -224,7 +224,7 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
{
@ -233,11 +233,10 @@ public:
const typename ColumnString::Chars & data = col_from->getChars();
const typename ColumnString::Offsets & offsets = col_from->getOffsets();
auto & chars_to = col_to->getChars();
const auto size = offsets.size();
chars_to.resize(size * Impl::length);
chars_to.resize(input_rows_count * Impl::length);
ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[current_offset]),
@ -253,11 +252,10 @@ public:
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnFixedString::Chars & data = col_from_fix->getChars();
const auto size = col_from_fix->size();
auto & chars_to = col_to->getChars();
const auto length = col_from_fix->getN();
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
chars_to.resize(input_rows_count * Impl::length);
for (size_t i = 0; i < input_rows_count; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
@ -268,11 +266,10 @@ public:
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnIPv6::Container & data = col_from_ip->getData();
const auto size = col_from_ip->size();
auto & chars_to = col_to->getChars();
const auto length = sizeof(IPv6::UnderlyingType);
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
chars_to.resize(input_rows_count * Impl::length);
for (size_t i = 0; i < input_rows_count; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));

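Each branch above writes a fixed `Impl::length`-byte digest per row, so `chars_to` is sized once from the row count. A compilable miniature with an FNV-1a stand-in for the real hash:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

static constexpr size_t DIGEST_LEN = 4; // stands in for Impl::length

static void toyDigest(const char * data, size_t len, uint8_t * out)
{
    uint32_t h = 2166136261u; // FNV-1a offset basis
    for (size_t i = 0; i < len; ++i)
        h = (h ^ static_cast<uint8_t>(data[i])) * 16777619u;
    std::memcpy(out, &h, DIGEST_LEN);
}

static void hashFixedColumn(const std::vector<char> & data, size_t n,
                            std::vector<uint8_t> & chars_to, size_t input_rows_count)
{
    chars_to.resize(input_rows_count * DIGEST_LEN); // fixed output width per row
    for (size_t i = 0; i < input_rows_count; ++i)
        toyDigest(&data[i * n], n, &chars_to[i * DIGEST_LEN]);
}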
View File

@ -90,7 +90,7 @@ struct NgramDistanceImpl
((cont[Offset + I] = std::tolower(cont[Offset + I])), ...);
}
static ALWAYS_INLINE size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end)
static size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end)
{
/// Offset before which we copy some data.
constexpr size_t padding_offset = default_padding - N + 1;
@ -120,7 +120,7 @@ struct NgramDistanceImpl
return default_padding;
}
static ALWAYS_INLINE size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end)
static size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end)
{
/// The same copying as described in the function above.
memcpy(code_points, code_points + default_padding - N + 1, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(CodePoint));
@ -195,7 +195,7 @@ struct NgramDistanceImpl
}
template <bool save_ngrams>
static ALWAYS_INLINE inline size_t calculateNeedleStats(
static inline size_t calculateNeedleStats(
const char * data,
const size_t size,
NgramCount * ngram_stats,
@ -228,7 +228,7 @@ struct NgramDistanceImpl
}
template <bool reuse_stats>
static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
static inline UInt64 calculateHaystackStatsAndMetric(
const char * data,
const size_t size,
NgramCount * ngram_stats,
@ -318,9 +318,9 @@ struct NgramDistanceImpl
const ColumnString::Offsets & haystack_offsets,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
const size_t haystack_offsets_size = haystack_offsets.size();
size_t prev_haystack_offset = 0;
size_t prev_needle_offset = 0;
@ -331,7 +331,7 @@ struct NgramDistanceImpl
std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
for (size_t i = 0; i < haystack_offsets_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
@ -391,12 +391,13 @@ struct NgramDistanceImpl
std::string haystack,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
/// For the symmetric version it is better to use vectorConstant.
if constexpr (symmetric)
{
vectorConstant(needle_data, needle_offsets, std::move(haystack), res);
vectorConstant(needle_data, needle_offsets, std::move(haystack), res, input_rows_count);
}
else
{
@ -404,7 +405,6 @@ struct NgramDistanceImpl
haystack.resize(haystack_size + default_padding);
/// For an explanation of the logic, see the vectorVector function.
const size_t needle_offsets_size = needle_offsets.size();
size_t prev_offset = 0;
std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};
@ -412,7 +412,7 @@ struct NgramDistanceImpl
std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
for (size_t i = 0; i < needle_offsets_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * needle = reinterpret_cast<const char *>(&needle_data[prev_offset]);
const size_t needle_size = needle_offsets[i] - prev_offset - 1;
@ -456,7 +456,8 @@ struct NgramDistanceImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
std::string needle,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
/// zeroing our map
std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};
@ -472,7 +473,7 @@ struct NgramDistanceImpl
size_t distance = needle_stats_size;
size_t prev_offset = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * haystack = &data[prev_offset];
const size_t haystack_size = offsets[i] - prev_offset - 1;

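Every loop in this file walks rows the same way: `prev_offset` tracks the start of the current row and `offsets[i]` its end, with one byte of trailing zero excluded. A stand-alone helper making that slicing explicit:

#include <cstddef>
#include <cstdint>
#include <string_view>
#include <vector>

static std::vector<std::string_view> rowViews(
    const std::vector<char> & data,
    const std::vector<uint64_t> & offsets,
    size_t input_rows_count)
{
    std::vector<std::string_view> rows;
    rows.reserve(input_rows_count);
    uint64_t prev_offset = 0;
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        rows.emplace_back(data.data() + prev_offset, offsets[i] - prev_offset - 1);
        prev_offset = offsets[i];
    }
    return rows;
}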
View File

@ -57,7 +57,7 @@ public:
return std::make_shared<DataTypeNumber<typename Impl::ResultType>>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
using ResultType = typename Impl::ResultType;
@ -90,7 +90,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(column_haystack->size());
vec_res.resize(input_rows_count);
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);
@ -110,7 +110,7 @@ public:
Impl::max_string_size);
}
}
Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res);
Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res, input_rows_count);
}
else if (col_haystack_vector && col_needle_vector)
{
@ -119,7 +119,8 @@ public:
col_haystack_vector->getOffsets(),
col_needle_vector->getChars(),
col_needle_vector->getOffsets(),
vec_res);
vec_res,
input_rows_count);
}
else if (col_haystack_const && col_needle_vector)
{
@ -136,7 +137,7 @@ public:
Impl::max_string_size);
}
}
Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res, input_rows_count);
}
else
{

View File

@ -55,7 +55,7 @@ public:
return arguments[0];
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
const ColumnString * col = checkAndGetColumn<ColumnString>(column.get());
@ -65,7 +65,7 @@ public:
arguments[0].column->getName(), getName());
auto col_res = ColumnString::create();
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count);
return col_res;
}
};
@ -104,7 +104,7 @@ public:
return std::make_shared<DataTypeFloat32>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
const ColumnString * col = checkAndGetColumn<ColumnString>(column.get());
@ -115,9 +115,9 @@ public:
auto col_res = ColumnVector<Float32>::create();
ColumnVector<Float32>::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
vec_res.resize(input_rows_count);
Impl::vector(col->getChars(), col->getOffsets(), vec_res);
Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count);
return col_res;
}
};

View File

@ -130,7 +130,7 @@ struct TimeWindowImpl
static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name);
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name);
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count);
};
template <TimeWindowFunctionName type>
@ -196,7 +196,7 @@ struct TimeWindowImpl<TUMBLE>
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto & interval_column = arguments[1];
@ -214,38 +214,37 @@ struct TimeWindowImpl<TUMBLE>
{
/// TODO: add proper support for fractional seconds
case IntervalKind::Kind::Second:
return executeTumble<UInt32, IntervalKind::Kind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Second>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Minute:
return executeTumble<UInt32, IntervalKind::Kind::Minute>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Minute>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Hour:
return executeTumble<UInt32, IntervalKind::Kind::Hour>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Hour>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Day:
return executeTumble<UInt32, IntervalKind::Kind::Day>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Day>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Week:
return executeTumble<UInt16, IntervalKind::Kind::Week>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Week>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Month:
return executeTumble<UInt16, IntervalKind::Kind::Month>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Month>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Quarter:
return executeTumble<UInt16, IntervalKind::Kind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Year:
return executeTumble<UInt16, IntervalKind::Kind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Year>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
default:
throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet");
}
}
template <typename ToType, IntervalKind::Kind unit>
static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone)
static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone, size_t input_rows_count)
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
for (size_t i = 0; i != size; ++i)
start_data.resize(input_rows_count);
end_data.resize(input_rows_count);
for (size_t i = 0; i != input_rows_count; ++i)
{
start_data[i] = ToStartOfTransform<unit>::execute(time_data[i], num_units, time_zone);
end_data[i] = AddTime<unit>::execute(start_data[i], num_units, time_zone);
@ -283,7 +282,7 @@ struct TimeWindowImpl<TUMBLE_START>
}
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -296,7 +295,7 @@ struct TimeWindowImpl<TUMBLE_START>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 0, function_name);
}
};
@ -311,7 +310,7 @@ struct TimeWindowImpl<TUMBLE_END>
return TimeWindowImpl<TUMBLE_START>::getReturnType(arguments, function_name);
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name)
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -324,7 +323,7 @@ struct TimeWindowImpl<TUMBLE_END>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 1, function_name);
}
};
@ -372,7 +371,7 @@ struct TimeWindowImpl<HOP>
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
@ -396,28 +395,28 @@ struct TimeWindowImpl<HOP>
/// TODO: add proper support for fractional seconds
case IntervalKind::Kind::Second:
return executeHop<UInt32, IntervalKind::Kind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Minute:
return executeHop<UInt32, IntervalKind::Kind::Minute>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Hour:
return executeHop<UInt32, IntervalKind::Kind::Hour>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Day:
return executeHop<UInt32, IntervalKind::Kind::Day>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Week:
return executeHop<UInt16, IntervalKind::Kind::Week>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Month:
return executeHop<UInt16, IntervalKind::Kind::Month>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Quarter:
return executeHop<UInt16, IntervalKind::Kind::Quarter>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Year:
return executeHop<UInt16, IntervalKind::Kind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
default:
                throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by time windows yet");
}
@ -425,18 +424,17 @@ struct TimeWindowImpl<HOP>
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count)
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
start_data.resize(input_rows_count);
end_data.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
@ -509,7 +507,7 @@ struct TimeWindowImpl<WINDOW_ID>
return std::make_shared<DataTypeUInt32>();
}
static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
@ -533,28 +531,28 @@ struct TimeWindowImpl<WINDOW_ID>
/// TODO: add proper support for fractional seconds
case IntervalKind::Kind::Second:
return executeHopSlice<UInt32, IntervalKind::Kind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Minute:
return executeHopSlice<UInt32, IntervalKind::Kind::Minute>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Hour:
return executeHopSlice<UInt32, IntervalKind::Kind::Hour>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Day:
return executeHopSlice<UInt32, IntervalKind::Kind::Day>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Week:
return executeHopSlice<UInt16, IntervalKind::Kind::Week>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Month:
return executeHopSlice<UInt16, IntervalKind::Kind::Month>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Quarter:
return executeHopSlice<UInt16, IntervalKind::Kind::Quarter>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Year:
return executeHopSlice<UInt16, IntervalKind::Kind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
default:
                throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by time windows yet");
}
@ -563,17 +561,16 @@ struct TimeWindowImpl<WINDOW_ID>
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count)
{
Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units);
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto end = ColumnVector<ToType>::create();
auto & end_data = end->getData();
end_data.resize(size);
for (size_t i = 0; i < size; ++i)
end_data.resize(input_rows_count);
for (size_t i = 0; i < input_rows_count; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
@ -593,23 +590,23 @@ struct TimeWindowImpl<WINDOW_ID>
return end;
}
static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
ColumnPtr column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
ColumnPtr column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(column, 1, function_name);
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
if (arguments.size() == 2)
return dispatchForTumbleColumns(arguments, function_name);
return dispatchForTumbleColumns(arguments, function_name, input_rows_count);
else
{
const auto & third_column = arguments[2];
if (arguments.size() == 3 && WhichDataType(third_column.type).isString())
return dispatchForTumbleColumns(arguments, function_name);
return dispatchForTumbleColumns(arguments, function_name, input_rows_count);
else
return dispatchForHopColumns(arguments, function_name);
return dispatchForHopColumns(arguments, function_name, input_rows_count);
}
}
};
@ -639,7 +636,7 @@ struct TimeWindowImpl<HOP_START>
}
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -652,7 +649,7 @@ struct TimeWindowImpl<HOP_START>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 0, function_name);
}
};
@ -667,7 +664,7 @@ struct TimeWindowImpl<HOP_END>
return TimeWindowImpl<HOP_START>::getReturnType(arguments, function_name);
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -680,7 +677,7 @@ struct TimeWindowImpl<HOP_END>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 1, function_name);
}
@ -693,9 +690,9 @@ DataTypePtr FunctionTimeWindow<type>::getReturnTypeImpl(const ColumnsWithTypeAnd
}
template <TimeWindowFunctionName type>
ColumnPtr FunctionTimeWindow<type>::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const
ColumnPtr FunctionTimeWindow<type>::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const
{
return TimeWindowImpl<type>::dispatchForColumns(arguments, name);
return TimeWindowImpl<type>::dispatchForColumns(arguments, name, input_rows_count);
}
}
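
A pattern that repeats through this commit: every execute* helper now receives input_rows_count from executeImpl instead of re-deriving the row count from a column's size. A standalone sketch of the second-granularity tumble computation the switch above dispatches into - std::vector and plain integers stand in for ClickHouse's column types, and the helper name is illustrative:

#include <cstdint>
#include <utility>
#include <vector>

// Sketch of executeTumble for IntervalKind::Kind::Second: the window start is the
// timestamp rounded down to a multiple of num_units seconds, and the window end is
// one interval later. Time zones only matter for coarser interval kinds.
std::vector<std::pair<uint32_t, uint32_t>>
tumbleSeconds(const std::vector<uint32_t> & time_data, uint64_t num_units, size_t input_rows_count)
{
    std::vector<std::pair<uint32_t, uint32_t>> windows(input_rows_count);
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        auto start = static_cast<uint32_t>(time_data[i] / num_units * num_units);
        windows[i] = {start, static_cast<uint32_t>(start + num_units)};
    }
    return windows;
}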

View File

@ -18,7 +18,7 @@ namespace DB
*/
struct FunctionDetectTonalityImpl
{
static ALWAYS_INLINE inline Float32 detectTonality(
static Float32 detectTonality(
const UInt8 * str,
const size_t str_len,
const FrequencyHolder::Map & emotional_dict)
@ -63,13 +63,13 @@ struct FunctionDetectTonalityImpl
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
const auto & emotional_dict = FrequencyHolder::getInstance().getEmotionalDict();
size_t size = offsets.size();
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = detectTonality(data.data() + prev_offset, offsets[i] - 1 - prev_offset, emotional_dict);
prev_offset = offsets[i];
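
The body of detectTonality is not shown in this hunk; conceptually it scores a text by the emotional weight of its words. A minimal sketch under the assumption that the score is an average of per-word weights, with std::unordered_map standing in for FrequencyHolder::Map:

#include <cstddef>
#include <sstream>
#include <string>
#include <unordered_map>

// Hypothetical stand-in for detectTonality: average the dictionary weight of
// every whitespace-separated word; words missing from the dictionary count as 0.
float detectTonalitySketch(const std::string & text, const std::unordered_map<std::string, float> & emotional_dict)
{
    std::istringstream in(text);
    std::string word;
    float weight = 0.0f;
    size_t count = 0;
    for (; in >> word; ++count)
    {
        auto it = emotional_dict.find(word);
        if (it != emotional_dict.end())
            weight += it->second;
    }
    return count ? weight / static_cast<float>(count) : 0.0f;
}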

View File

@ -73,13 +73,11 @@ public:
size_t array_count = arguments.size();
const auto & last_arg = arguments[array_count - 1];
size_t input_rows_count_local = input_rows_count;
bool null_last = true;
if (!isArray(last_arg.type))
{
--array_count;
null_last = check_condition(last_arg, context, input_rows_count_local);
null_last = check_condition(last_arg, context, input_rows_count);
}
ColumnsWithTypeAndName new_args;
@ -119,11 +117,11 @@ public:
}
auto zipped
= FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local);
= FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count);
ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared<DataTypeArray>(result_type), "zipped"}});
auto sorted_tuple
= FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local);
= FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count);
auto null_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
@ -139,10 +137,10 @@ public:
= std::make_shared<DataTypeArray>(makeNullable(nested_types[i]));
ColumnsWithTypeAndName null_array_arg({
{null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"},
{null_type->createColumnConstWithDefaultValue(input_rows_count), null_type, "NULL"},
});
tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_local);
tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count);
tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst();
}
else
@ -153,7 +151,7 @@ public:
                auto tuple_column = FunctionFactory::instance()
.get("tupleElement", context)
->build(untuple_args)
->execute(untuple_args, result_type, input_rows_count_local);
->execute(untuple_args, result_type, input_rows_count);
auto out_tmp = ColumnArray::create(nested_types[i]->createColumn());
@ -183,7 +181,7 @@ public:
auto inside_null_type = nested_types[0];
ColumnsWithTypeAndName indexof_args({
arg_of_index,
{inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"},
{inside_null_type->createColumnConstWithDefaultValue(input_rows_count), inside_null_type, "NULL"},
});
auto null_index_datetype = std::make_shared<DataTypeUInt64>();
@ -192,7 +190,7 @@ public:
slice_index.column = FunctionFactory::instance()
.get("indexOf", context)
->build(indexof_args)
->execute(indexof_args, result_type, input_rows_count_local);
->execute(indexof_args, result_type, input_rows_count);
auto null_index_in_array = slice_index.column->get64(0);
if (null_index_in_array > 0)
@ -220,15 +218,15 @@ public:
ColumnsWithTypeAndName slice_args_right(
{{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index});
ColumnWithTypeAndName arr_left{
fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""};
fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count), arg_type, ""};
ColumnWithTypeAndName arr_right{
fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""};
fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count), arg_type, ""};
                ColumnsWithTypeAndName arr_concat({arr_right, arr_left});
                auto out_tmp = FunctionFactory::instance()
                                   .get("arrayConcat", context)
                                   ->build(arr_concat)
                                   ->execute(arr_concat, arg_type, input_rows_count_local);
                                   ->execute(arr_concat, arg_type, input_rows_count);
adjusted_columns[i] = std::move(out_tmp);
}
}
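
Stripped of the function-composition machinery (FunctionFactory lookups for arraySlice and arrayConcat), the NULL-repositioning step above amounts to rotating each array at the sentinel's position. A sketch of that effect on a plain std::vector, assuming the 1-based index convention of indexOf:

#include <algorithm>
#include <cstddef>
#include <vector>

// Move everything from the 1-based null_index onward to the front while keeping
// the order of both parts - the same result as arrayConcat(right_slice, left_slice).
std::vector<int> rotateAtNullIndex(std::vector<int> arr, size_t null_index)
{
    if (null_index > 0 && null_index <= arr.size())
        std::rotate(arr.begin(), arr.begin() + static_cast<std::ptrdiff_t>(null_index - 1), arr.end());
    return arr;
}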

View File

@ -124,7 +124,7 @@ public:
bool hasEmptyBound() const { return has_empty_bound; }
inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const
inline bool contains(CoordinateType x, CoordinateType y) const
{
Point point(x, y);
@ -167,7 +167,7 @@ public:
UInt64 getAllocatedBytes() const;
inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const;
bool contains(CoordinateType x, CoordinateType y) const;
private:
enum class CellType : uint8_t
@ -199,7 +199,7 @@ private:
}
        /// The inner part of the HalfPlane is the left side of the initialized vector.
bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; }
bool contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; }
};
struct Cell
@ -233,7 +233,7 @@ private:
void calcGridAttributes(Box & box);
template <typename T>
T ALWAYS_INLINE getCellIndex(T row, T col) const { return row * grid_size + col; }
T getCellIndex(T row, T col) const { return row * grid_size + col; }
/// Complex case. Will check intersection directly.
inline void addComplexPolygonCell(size_t index, const Box & box);
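
The contains check that loses ALWAYS_INLINE here is the classic signed line-side test: a point lies in the half-plane when a * x + b * y + c >= 0. A sketch of how such coefficients can be derived from a directed segment (the builder function is illustrative, not the class's actual constructor):

struct HalfPlaneSketch
{
    double a, b, c;
    bool contains(double x, double y) const { return a * x + b * y + c >= 0; }
};

// Build the half-plane whose inner part is the left side of the vector (x1, y1) -> (x2, y2):
// the sign of the cross product (p2 - p1) x (p - p1) decides the side.
HalfPlaneSketch halfPlaneFromSegment(double x1, double y1, double x2, double y2)
{
    return {y1 - y2, x2 - x1, x1 * y2 - x2 * y1};
}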

View File

@ -201,15 +201,15 @@ struct ReplaceRegexpImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
@ -232,13 +232,13 @@ struct ReplaceRegexpImpl
case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All;
}
};
ReplaceStringImpl<Name, convertTrait(replace)>::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets);
ReplaceStringImpl<Name, convertTrait(replace)>::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets, input_rows_count);
return;
}
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -257,19 +257,19 @@ struct ReplaceRegexpImpl
const ColumnString::Offsets & needle_offsets,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
assert(haystack_offsets.size() == needle_offsets.size());
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
@ -302,7 +302,8 @@ struct ReplaceRegexpImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
assert(haystack_offsets.size() == replacement_offsets.size());
@ -311,8 +312,7 @@ struct ReplaceRegexpImpl
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
@ -323,7 +323,7 @@ struct ReplaceRegexpImpl
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
@ -349,20 +349,20 @@ struct ReplaceRegexpImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
assert(haystack_offsets.size() == needle_offsets.size());
assert(needle_offsets.size() == replacement_offsets.size());
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
@ -399,15 +399,15 @@ struct ReplaceRegexpImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
ColumnString::Offset res_offset = 0;
size_t haystack_size = haystack_data.size() / n;
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
@ -419,7 +419,7 @@ struct ReplaceRegexpImpl
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t from = i * n;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + from);
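
All of the replace* overloads share the same column layout: haystack_data stores every string back to back, each terminated by a zero byte, and haystack_offsets[i] points one past row i's terminator - hence each row starts at offsets[i - 1] (or 0) and spans offsets[i] - start - 1 bytes. A sketch of that walk with standard containers:

#include <cstddef>
#include <string_view>
#include <vector>

// Decode a ClickHouse-style string column into views, bounded by input_rows_count
// rather than offsets.size(), mirroring the loops above.
std::vector<std::string_view>
collectRows(const std::vector<char> & data, const std::vector<size_t> & offsets, size_t input_rows_count)
{
    std::vector<std::string_view> rows;
    rows.reserve(input_rows_count);
    size_t prev_offset = 0;
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        rows.emplace_back(data.data() + prev_offset, offsets[i] - prev_offset - 1); // exclude the trailing zero byte
        prev_offset = offsets[i];
    }
    return rows;
}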

View File

@ -35,7 +35,8 @@ struct ReplaceStringImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
@ -46,8 +47,7 @@ struct ReplaceStringImpl
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
const size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
/// The current index in the array of strings.
size_t i = 0;
@ -124,21 +124,20 @@ struct ReplaceStringImpl
const ColumnString::Offsets & needle_offsets,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
chassert(haystack_offsets.size() == needle_offsets.size());
const size_t haystack_size = haystack_offsets.size();
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
ColumnString::Offset res_offset = 0;
size_t prev_haystack_offset = 0;
size_t prev_needle_offset = 0;
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
@ -195,24 +194,23 @@ struct ReplaceStringImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
chassert(haystack_offsets.size() == replacement_offsets.size());
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
const size_t haystack_size = haystack_offsets.size();
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
ColumnString::Offset res_offset = 0;
size_t prev_haystack_offset = 0;
size_t prev_replacement_offset = 0;
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
@ -267,15 +265,14 @@ struct ReplaceStringImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
chassert(haystack_offsets.size() == needle_offsets.size());
chassert(needle_offsets.size() == replacement_offsets.size());
const size_t haystack_size = haystack_offsets.size();
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
ColumnString::Offset res_offset = 0;
@ -283,7 +280,7 @@ struct ReplaceStringImpl
size_t prev_needle_offset = 0;
size_t prev_replacement_offset = 0;
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
@ -345,7 +342,8 @@ struct ReplaceStringImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
@ -355,9 +353,8 @@ struct ReplaceStringImpl
const UInt8 * pos = begin;
ColumnString::Offset res_offset = 0;
size_t haystack_size = haystack_data.size() / n;
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
/// The current index in the string array.
size_t i = 0;
@ -384,13 +381,13 @@ struct ReplaceStringImpl
/// Copy skipped strings without any changes but
            /// add a zero byte to the end of each string.
while (i < haystack_size && begin + n * (i + 1) <= match)
while (i < input_rows_count && begin + n * (i + 1) <= match)
{
COPY_REST_OF_CURRENT_STRING();
}
/// If you have reached the end, it's time to stop
if (i == haystack_size)
if (i == input_rows_count)
break;
/// Copy unchanged part of current string.

View File

@ -64,7 +64,7 @@ public:
return arguments[0].type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue<String>());
@ -72,7 +72,7 @@ public:
if (const ColumnString * col = checkAndGetColumn<ColumnString>(&*arguments[0].column))
{
auto col_res = ColumnString::create();
vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count);
return col_res;
}
else
@ -82,11 +82,11 @@ public:
static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup,
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
size_t size = offsets.size();
res_offsets.resize(size);
res_data.reserve(size * Extractor::getReserveLengthForElement());
res_offsets.resize(input_rows_count);
res_data.reserve(input_rows_count * Extractor::getReserveLengthForElement());
size_t prev_offset = 0;
size_t res_offset = 0;
@ -95,7 +95,7 @@ public:
Pos start;
size_t length;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Extractor::execute(tld_lookup, reinterpret_cast<const char *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);

View File

@ -44,7 +44,7 @@ public:
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
const ColumnPtr column_needle = arguments[1].column;
@ -71,7 +71,7 @@ public:
ColumnString::Chars & vec_res = col_res->getChars();
ColumnString::Offsets & offsets_res = col_res->getOffsets();
vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res);
vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res, input_rows_count);
return col_res;
}
else
@ -130,7 +130,8 @@ public:
const ColumnString::Offsets & offsets,
const ColumnConst * col_needle,
const ColumnArray * col_needle_const_array,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
res_data.reserve(data.size());
res_offsets.resize(offsets.size());
@ -141,7 +142,7 @@ public:
size_t res_offset = 0;
size_t cur_res_offset;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
cur_offset = offsets[i];
cur_len = cur_offset - prev_offset;

View File

@ -46,7 +46,7 @@ struct FunctionPortImpl : public IFunction
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
UInt16 default_port = 0;
if (arguments.size() == 2)
@ -64,7 +64,7 @@ struct FunctionPortImpl : public IFunction
typename ColumnVector<UInt16>::Container & vec_res = col_res->getData();
vec_res.resize(url_column->size());
vector(default_port, url_strs->getChars(), url_strs->getOffsets(), vec_res);
vector(default_port, url_strs->getChars(), url_strs->getOffsets(), vec_res, input_rows_count);
return col_res;
}
else
@ -73,12 +73,10 @@ struct FunctionPortImpl : public IFunction
}
private:
static void vector(UInt16 default_port, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt16> & res)
static void vector(UInt16 default_port, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt16> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = extractPort(default_port, data, prev_offset, offsets[i] - prev_offset - 1);
prev_offset = offsets[i];

View File

@ -67,7 +67,7 @@ namespace
return date_time_type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (arguments.size() != 2)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} must be called with exactly 2 arguments.", name);
@ -81,11 +81,10 @@ namespace
if (WhichDataType(arg1.type).isDateTime())
{
const auto & date_time_col = checkAndGetColumn<ColumnDateTime>(*arg1.column);
size_t col_size = date_time_col.size();
using ColVecTo = DataTypeDateTime::ColumnType;
typename ColVecTo::MutablePtr result_column = ColVecTo::create(col_size);
typename ColVecTo::MutablePtr result_column = ColVecTo::create(input_rows_count);
typename ColVecTo::Container & result_data = result_column->getData();
for (size_t i = 0; i < col_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
UInt32 date_time_val = date_time_col.getElement(i);
LocalDateTime date_time(date_time_val, Name::to ? utc_time_zone : DateLUT::instance(time_zone_val));
@ -97,14 +96,13 @@ namespace
else if (WhichDataType(arg1.type).isDateTime64())
{
const auto & date_time_col = checkAndGetColumn<ColumnDateTime64>(*arg1.column);
size_t col_size = date_time_col.size();
const DataTypeDateTime64 * date_time_type = static_cast<const DataTypeDateTime64 *>(arg1.type.get());
UInt32 col_scale = date_time_type->getScale();
Int64 scale_multiplier = DecimalUtils::scaleMultiplier<Int64>(col_scale);
using ColDecimalTo = DataTypeDateTime64::ColumnType;
typename ColDecimalTo::MutablePtr result_column = ColDecimalTo::create(col_size, col_scale);
typename ColDecimalTo::MutablePtr result_column = ColDecimalTo::create(input_rows_count, col_scale);
typename ColDecimalTo::Container & result_data = result_column->getData();
for (size_t i = 0; i < col_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
DateTime64 date_time_val = date_time_col.getElement(i);
Int64 seconds = date_time_val.value / scale_multiplier;
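
The DateTime64 branch decomposes each value with scaleMultiplier: at scale s one second is 10^s ticks, so whole seconds and the sub-second remainder fall out of a div/mod pair. A sketch of that arithmetic (negative tick counts would need floor semantics, which this sketch glosses over):

#include <cstdint>
#include <utility>

// Split a DateTime64 tick count into (whole seconds, sub-second ticks).
std::pair<int64_t, int64_t> splitDateTime64(int64_t value, uint32_t scale)
{
    int64_t multiplier = 1;
    for (uint32_t i = 0; i < scale; ++i)
        multiplier *= 10; // scaleMultiplier(scale) == 10^scale
    return {value / multiplier, value % multiplier};
}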

View File

@ -143,13 +143,13 @@ private:
const IColumn & scores,
const IColumn & labels,
const ColumnArray::Offsets & offsets,
PaddedPODArray<Float64> & result)
PaddedPODArray<Float64> & result,
size_t input_rows_count)
{
size_t size = offsets.size();
result.resize(size);
result.resize(input_rows_count);
ColumnArray::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
auto next_offset = offsets[i];
result[i] = apply(scores, labels, current_offset, next_offset);
@ -179,7 +179,7 @@ public:
return std::make_shared<DataTypeFloat64>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
ColumnPtr col1 = arguments[0].column->convertToFullColumnIfConst();
ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst();
@ -203,7 +203,8 @@ public:
col_array1->getData(),
col_array2->getData(),
col_array1->getOffsets(),
col_res->getData());
col_res->getData(),
input_rows_count);
return col_res;
}
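
The vector helper above is the standard ColumnArray iteration: one flat data column plus an offsets column where offsets[i] marks the end of array i, so row i covers the half-open range [current_offset, offsets[i]). A sketch with a plain sum standing in for the apply(scores, labels, ...) reduction:

#include <cstddef>
#include <vector>

std::vector<double>
applyPerArray(const std::vector<double> & flat_data, const std::vector<size_t> & offsets, size_t input_rows_count)
{
    std::vector<double> result(input_rows_count);
    size_t current_offset = 0;
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        double acc = 0.0; // stand-in reduction: sum of the i-th array's elements
        for (size_t j = current_offset; j < offsets[i]; ++j)
            acc += flat_data[j];
        result[i] = acc;
        current_offset = offsets[i];
    }
    return result;
}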

View File

@ -40,7 +40,6 @@ ColumnPtr FunctionArrayConcat::executeImpl(const ColumnsWithTypeAndName & argume
if (result_type->onlyNull())
return result_type->createColumnConstWithDefaultValue(input_rows_count);
size_t rows = input_rows_count;
size_t num_args = arguments.size();
Columns preprocessed_columns(num_args);
@ -69,7 +68,7 @@ ColumnPtr FunctionArrayConcat::executeImpl(const ColumnsWithTypeAndName & argume
}
if (const auto * argument_column_array = typeid_cast<const ColumnArray *>(argument_column.get()))
sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows));
sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, input_rows_count));
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName());
}

View File

@ -132,7 +132,7 @@ private:
/// Hash a set of keys into a UInt128 value.
static inline UInt128 ALWAYS_INLINE hash128depths(const std::vector<size_t> & indices, const ColumnRawPtrs & key_columns)
static UInt128 hash128depths(const std::vector<size_t> & indices, const ColumnRawPtrs & key_columns)
{
SipHash hash;
for (size_t j = 0, keys_size = key_columns.size(); j < keys_size; ++j)

View File

@ -16,40 +16,38 @@ struct LengthImpl
{
static constexpr auto is_fixed_to_constant = true;
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res)
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = offsets[i] - 1 - offsets[i - 1];
}
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res)
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res, size_t)
{
res = n;
}
static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray<UInt64> & /*res*/)
static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray<UInt64> & /*res*/, size_t)
{
}
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res)
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = offsets[i] - offsets[i - 1];
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &)
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to UUID argument");
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt64> &)
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv6 argument");
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt64> &)
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv4 argument");
}

View File

@ -23,47 +23,43 @@ struct AsciiImpl
using ReturnType = Int32;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res)
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = doAscii(data, prev_offset, offsets[i] - prev_offset - 1);
prev_offset = offsets[i];
}
}
[[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/)
[[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars &, size_t, Int32 &, size_t)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "vectorFixedToConstant not implemented for function {}", AsciiName::name);
}
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res)
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res, size_t input_rows_count)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = doAscii(data, i * n, n);
}
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", AsciiName::name);
}
[[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", AsciiName::name);
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", AsciiName::name);
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", AsciiName::name);
}

View File

@ -219,42 +219,42 @@ SOFTWARE.
static constexpr bool is_fixed_to_constant = false;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
size_t size = offsets.size();
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = isValidUTF8(data.data() + prev_offset, offsets[i] - 1 - prev_offset);
prev_offset = offsets[i];
}
}
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) {}
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
static void vectorFixedToConstant(const ColumnString::Chars &, size_t, UInt8 &, size_t)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
}
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = isValidUTF8(data.data() + i * n, n);
}
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<UInt8> &)
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<UInt8> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to Array argument");
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt8> &)
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt8> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to UUID argument");
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt8> &)
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt8> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv6 argument");
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt8> &)
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt8> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv4 argument");
}

View File

@ -54,7 +54,7 @@ class ExtractKeyValuePairs : public IFunction
return builder.build();
}
ColumnPtr extract(ColumnPtr data_column, std::shared_ptr<KeyValuePairExtractor> extractor) const
ColumnPtr extract(ColumnPtr data_column, std::shared_ptr<KeyValuePairExtractor> extractor, size_t input_rows_count) const
{
auto offsets = ColumnUInt64::create();
@ -63,7 +63,7 @@ class ExtractKeyValuePairs : public IFunction
uint64_t offset = 0u;
for (auto i = 0u; i < data_column->size(); i++)
for (auto i = 0u; i < input_rows_count; i++)
{
auto row = data_column->getDataAt(i).toView();
@ -97,13 +97,13 @@ public:
return std::make_shared<ExtractKeyValuePairs>(context);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
auto parsed_arguments = ArgumentExtractor::extract(arguments);
auto extractor = getExtractor(parsed_arguments);
return extract(parsed_arguments.data_column, extractor);
return extract(parsed_arguments.data_column, extractor, input_rows_count);
}
DataTypePtr getReturnTypeImpl(const DataTypes &) const override

View File

@ -23,48 +23,42 @@ struct LengthUTF8Impl
{
static constexpr auto is_fixed_to_constant = false;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res)
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = UTF8::countCodePoints(&data[prev_offset], offsets[i] - prev_offset - 1);
prev_offset = offsets[i];
}
}
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/)
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/, size_t)
{
}
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt64> & res)
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = UTF8::countCodePoints(&data[i * n], n);
}
}
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<UInt64> &)
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to Array argument");
}
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &)
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to UUID argument");
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt64> &)
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv6 argument");
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt64> &)
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray<UInt64> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv4 argument");
}

View File

@ -114,7 +114,7 @@ public:
return to_type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
auto from_type = arguments[0].type;
@ -136,9 +136,9 @@ public:
ColumnFixedString * dst_concrete = assert_cast<ColumnFixedString *>(dst.get());
if (src.isFixedAndContiguous() && src.sizeOfValueIfFixed() == dst_concrete->getN())
executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN());
executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN(), input_rows_count);
else
executeToFixedString(src, *dst_concrete, dst_concrete->getN());
executeToFixedString(src, *dst_concrete, dst_concrete->getN(), input_rows_count);
result = std::move(dst);
@ -156,7 +156,7 @@ public:
MutableColumnPtr dst = result_type->createColumn();
ColumnString * dst_concrete = assert_cast<ColumnString *>(dst.get());
executeToString(src, *dst_concrete);
executeToString(src, *dst_concrete, input_rows_count);
result = std::move(dst);
@ -174,12 +174,11 @@ public:
const auto & data_from = col_from->getChars();
const auto & offsets_from = col_from->getOffsets();
size_t size = offsets_from.size();
auto & vec_res = col_res->getData();
vec_res.resize_fill(size);
vec_res.resize_fill(input_rows_count);
size_t offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t copy_size = std::min(static_cast<UInt64>(sizeof(ToFieldType)), offsets_from[i] - offset - 1);
if constexpr (std::endian::native == std::endian::little)
@ -209,7 +208,6 @@ public:
const auto& data_from = col_from_fixed->getChars();
size_t step = col_from_fixed->getN();
size_t size = data_from.size() / step;
auto & vec_res = col_res->getData();
size_t offset = 0;
@ -217,11 +215,11 @@ public:
size_t index = data_from.size() - copy_size;
if (sizeof(ToFieldType) <= step)
vec_res.resize(size);
vec_res.resize(input_rows_count);
else
vec_res.resize_fill(size);
vec_res.resize_fill(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (std::endian::native == std::endian::little)
memcpy(&vec_res[i], &data_from[offset], copy_size);
@ -251,12 +249,11 @@ public:
auto & from = column_from->getData();
auto & to = column_to->getData();
size_t size = from.size();
to.resize_fill(size);
to.resize_fill(input_rows_count);
static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To));
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (std::endian::native == std::endian::little)
memcpy(static_cast<void*>(&to[i]), static_cast<const void*>(&from[i]), copy_size);
@ -307,14 +304,13 @@ private:
type.isDecimal();
}
static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n, size_t input_rows_count)
{
size_t rows = src.size();
ColumnFixedString::Chars & data_to = dst.getChars();
data_to.resize_fill(n * rows);
data_to.resize_fill(n * input_rows_count);
ColumnFixedString::Offset offset = 0;
for (size_t i = 0; i < rows; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
std::string_view data = src.getDataAt(i).toView();
@ -327,11 +323,10 @@ private:
}
}
static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n)
static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n, size_t input_rows_count)
{
size_t rows = src.size();
ColumnFixedString::Chars & data_to = dst.getChars();
data_to.resize(n * rows);
data_to.resize(n * input_rows_count);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
memcpy(data_to.data(), src.getRawData().data(), data_to.size());
@ -340,15 +335,14 @@ private:
#endif
}
static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst)
static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst, size_t input_rows_count)
{
size_t rows = src.size();
ColumnString::Chars & data_to = dst.getChars();
ColumnString::Offsets & offsets_to = dst.getOffsets();
offsets_to.resize(rows);
offsets_to.resize(input_rows_count);
ColumnString::Offset offset = 0;
for (size_t i = 0; i < rows; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
StringRef data = src.getDataAt(i);
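
The reinterpret paths above all reduce to copying min(sizeof(From), sizeof(To)) bytes per value and zero-filling the rest; on a big-endian target the copy must land on the value's low-order end. A scalar sketch of the per-value operation (the big-endian offsets are an inference from the little-endian branch shown, not code from this commit):

#include <bit>
#include <cstddef>
#include <cstring>

template <typename To, typename From>
To reinterpretValue(From from)
{
    To to{}; // zero-filled, like resize_fill on the destination column
    constexpr std::size_t copy_size = sizeof(From) < sizeof(To) ? sizeof(From) : sizeof(To);
    if constexpr (std::endian::native == std::endian::little)
        std::memcpy(&to, &from, copy_size);
    else
        std::memcpy(reinterpret_cast<char *>(&to) + sizeof(To) - copy_size,
                    reinterpret_cast<const char *>(&from) + sizeof(From) - copy_size,
                    copy_size);
    return to;
}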

View File

@ -66,9 +66,8 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & src = arguments[0];
size_t size = input_rows_count;
auto res_col = ColumnUInt64::create(size);
auto res_col = ColumnUInt64::create(input_rows_count);
auto & res_data = assert_cast<ColumnUInt64 &>(*res_col).getData();
        /// For simplicity, the function is implemented by serializing into a temporary buffer.
@ -76,7 +75,7 @@ public:
String tmp;
FormatSettings format_settings;
auto serialization = src.type->getDefaultSerialization();
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
{
WriteBufferFromString out(tmp);

View File

@ -166,12 +166,12 @@ class FunctionWidthBucket : public IFunction
result_column->reserve(1);
auto & result_data = result_column->getData();
for (const auto row_index : collections::range(0, input_rows_count))
for (size_t row = 0; row < input_rows_count; ++row)
{
const auto operand = getValue<Float64>(operands_col_const, operands_vec, row_index);
const auto low = getValue<Float64>(lows_col_const, lows_vec, row_index);
const auto high = getValue<Float64>(highs_col_const, highs_vec, row_index);
const auto count = getValue<TCountType>(counts_col_const, counts_vec, row_index);
const auto operand = getValue<Float64>(operands_col_const, operands_vec, row);
const auto low = getValue<Float64>(lows_col_const, lows_vec, row);
const auto high = getValue<Float64>(highs_col_const, highs_vec, row);
const auto count = getValue<TCountType>(counts_col_const, counts_vec, row);
result_data.push_back(calculate<ResultType>(operand, low, high, count));
}
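
The loop feeds calculate<ResultType>, which follows the SQL-standard WIDTH_BUCKET definition: values below low map to bucket 0, values at or above high to bucket count + 1, and [low, high) is split into count equal-width buckets numbered from 1. A sketch of that formula, leaving degenerate inputs such as low == high to the caller:

#include <cmath>
#include <cstdint>

uint64_t widthBucket(double operand, double low, double high, uint64_t count)
{
    if (operand < low)
        return 0;
    if (operand >= high)
        return count + 1;
    return static_cast<uint64_t>(std::floor((operand - low) / (high - low) * static_cast<double>(count))) + 1;
}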

View File

@ -2997,7 +2997,11 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari
std::unique_ptr<ThreadPool> thread_pool;
if (max_threads > 1 && total_input_rows > 100000) /// TODO Make a custom threshold.
thread_pool = std::make_unique<ThreadPool>(CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, max_threads);
thread_pool = std::make_unique<ThreadPool>(
CurrentMetrics::AggregatorThreads,
CurrentMetrics::AggregatorThreadsActive,
CurrentMetrics::AggregatorThreadsScheduled,
max_threads);
for (const auto & bucket_blocks : bucket_to_blocks)
{
@ -3009,7 +3013,10 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari
result.aggregates_pools.push_back(std::make_shared<Arena>());
Arena * aggregates_pool = result.aggregates_pools.back().get();
auto task = [group = CurrentThread::getGroup(), bucket, &merge_bucket, aggregates_pool]{ merge_bucket(bucket, aggregates_pool, group); };
        /// If we don't use a thread pool, we don't need to attach to the thread group and definitely
        /// don't want to detach from it, because this thread is already attached.
auto task = [group = thread_pool != nullptr ? CurrentThread::getGroup() : nullptr, bucket, &merge_bucket, aggregates_pool]
{ merge_bucket(bucket, aggregates_pool, group); };
if (thread_pool)
thread_pool->scheduleOrThrowOnError(task);
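
The lambda change above captures a thread group only when a pool exists, since an inline execution already runs attached to the current group. A generic sketch of the schedule-or-run-inline pattern with a stand-in pool type (ThreadPool's real interface is richer):

#include <functional>
#include <memory>
#include <utility>

struct PoolStandIn
{
    // Stand-in for ThreadPool::scheduleOrThrowOnError; runs the task immediately.
    void scheduleOrThrowOnError(std::function<void()> f) { f(); }
};

void scheduleOrRunInline(const std::unique_ptr<PoolStandIn> & pool, std::function<void()> task)
{
    if (pool)
        pool->scheduleOrThrowOnError(std::move(task));
    else
        task(); // the current thread keeps its own group; nothing to attach or detach
}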

View File

@ -1373,8 +1373,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (need_add_to_database)
database = DatabaseCatalog::instance().tryGetDatabase(database_name);
bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create;
if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
bool allow_heavy_populate = getContext()->getSettingsRef().database_replicated_allow_heavy_create && create.is_populate;
if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
{
bool is_storage_replicated = false;
@ -1392,10 +1392,18 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated;
if (!allow_create_select_for_replicated)
{
            /// POPULATE can be enabled with a setting; provide a hint in the error message.
if (create.is_populate)
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. "
"Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution");
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. "
"Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution");
"CREATE AS SELECT is not supported with Replicated databases. Consider using separate CREATE and INSERT queries.");
}
}
if (database && database->shouldReplicateQuery(getContext(), query_ptr))
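
The new rule splits the old check: POPULATE stays allowed behind the setting, while CREATE ... AS SELECT on a Replicated database is now rejected unconditionally. A hedged sketch of exercising this from the integration-test framework used elsewhere in this diff (node, database and view names are illustrative):

    # Illustrative: POPULATE needs database_replicated_allow_heavy_create;
    # without it the server replies SUPPORT_IS_DISABLED.
    _, error = node.query_and_get_answer_with_error(
        "CREATE MATERIALIZED VIEW db.mv (id UInt64) "
        "ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1"
    )
    assert "SUPPORT_IS_DISABLED" in error

    node.query(
        "CREATE MATERIALIZED VIEW db.mv (id UInt64) "
        "ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1",
        settings={"database_replicated_allow_heavy_create": 1},
    )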

View File

@ -420,7 +420,7 @@ void ParquetBlockInputFormat::initializeIfNeeded()
int num_row_groups = metadata->num_row_groups();
row_group_batches.reserve(num_row_groups);
auto adative_chunk_size = [&](int row_group_idx) -> size_t
auto adaptive_chunk_size = [&](int row_group_idx) -> size_t
{
size_t total_size = 0;
auto row_group_meta = metadata->RowGroup(row_group_idx);
@ -457,7 +457,7 @@ void ParquetBlockInputFormat::initializeIfNeeded()
row_group_batches.back().row_groups_idxs.push_back(row_group);
row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows();
row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size();
auto rows = adative_chunk_size(row_group);
auto rows = adaptive_chunk_size(row_group);
row_group_batches.back().adaptive_chunk_size = rows ? rows : format_settings.parquet.max_block_size;
}
}
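
The renamed lambda sizes output chunks per row group so they stay near a target byte size. A hedged Python sketch of that arithmetic — the names and the exact formula are assumptions, not the C++ above:

    def adaptive_chunk_size(total_rows, total_bytes, target_chunk_bytes=16 << 20):
        # Assumed formula: keep each output chunk near target_chunk_bytes.
        if total_rows == 0 or total_bytes == 0:
            return 0  # caller falls back to format_settings.parquet.max_block_size
        avg_row_bytes = max(1, total_bytes // total_rows)
        return max(1, target_chunk_bytes // avg_row_bytes)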

View File

@ -766,7 +766,9 @@ def _upload_build_artifacts(
int(job_report.duration),
GITHUB_JOB_API_URL(),
head_ref=pr_info.head_ref,
pr_number=pr_info.number,
# PRInfo fetches the PR number for release branches as well - set pr_number to 0 for releases
# so that build results are not mistakenly treated as feature-branch builds
pr_number=pr_info.number if pr_info.is_pr else 0,
)
report_url = ci_cache.upload_build_report(build_result)
print(f"Report file has been uploaded to [{report_url}]")

View File

@ -519,10 +519,10 @@ class CI:
runner_type=Runners.STYLE_CHECKER,
),
JobNames.DOCKER_SERVER: CommonJobConfigs.DOCKER_SERVER.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE]
required_builds=[BuildNames.PACKAGE_RELEASE, BuildNames.PACKAGE_AARCH64]
),
JobNames.DOCKER_KEEPER: CommonJobConfigs.DOCKER_SERVER.with_properties(
required_builds=[BuildNames.PACKAGE_RELEASE]
required_builds=[BuildNames.PACKAGE_RELEASE, BuildNames.PACKAGE_AARCH64]
),
JobNames.DOCS_CHECK: JobConfig(
digest=DigestConfig(

View File

@ -128,17 +128,9 @@ def parse_args() -> argparse.Namespace:
def retry_popen(cmd: str, log_file: Path) -> int:
max_retries = 2
for retry in range(max_retries):
# From time to time docker build may failed. Curl issues, or even push
# It will sleep progressively 5, 15, 30 and 50 seconds between retries
progressive_sleep = 5 * sum(i + 1 for i in range(retry))
if progressive_sleep:
logging.warning(
"The following command failed, sleep %s before retry: %s",
progressive_sleep,
cmd,
)
time.sleep(progressive_sleep)
sleep_seconds = 10
retcode = -1
for _retry in range(max_retries):
with TeePopen(
cmd,
log_file=log_file,
@ -146,7 +138,14 @@ def retry_popen(cmd: str, log_file: Path) -> int:
retcode = process.wait()
if retcode == 0:
return 0
else:
# From time to time the docker build may fail: curl issues, or even push failures
logging.error(
"The following command failed, sleep %s before retry: %s",
sleep_seconds,
cmd,
)
time.sleep(sleep_seconds)
return retcode
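
Pieced together from the two hunks above, the new retry loop is a fixed 10-second back-off instead of the old progressive one. A hedged reconstruction (imports and TeePopen are the script's own helpers; the import path and any TeePopen arguments elided by the diff are assumptions):

    import logging
    import time
    from pathlib import Path

    from tee_popen import TeePopen  # the CI scripts' helper; import path assumed

    def retry_popen(cmd: str, log_file: Path) -> int:
        max_retries = 2
        sleep_seconds = 10
        retcode = -1
        for _retry in range(max_retries):
            with TeePopen(cmd, log_file=log_file) as process:
                retcode = process.wait()
                if retcode == 0:
                    return 0
                # From time to time the docker build may fail: curl issues, or even push
                logging.error(
                    "The following command failed, sleep %s before retry: %s",
                    sleep_seconds,
                    cmd,
                )
                time.sleep(sleep_seconds)
        return retcode
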
@ -377,21 +376,6 @@ def main():
direct_urls: Dict[str, List[str]] = {}
for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")):
if args.bucket_prefix:
assert not args.allow_build_reuse
repo_urls[arch] = f"{args.bucket_prefix}/{build_name}"
elif args.sha:
# CreateRelease workflow only. TODO
version = args.version
repo_urls[arch] = (
f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/"
f"{version.major}.{version.minor}/{args.sha}/{build_name}"
)
else:
# In all other cases urls must be fetched from build reports. TODO: script needs refactoring
repo_urls[arch] = ""
assert args.allow_build_reuse
if args.allow_build_reuse:
# read s3 urls from pre-downloaded build reports
if "clickhouse-server" in image_repo:
@ -413,6 +397,21 @@ def main():
for url in urls
if any(package in url for package in PACKAGES) and "-dbg" not in url
]
elif args.bucket_prefix:
assert not args.allow_build_reuse
repo_urls[arch] = f"{args.bucket_prefix}/{build_name}"
print(f"Bucket prefix is set: Fetching packages from [{repo_urls}]")
elif args.sha:
version = args.version
repo_urls[arch] = (
f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/"
f"{version.major}.{version.minor}/{args.sha}/{build_name}"
)
print(f"Fetching packages from [{repo_urls}]")
else:
assert (
False
), "--sha, --bucket_prefix or --allow-build-reuse (to fetch packages from build report) must be provided"
if push:
docker_login()
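
Because the branches moved between hunks, the resolution order of the repository URLs is hard to read from the diff alone; in the new code it is: build reports (when --allow-build-reuse), then --bucket_prefix, then --sha, else fail. A condensed sketch with the report-parsing details elided (urls_from_build_report is a hypothetical helper standing in for that logic):

    for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")):
        if args.allow_build_reuse:
            # Read s3 urls from pre-downloaded build reports (details elided).
            repo_urls[arch] = urls_from_build_report(arch, build_name)  # hypothetical helper
        elif args.bucket_prefix:
            repo_urls[arch] = f"{args.bucket_prefix}/{build_name}"
        elif args.sha:
            repo_urls[arch] = (
                f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/"
                f"{args.version.major}.{args.version.minor}/{args.sha}/{build_name}"
            )
        else:
            assert False, (
                "--sha, --bucket_prefix or --allow-build-reuse "
                "(to fetch packages from build report) must be provided"
            )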

View File

@ -776,7 +776,6 @@ def test_system_users():
def test_system_functions():
node1.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;")
node1.query("CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');")
backup_name = new_backup_name()
@ -817,6 +816,9 @@ def test_system_functions():
[[0, "even"], [1, "odd"], [2, "even"]]
)
node1.query("DROP FUNCTION linear_equation")
node1.query("DROP FUNCTION parity_str")
def test_projection():
node1.query(

View File

@ -9,13 +9,13 @@ import re
cluster = ClickHouseCluster(__file__)
num_nodes = 10
num_nodes = 2
def generate_cluster_def():
path = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"./_gen/cluster_for_concurrency_test.xml",
"./_gen/cluster_for_test_disallow_concurrency.xml",
)
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "w") as f:
@ -111,22 +111,56 @@ def create_and_fill_table():
nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)")
def wait_for_fail_backup(node, backup_id, backup_name):
def get_status_and_error(node, backup_or_restore_id):
return (
node.query(
f"SELECT status, error FROM system.backups WHERE id == '{backup_or_restore_id}'"
)
.rstrip("\n")
.split("\t")
)
def wait_for_backup(node, backup_id):
assert_eq_with_retry(
node,
f"SELECT status FROM system.backups WHERE id = '{backup_id}'",
"BACKUP_CREATED",
sleep_time=2,
retry_count=50,
)
def wait_for_restore(node, restore_id):
assert_eq_with_retry(
node,
f"SELECT status FROM system.backups WHERE id == '{restore_id}'",
"RESTORED",
sleep_time=2,
retry_count=50,
)
def check_backup_error(error):
expected_errors = [
"Concurrent backups not supported",
f"Backup {backup_name} already exists",
"BACKUP_ALREADY_EXISTS",
]
status = node.query(
f"SELECT status FROM system.backups WHERE id == '{backup_id}'"
).rstrip("\n")
assert any([expected_error in error for expected_error in expected_errors])
def check_restore_error(error):
expected_errors = [
"Concurrent restores not supported",
"Cannot restore the table default.tbl because it already contains some data",
]
assert any([expected_error in error for expected_error in expected_errors])
def wait_for_backup_failure(node, backup_id):
status, error = get_status_and_error(node, backup_id)
# It is possible that the second backup was picked up first, and then the async backup failed
if status == "BACKUP_FAILED":
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{backup_id}'"
).rstrip("\n")
assert any([expected_error in error for expected_error in expected_errors])
return
elif status == "CREATING_BACKUP":
if status == "CREATING_BACKUP":
assert_eq_with_retry(
node,
f"SELECT status FROM system.backups WHERE id = '{backup_id}'",
@ -134,31 +168,17 @@ def wait_for_fail_backup(node, backup_id, backup_name):
sleep_time=2,
retry_count=50,
)
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{backup_id}'"
).rstrip("\n")
assert re.search(f"Backup {backup_name} already exists", error)
return
status, error = get_status_and_error(node, backup_id)
if status == "BACKUP_FAILED":
check_backup_error(error)
else:
assert False, "Concurrent backups both passed, when one is expected to fail"
def wait_for_fail_restore(node, restore_id):
expected_errors = [
"Concurrent restores not supported",
"Cannot restore the table default.tbl because it already contains some data",
]
status = node.query(
f"SELECT status FROM system.backups WHERE id == '{restore_id}'"
).rstrip("\n")
def wait_for_restore_failure(node, restore_id):
status, error = get_status_and_error(node, restore_id)
# It is possible that the second restore was picked up first, and then the async restore failed
if status == "RESTORE_FAILED":
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{restore_id}'"
).rstrip("\n")
assert any([expected_error in error for expected_error in expected_errors])
return
elif status == "RESTORING":
if status == "RESTORING":
assert_eq_with_retry(
node,
f"SELECT status FROM system.backups WHERE id = '{restore_id}'",
@ -166,14 +186,9 @@ def wait_for_fail_restore(node, restore_id):
sleep_time=2,
retry_count=50,
)
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{restore_id}'"
).rstrip("\n")
assert re.search(
"Cannot restore the table default.tbl because it already contains some data",
error,
)
return
status, error = get_status_and_error(node, restore_id)
if status == "RESTORE_FAILED":
check_restore_error(error)
else:
assert False, "Concurrent restores both passed, when one is expected to fail"
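
All four tests below follow one pattern: start operation #1 with ASYNC, run operation #2 synchronously, then assert that exactly one of the two failed with an expected error. A condensed sketch of that invariant, built only from the helpers defined above (the wrapper itself is illustrative):

    def assert_exactly_one_failed(node, async_id, sync_error,
                                  check_error, wait_success, wait_failure):
        if sync_error:
            # Operation #2 failed, so the async operation #1 should succeed.
            check_error(sync_error)
            wait_success(node, async_id)
        else:
            # Operation #2 won the race, so the async operation #1 must fail.
            wait_failure(node, async_id)

For the backup case this would be called as assert_exactly_one_failed(nodes[0], id, error, check_backup_error, wait_for_backup, wait_for_backup_failure).
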
@ -188,39 +203,28 @@ def test_concurrent_backups_on_same_node():
backup_name = new_backup_name()
id = (
# Backup #1.
id, status = (
nodes[0]
.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC")
.split("\t")[0]
.rstrip("\n")
.split("\t")
)
status = (
nodes[0]
.query(f"SELECT status FROM system.backups WHERE id == '{id}'")
.rstrip("\n")
)
assert status in ["CREATING_BACKUP", "BACKUP_CREATED"]
result, error = nodes[0].query_and_get_answer_with_error(
# Backup #2.
_, error = nodes[0].query_and_get_answer_with_error(
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
)
expected_errors = [
"Concurrent backups not supported",
f"Backup {backup_name} already exists",
]
if not error:
wait_for_fail_backup(nodes[0], id, backup_name)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE id = '{id}'",
"BACKUP_CREATED",
sleep_time=2,
retry_count=50,
)
if error:
# Backup #2 failed, backup #1 should be successful.
check_backup_error(error)
wait_for_backup(nodes[0], id)
else:
# Backup #2 was successful, backup #1 should fail.
wait_for_backup_failure(nodes[0], id)
# This restore part is added to confirm that creating an internal backup & restore works
# even when a concurrent backup is stopped
@ -238,40 +242,38 @@ def test_concurrent_backups_on_different_nodes():
backup_name = new_backup_name()
id = (
nodes[1]
# Backup #1.
id, status = (
nodes[0]
.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC")
.split("\t")[0]
.rstrip("\n")
.split("\t")
)
status = (
nodes[1]
.query(f"SELECT status FROM system.backups WHERE id == '{id}'")
.rstrip("\n")
)
assert status in ["CREATING_BACKUP", "BACKUP_CREATED"]
result, error = nodes[0].query_and_get_answer_with_error(
# Backup #2.
_, error = nodes[1].query_and_get_answer_with_error(
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
)
expected_errors = [
"Concurrent backups not supported",
f"Backup {backup_name} already exists",
]
if error:
# Backup #2 failed, backup #1 should be successful.
check_backup_error(error)
wait_for_backup(nodes[0], id)
else:
# Backup #2 was successful, backup #1 should fail.
wait_for_backup_failure(nodes[0], id)
if not error:
wait_for_fail_backup(nodes[1], id, backup_name)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(
nodes[1],
f"SELECT status FROM system.backups WHERE id = '{id}'",
"BACKUP_CREATED",
sleep_time=2,
retry_count=50,
# This restore part is added to confirm that creating an internal backup & restore works
# even when a concurrent backup is stopped
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC",
settings={
"distributed_ddl_task_timeout": 360,
},
)
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}")
def test_concurrent_restores_on_same_node():
@ -288,40 +290,28 @@ def test_concurrent_restores_on_same_node():
},
)
restore_id = (
# Restore #1.
restore_id, status = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
.rstrip("\n")
.split("\t")
)
status = (
nodes[0]
.query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'")
.rstrip("\n")
)
assert status in ["RESTORING", "RESTORED"]
result, error = nodes[0].query_and_get_answer_with_error(
# Restore #2.
_, error = nodes[0].query_and_get_answer_with_error(
f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
)
expected_errors = [
"Concurrent restores not supported",
"Cannot restore the table default.tbl because it already contains some data",
]
if not error:
wait_for_fail_restore(nodes[0], restore_id)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE id == '{restore_id}'",
"RESTORED",
sleep_time=2,
retry_count=50,
)
if error:
# Restore #2 failed, restore #1 should be successful.
check_restore_error(error)
wait_for_restore(nodes[0], restore_id)
else:
# Restore #2 was successful, restore #1 should fail.
wait_for_restore_failure(nodes[0], restore_id)
def test_concurrent_restores_on_different_node():
@ -338,37 +328,25 @@ def test_concurrent_restores_on_different_node():
},
)
restore_id = (
# Restore #1.
restore_id, status = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
.rstrip("\n")
.split("\t")
)
status = (
nodes[0]
.query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'")
.rstrip("\n")
)
assert status in ["RESTORING", "RESTORED"]
result, error = nodes[1].query_and_get_answer_with_error(
# Restore #2.
_, error = nodes[1].query_and_get_answer_with_error(
f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
)
expected_errors = [
"Concurrent restores not supported",
"Cannot restore the table default.tbl because it already contains some data",
]
if not error:
wait_for_fail_restore(nodes[0], restore_id)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE id == '{restore_id}'",
"RESTORED",
sleep_time=2,
retry_count=50,
)
if error:
# Restore #2 failed, restore #1 should be successful.
check_restore_error(error)
wait_for_restore(nodes[0], restore_id)
else:
# Restore #2 was successful, restore #1 should fail.
wait_for_restore_failure(nodes[0], restore_id)

View File

@ -0,0 +1,5 @@
<clickhouse>
<access_control_improvements>
<table_engines_require_grant>false</table_engines_require_grant>
</access_control_improvements>
</clickhouse>

View File

@ -5,7 +5,7 @@ from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance(
"instance",
main_configs=["configs/config.xml"],
main_configs=["configs/config_with_table_engine_grant.xml"],
user_configs=["configs/users.d/users.xml"],
)

View File

@ -0,0 +1,82 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance(
"instance",
main_configs=["configs/config_without_table_engine_grant.xml"],
user_configs=["configs/users.d/users.xml"],
)
@pytest.fixture(scope="module", autouse=True)
def start_cluster():
try:
cluster.start()
instance.query("CREATE DATABASE test")
yield cluster
finally:
cluster.shutdown()
@pytest.fixture(autouse=True)
def cleanup_after_test():
try:
yield
finally:
instance.query("DROP USER IF EXISTS A")
instance.query("DROP TABLE IF EXISTS test.table1")
def test_table_engine_and_source_grant():
instance.query("DROP USER IF EXISTS A")
instance.query("CREATE USER A")
instance.query("GRANT CREATE TABLE ON test.table1 TO A")
instance.query("GRANT POSTGRES ON *.* TO A")
instance.query(
"""
CREATE TABLE test.table1(a Integer)
engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy');
""",
user="A",
)
instance.query("DROP TABLE test.table1")
instance.query("REVOKE POSTGRES ON *.* FROM A")
assert "Not enough privileges" in instance.query_and_get_error(
"""
CREATE TABLE test.table1(a Integer)
engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy');
""",
user="A",
)
# expecting grant POSTGRES instead of grant PostgreSQL due to the discrepancy between the source access type and the table engine
# similarly, other sources should also use their own defined name instead of the name of the table engine
assert "grant POSTGRES ON *.*" in instance.query_and_get_error(
"""
CREATE TABLE test.table1(a Integer)
engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy');
""",
user="A",
)
instance.query("GRANT SOURCES ON *.* TO A")
instance.query(
"""
CREATE TABLE test.table1(a Integer)
engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy');
""",
user="A",
)
instance.query("DROP TABLE test.table1")

View File

@ -139,6 +139,6 @@ SELECT {CLICKHOUSE_DATABASE:String} || '.dict3' as n, dictGet(n, 'some_column',
DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict;
SYSTEM RELOAD DICTIONARIES; -- {serverError UNKNOWN_TABLE}
SYSTEM RELOAD DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict3; -- {serverError UNKNOWN_TABLE}
SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12));

View File

@ -1,3 +1,6 @@
-- Tags: no-parallel
-- Cannot run in parallel, since other tests may run SYSTEM RELOAD DICTIONARIES at the same time.
CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory();
CREATE DICTIONARY dict
(

View File

@ -7,6 +7,16 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e
function query()
{
local query_id
if [[ $1 == --query_id ]]; then
query_id="&query_id=$2"
shift 2
fi
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}$query_id" -d "$*"
}
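
The query() helper posts over the HTTP interface, optionally pinning a query_id so the test can later find the query in system.query_log. A minimal Python sketch of the same pattern (a sketch assuming CLICKHOUSE_URL already ends in query parameters, as the bash "&query_id=" append implies; the default URL is illustrative):

    import os
    import urllib.parse
    import urllib.request

    def query(sql: str, query_id: str = "") -> str:
        # Mirrors the bash helper: append &query_id=... only when one is given.
        url = os.environ.get("CLICKHOUSE_URL", "http://localhost:8123/?")
        if query_id:
            url += "&query_id=" + urllib.parse.quote(query_id)
        with urllib.request.urlopen(url, data=sql.encode()) as resp:
            return resp.read().decode()
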
function wait_until()
{
local expr=$1 && shift
@ -17,73 +27,68 @@ function wait_until()
function get_buffer_delay()
{
local buffer_insert_id=$1 && shift
$CLICKHOUSE_CLIENT -nm -q "
SYSTEM FLUSH LOGS;
WITH
(SELECT event_time_microseconds FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_,
(SELECT max(event_time) FROM data_01256) AS end_
SELECT dateDiff('seconds', begin_, end_)::UInt64;
query "SYSTEM FLUSH LOGS"
query "
WITH
(SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_,
(SELECT max(event_time) FROM data_01256) AS end_
SELECT dateDiff('seconds', begin_, end_)::UInt64
"
}
$CLICKHOUSE_CLIENT -nm -q "
drop table if exists data_01256;
drop table if exists buffer_01256;
create table data_01256 (key UInt64, event_time DateTime(6) MATERIALIZED now64(6)) Engine=Memory();
"
query "drop table if exists data_01256"
query "drop table if exists buffer_01256"
query "create table data_01256 (key UInt64, event_time DateTime(6) MATERIALIZED now64(6)) Engine=Memory()"
echo "min"
$CLICKHOUSE_CLIENT -q "
create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1,
query "
create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1,
2, 100, /* time */
4, 100, /* rows */
1, 1e6 /* bytes */
)
"
min_query_id=$(random_str 10)
$CLICKHOUSE_CLIENT --query_id="$min_query_id" -q "insert into buffer_01256 select * from system.numbers limit 5"
$CLICKHOUSE_CLIENT -q "select count() from data_01256"
wait_until '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]'
query --query_id "$min_query_id" "insert into buffer_01256 select * from system.numbers limit 5"
query "select count() from data_01256"
wait_until '[[ $(query "select count() from data_01256") -eq 5 ]]'
sec=$(get_buffer_delay "$min_query_id")
[[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec"
[[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec"
$CLICKHOUSE_CLIENT -q "select count() from data_01256"
$CLICKHOUSE_CLIENT -q "drop table buffer_01256"
query "select count() from data_01256"
query "drop table buffer_01256"
echo "max"
$CLICKHOUSE_CLIENT -q "
create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1,
query "
create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1,
100, 2, /* time */
0, 100, /* rows */
0, 1e6 /* bytes */
);
)
"
max_query_id=$(random_str 10)
$CLICKHOUSE_CLIENT --query_id="$max_query_id" -q "insert into buffer_01256 select * from system.numbers limit 5"
$CLICKHOUSE_CLIENT -q "select count() from data_01256"
wait_until '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]'
query --query_id "$max_query_id" "insert into buffer_01256 select * from system.numbers limit 5"
query "select count() from data_01256"
wait_until '[[ $(query "select count() from data_01256") -eq 10 ]]'
sec=$(get_buffer_delay "$max_query_id")
[[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec"
$CLICKHOUSE_CLIENT -q "select count() from data_01256"
$CLICKHOUSE_CLIENT -q "drop table buffer_01256"
query "select count() from data_01256"
query "drop table buffer_01256"
echo "direct"
$CLICKHOUSE_CLIENT -nm -q "
create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1,
query "
create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1,
100, 100, /* time */
0, 9, /* rows */
0, 1e6 /* bytes */
);
insert into buffer_01256 select * from system.numbers limit 10;
select count() from data_01256;
)
"
query "insert into buffer_01256 select * from system.numbers limit 10"
query "select count() from data_01256"
echo "drop"
$CLICKHOUSE_CLIENT -nm -q "
insert into buffer_01256 select * from system.numbers limit 10;
drop table if exists buffer_01256;
select count() from data_01256;
"
query "insert into buffer_01256 select * from system.numbers limit 10"
query "drop table if exists buffer_01256"
query "select count() from data_01256"
$CLICKHOUSE_CLIENT -q "drop table data_01256"
query "drop table data_01256"

File diff suppressed because one or more lines are too long

View File

@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "drop table if exists rmt sync;"
$CLICKHOUSE_CLIENT -q "create table rmt (n int) engine=ReplicatedMergeTree('/test/02444/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') order by n"
$CLICKHOUSE_CLIENT -q "create table rmt (n int) engine=ReplicatedMergeTree('/test/02444/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') order by n settings old_parts_lifetime=600"
$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt values (1);"
$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt values (2);"

View File

@ -1,20 +1,4 @@
-- { echoOn }
insert into buffer_02572 values (1);
-- ensure that the flush was not direct
select * from data_02572;
select * from copy_02572;
-- we cannot use OPTIMIZE, this will attach query context, so let's wait
SET function_sleep_max_microseconds_per_block = 6000000;
select sleepEachRow(1) from numbers(3*2) format Null;
select * from data_02572;
OK
1
select * from copy_02572;
1
system flush logs;
select count() > 0, lower(status::String), errorCodeToName(exception_code)
from system.query_views_log where
view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and
view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572')
group by 2, 3
;
1 queryfinish OK

View File

@ -0,0 +1,38 @@
#!/usr/bin/env bash
# INSERT buffer_02572 -> data_02572 -> copy_02572
# ^^
# push to system.query_views_log
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572;
drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;"
${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();"
${CLICKHOUSE_CLIENT} --query="create table data_02572 (key Int) engine=Memory();"
${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 3, 3, 1, 1e9, 1, 1e9);"
${CLICKHOUSE_CLIENT} --query="create materialized view mv_02572 to copy_02572 as select * from data_02572;"
${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);"
# ensure that the flush was not direct
${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;"
# we cannot use OPTIMIZE, since it would attach a query context, so let's wait
for _ in {1..100}; do
$CLICKHOUSE_CLIENT -q "select * from data_02572;" | grep -q "1" && echo 'OK' && break
sleep 0.5
done
${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;"
${CLICKHOUSE_CLIENT} --query="system flush logs;"
${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code)
from system.query_views_log where
view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and
view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572')
group by 2, 3;"
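
The shell loop above replaces the old fixed sleepEachRow wait with polling. The equivalent idea in the integration-test helper used elsewhere in this diff would be assert_eq_with_retry (shown only as a sketch — this stateless shell test sticks with the bash loop; node and values are illustrative):

    assert_eq_with_retry(
        node,
        "select count() from data_02572",
        "1",
        sleep_time=0.5,
        retry_count=100,
    )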

View File

@ -1,36 +0,0 @@
-- INSERT buffer_02572 -> data_02572 -> copy_02572
-- ^^
-- push to system.query_views_log
drop table if exists buffer_02572;
drop table if exists data_02572;
drop table if exists copy_02572;
drop table if exists mv_02572;
create table copy_02572 (key Int) engine=Memory();
create table data_02572 (key Int) engine=Memory();
create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1,
/* never direct flush for flush from background thread */
/* min_time= */ 3, 3,
1, 1e9,
1, 1e9);
create materialized view mv_02572 to copy_02572 as select * from data_02572;
-- { echoOn }
insert into buffer_02572 values (1);
-- ensure that the flush was not direct
select * from data_02572;
select * from copy_02572;
-- we cannot use OPTIMIZE, this will attach query context, so let's wait
SET function_sleep_max_microseconds_per_block = 6000000;
select sleepEachRow(1) from numbers(3*2) format Null;
select * from data_02572;
select * from copy_02572;
system flush logs;
select count() > 0, lower(status::String), errorCodeToName(exception_code)
from system.query_views_log where
view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and
view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572')
group by 2, 3
;

View File

@ -18,8 +18,12 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIAL
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED"
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED"
# But it is allowed with the special setting
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1
# POPULATE is allowed with the special setting
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv3 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --compatibility='24.6'
# AS SELECT is forbidden even with the setting
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 |& grep -cm1 "SUPPORT_IS_DISABLED"
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --compatibility='24.6' |& grep -cm1 "SUPPORT_IS_DISABLED"
${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db"

View File

@ -1870,6 +1870,7 @@ joinGetOrNull
json
jsonMergePatch
jsonasstring
jsonasobject
jsoncolumns
jsoncolumnsmonoblock
jsoncompact