From e348186ba26072eb76e549cd2a0adcd801c92bc6 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 18 Jun 2024 21:30:37 +0000 Subject: [PATCH 01/34] clear hint in table engine and sources --- src/Access/Common/AccessRightsElement.cpp | 6 +- src/Access/Common/AccessRightsElement.h | 1 + src/Access/ContextAccess.cpp | 80 ++++++++++++++++++----- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index 24ff4e7631b..2ee13d6b94f 100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ b/src/Access/Common/AccessRightsElement.cpp @@ -224,7 +224,11 @@ void AccessRightsElement::replaceEmptyDatabase(const String & current_database) String AccessRightsElement::toString() const { return toStringImpl(*this, true); } String AccessRightsElement::toStringWithoutOptions() const { return toStringImpl(*this, false); } - +String AccessRightsElement::toStringWithoutONClause() const +{ + String result{access_flags.toKeywords().front()}; + return result + " ON {db.table}"; +} bool AccessRightsElements::empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); } diff --git a/src/Access/Common/AccessRightsElement.h b/src/Access/Common/AccessRightsElement.h index ba625fc43df..49764fc727f 100644 --- a/src/Access/Common/AccessRightsElement.h +++ b/src/Access/Common/AccessRightsElement.h @@ -89,6 +89,7 @@ struct AccessRightsElement /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". 
String toString() const; String toStringWithoutOptions() const; + String toStringWithoutONClause() const; }; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 2a658d7aaa2..3ce30a0b681 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -37,6 +37,24 @@ namespace ErrorCodes namespace { + static const std::vector> source_and_table_engines = { + {AccessType::FILE, "File"}, + {AccessType::URL, "URL"}, + {AccessType::REMOTE, "Distributed"}, + {AccessType::MONGO, "MongoDB"}, + {AccessType::REDIS, "Redis"}, + {AccessType::MYSQL, "MySQL"}, + {AccessType::POSTGRES, "PostgreSQL"}, + {AccessType::SQLITE, "SQLite"}, + {AccessType::ODBC, "ODBC"}, + {AccessType::JDBC, "JDBC"}, + {AccessType::HDFS, "HDFS"}, + {AccessType::S3, "S3"}, + {AccessType::HIVE, "Hive"}, + {AccessType::AZURE, "AzureBlobStorage"} + }; + + AccessRights mixAccessRightsFromUserAndRoles(const User & user, const EnabledRolesInfo & roles_info) { AccessRights res = user.access; @@ -205,22 +223,6 @@ namespace } /// There is overlap between AccessType sources and table engines, so the following code avoids user granting twice. - static const std::vector> source_and_table_engines = { - {AccessType::FILE, "File"}, - {AccessType::URL, "URL"}, - {AccessType::REMOTE, "Distributed"}, - {AccessType::MONGO, "MongoDB"}, - {AccessType::REDIS, "Redis"}, - {AccessType::MYSQL, "MySQL"}, - {AccessType::POSTGRES, "PostgreSQL"}, - {AccessType::SQLITE, "SQLite"}, - {AccessType::ODBC, "ODBC"}, - {AccessType::JDBC, "JDBC"}, - {AccessType::HDFS, "HDFS"}, - {AccessType::S3, "S3"}, - {AccessType::HIVE, "Hive"}, - {AccessType::AZURE, "AzureBlobStorage"} - }; /// Sync SOURCE and TABLE_ENGINE, so only need to check TABLE_ENGINE later. if (access_control.doesTableEnginesRequireGrant()) @@ -555,6 +557,18 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() return nothing_granted; } +/// Just Dummy to pass compile. 
+template +static std::string_view getTableEngineName(const Args &... args[[maybe_unused]]) +{ + return ""; +} + +template +static std::string_view getTableEngineName(std::string_view name, const Args &... args[[maybe_unused]]) +{ + return name; +} template bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const @@ -611,6 +625,40 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (!granted) { + /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading since + /// SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. + if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) + { + AccessFlags newFlags; + + String table_engine_name{getTableEngineName(args...)}; + for (const auto & source_and_table_engine : source_and_table_engines) + { + const auto & table_engine = std::get<1>(source_and_table_engine); + if (table_engine != table_engine_name) continue; + const auto & source = std::get<0>(source_and_table_engine); + /// Set the flags from Table Engine to SOURCES so that prompts can be meaningful. + newFlags = source; + break; + } + + if (newFlags == AccessType::NONE) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); + + if (grant_option && acs->isGranted(flags, args...)) + { + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. " + "The required privileges have been granted, but without grant option. " + "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", + AccessRightsElement{newFlags}.toStringWithoutONClause()); + } + + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", + AccessRightsElement{newFlags}.toStringWithoutONClause() + (grant_option ? 
" WITH GRANT OPTION" : "")); + } + if (grant_option && acs->isGranted(flags, args...)) { return access_denied(ErrorCodes::ACCESS_DENIED, From f7e81e1ae2752020c076990395349ccd2d69cf2b Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 18 Jun 2024 21:59:07 +0000 Subject: [PATCH 02/34] fix --- src/Access/ContextAccess.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 3ce30a0b681..de0e7e3d777 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -625,8 +625,8 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (!granted) { - /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading since - /// SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. + /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading + /// since SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. 
if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) { AccessFlags newFlags; From a2ee0668f12c8cd1b88b8c4ad46c15271a5a1fd2 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 19 Jun 2024 02:20:22 +0000 Subject: [PATCH 03/34] fix --- src/Access/ContextAccess.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index de0e7e3d777..4620561053b 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -37,7 +37,7 @@ namespace ErrorCodes namespace { - static const std::vector> source_and_table_engines = { + const std::vector> source_and_table_engines = { {AccessType::FILE, "File"}, {AccessType::URL, "URL"}, {AccessType::REMOTE, "Distributed"}, @@ -268,6 +268,11 @@ namespace template std::string_view getDatabase(std::string_view arg1, const OtherArgs &...) { return arg1; } + + std::string_view getTableEngine() { return {}; } + + template + std::string_view getTableEngine(std::string_view arg1, const OtherArgs &...) { return arg1; } } @@ -557,18 +562,6 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() return nothing_granted; } -/// Just Dummy to pass compile. -template -static std::string_view getTableEngineName(const Args &... args[[maybe_unused]]) -{ - return ""; -} - -template -static std::string_view getTableEngineName(std::string_view name, const Args &... args[[maybe_unused]]) -{ - return name; -} template bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const @@ -631,7 +624,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... 
arg { AccessFlags newFlags; - String table_engine_name{getTableEngineName(args...)}; + String table_engine_name{getTableEngine(args...)}; for (const auto & source_and_table_engine : source_and_table_engines) { const auto & table_engine = std::get<1>(source_and_table_engine); @@ -642,7 +635,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg break; } - if (newFlags == AccessType::NONE) + if (newFlags.isEmpty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); if (grant_option && acs->isGranted(flags, args...)) From c998ec1e4f1b91f8ca20c2bd5a7acb6ac8d2e1b1 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 29 Jun 2024 02:40:22 +0000 Subject: [PATCH 04/34] add test and better naming --- src/Access/Common/AccessRightsElement.cpp | 4 +- src/Access/Common/AccessRightsElement.h | 2 +- src/Access/ContextAccess.cpp | 10 +-- ...xml => config_with_table_engine_grant.xml} | 0 .../config_without_table_engine_grant.xml | 5 ++ ...est.py => test_with_table_engine_grant.py} | 2 +- .../test_without_table_engine_grant.py | 81 +++++++++++++++++++ 7 files changed, 95 insertions(+), 9 deletions(-) rename tests/integration/test_grant_and_revoke/configs/{config.xml => config_with_table_engine_grant.xml} (100%) create mode 100644 tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml rename tests/integration/test_grant_and_revoke/{test.py => test_with_table_engine_grant.py} (99%) create mode 100644 tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index 2ee13d6b94f..63bda09a51b 100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ b/src/Access/Common/AccessRightsElement.cpp @@ -224,10 +224,10 @@ void AccessRightsElement::replaceEmptyDatabase(const String & current_database) String AccessRightsElement::toString() const { return toStringImpl(*this, 
true); } String AccessRightsElement::toStringWithoutOptions() const { return toStringImpl(*this, false); } -String AccessRightsElement::toStringWithoutONClause() const +String AccessRightsElement::toStringForAccessTypeSource() const { String result{access_flags.toKeywords().front()}; - return result + " ON {db.table}"; + return result + " ON *.*"; } bool AccessRightsElements::empty() const { return std::all_of(begin(), end(), [](const AccessRightsElement & e) { return e.empty(); }); } diff --git a/src/Access/Common/AccessRightsElement.h b/src/Access/Common/AccessRightsElement.h index 49764fc727f..78e94e6f2e4 100644 --- a/src/Access/Common/AccessRightsElement.h +++ b/src/Access/Common/AccessRightsElement.h @@ -89,7 +89,7 @@ struct AccessRightsElement /// Returns a human-readable representation like "GRANT SELECT, UPDATE(x, y) ON db.table". String toString() const; String toStringWithoutOptions() const; - String toStringWithoutONClause() const; + String toStringForAccessTypeSource() const; }; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 4620561053b..8ff1fc8ed21 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -622,7 +622,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg /// since SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) { - AccessFlags newFlags; + AccessFlags new_flags; String table_engine_name{getTableEngine(args...)}; for (const auto & source_and_table_engine : source_and_table_engines) @@ -631,11 +631,11 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (table_engine != table_engine_name) continue; const auto & source = std::get<0>(source_and_table_engine); /// Set the flags from Table Engine to SOURCES so that prompts can be meaningful. 
- newFlags = source; + new_flags = source; break; } - if (newFlags.isEmpty()) + if (new_flags.isEmpty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); if (grant_option && acs->isGranted(flags, args...)) @@ -644,12 +644,12 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg "{}: Not enough privileges. " "The required privileges have been granted, but without grant option. " "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", - AccessRightsElement{newFlags}.toStringWithoutONClause()); + AccessRightsElement{new_flags}.toStringForAccessTypeSource()); } return access_denied(ErrorCodes::ACCESS_DENIED, "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", - AccessRightsElement{newFlags}.toStringWithoutONClause() + (grant_option ? " WITH GRANT OPTION" : "")); + AccessRightsElement{new_flags}.toStringForAccessTypeSource() + (grant_option ? " WITH GRANT OPTION" : "")); } if (grant_option && acs->isGranted(flags, args...)) diff --git a/tests/integration/test_grant_and_revoke/configs/config.xml b/tests/integration/test_grant_and_revoke/configs/config_with_table_engine_grant.xml similarity index 100% rename from tests/integration/test_grant_and_revoke/configs/config.xml rename to tests/integration/test_grant_and_revoke/configs/config_with_table_engine_grant.xml diff --git a/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml b/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml new file mode 100644 index 00000000000..d3571f281f5 --- /dev/null +++ b/tests/integration/test_grant_and_revoke/configs/config_without_table_engine_grant.xml @@ -0,0 +1,5 @@ + + + false + + diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py similarity index 99% rename from 
tests/integration/test_grant_and_revoke/test.py rename to tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py index e533cced1e4..25ca7913e4e 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py @@ -5,7 +5,7 @@ from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", - main_configs=["configs/config.xml"], + main_configs=["configs/config_with_table_engine_grant.xml"], user_configs=["configs/users.d/users.xml"], ) diff --git a/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py new file mode 100644 index 00000000000..210bb8ec465 --- /dev/null +++ b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py @@ -0,0 +1,81 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/config_without_table_engine_grant.xml"], + user_configs=["configs/users.d/users.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + + instance.query("CREATE DATABASE test") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + instance.query("DROP USER IF EXISTS A") + instance.query("DROP TABLE IF EXISTS test.table1") + + +def test_table_engine_and_source_grant(): + instance.query("DROP USER IF EXISTS A") + instance.query("CREATE USER A") + instance.query("GRANT CREATE TABLE ON test.table1 TO A") + + instance.query("GRANT POSTGRES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + 
instance.query("DROP TABLE test.table1") + + instance.query("REVOKE POSTGRES ON *.* FROM A") + + assert "Not enough privileges" in instance.query_and_get_error( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + # expecting grant POSTGRES instead of grant PostgreSQL due to discrepancy between source access type and table engine + assert "grant POSTGRES ON *.*" in instance.query_and_get_error( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("GRANT SOURCES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("DROP TABLE test.table1") From 57818990f201562d0b6938c1b8de78d16bac471f Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 2 Jul 2024 20:41:08 +0000 Subject: [PATCH 05/34] fix the test --- src/Access/ContextAccess.cpp | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 8ff1fc8ed21..f534c334318 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -618,6 +618,22 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (!granted) { + auto access_denied_no_grant = [&](AccessFlags access_flags, FmtArgs && ...fmt_args) + { + if (grant_option && acs->isGranted(access_flags, fmt_args...)) + { + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. " + "The required privileges have been granted, but without grant option. 
" + "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", + AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions()); + } + + return access_denied(ErrorCodes::ACCESS_DENIED, + "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", + AccessRightsElement{access_flags, fmt_args...}.toStringWithoutOptions() + (grant_option ? " WITH GRANT OPTION" : "")); + }; + /// As we check the SOURCES from the Table Engine logic, direct prompt about Table Engine would be misleading /// since SOURCES is not granted actually. In order to solve this, turn the prompt logic back to Sources. if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) @@ -635,8 +651,9 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg break; } + /// Might happen in the case of grant Table Engine on A (but not source), then revoke A. if (new_flags.isEmpty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Didn't find the target Source from the Table Engine"); + return access_denied_no_grant(flags, args...); if (grant_option && acs->isGranted(flags, args...)) { @@ -652,18 +669,7 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg AccessRightsElement{new_flags}.toStringForAccessTypeSource() + (grant_option ? " WITH GRANT OPTION" : "")); } - if (grant_option && acs->isGranted(flags, args...)) - { - return access_denied(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. " - "The required privileges have been granted, but without grant option. " - "To execute this query, it's necessary to have the grant {} WITH GRANT OPTION", - AccessRightsElement{flags, args...}.toStringWithoutOptions()); - } - - return access_denied(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query, it's necessary to have the grant {}", - AccessRightsElement{flags, args...}.toStringWithoutOptions() + (grant_option ? 
" WITH GRANT OPTION" : "")); + return access_denied_no_grant(flags, args...); } struct PrecalculatedFlags From b5cb264b017e965037dbb0bd4623df5f5a65ec0b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 15:41:30 +0000 Subject: [PATCH 06/34] Bump ICU to 71 --- contrib/icu | 2 +- contrib/icu-cmake/CMakeLists.txt | 10 +++++----- contrib/icudata | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/contrib/icu b/contrib/icu index a56dde820dc..c205e7ee49a 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668 +Subproject commit c205e7ee49a7086a28b9c275fcfdac9ca3dc815d diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index 0a650f2bcc0..f23b0002b8d 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -4,7 +4,9 @@ else () option(ENABLE_ICU "Enable ICU" 0) endif () -if (NOT ENABLE_ICU) +# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they re-generated +# the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255 +if (NOT ENABLE_ICU OR ARCH_S390X) message(STATUS "Not using ICU") return() endif() @@ -12,8 +14,6 @@ endif() set(ICU_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/source") set(ICUDATA_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icudata/") -set (CMAKE_CXX_STANDARD 17) - # These lists of sources were generated from build log of the original ICU build system (configure + make). 
set(ICUUC_SOURCES @@ -462,9 +462,9 @@ file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) if (ARCH_S390X) - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70b_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71b_dat.S" ) else() - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71l_dat.S" ) endif() set(ICUDATA_SOURCES diff --git a/contrib/icudata b/contrib/icudata index c8e717892a5..e7488edd1f1 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit c8e717892a557b4d2852317c7d628aacc0a0e5ab +Subproject commit e7488edd1f141b0664553a985a6fcd0125279527 From 2132ce52e0f72afe90e72e756d5ef494ad081ea9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 15 Jul 2024 06:48:39 +0000 Subject: [PATCH 07/34] Bump ICU to 75 --- contrib/icu | 2 +- contrib/icu-cmake/CMakeLists.txt | 6 +++--- contrib/icudata | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/contrib/icu b/contrib/icu index c205e7ee49a..7750081bda4 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit c205e7ee49a7086a28b9c275fcfdac9ca3dc815d +Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index f23b0002b8d..f9d05f7fe97 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -4,7 +4,7 @@ else () option(ENABLE_ICU "Enable ICU" 0) endif () -# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they re-generated +# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they generated # the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255 if (NOT ENABLE_ICU OR ARCH_S390X) message(STATUS "Not using ICU") @@ -462,9 +462,9 @@ 
file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) if (ARCH_S390X) - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71b_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75b_dat.S" ) else() - set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt71l_dat.S" ) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S" ) endif() set(ICUDATA_SOURCES diff --git a/contrib/icudata b/contrib/icudata index e7488edd1f1..d345d6ac22f 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit e7488edd1f141b0664553a985a6fcd0125279527 +Subproject commit d345d6ac22f381c882420de9053d30ae1ff38d75 From ae5eccbf20b7198d6a3cc908e0186a384aba038a Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 22 Jul 2024 13:39:48 +0000 Subject: [PATCH 08/34] just a commit to trigger CI --- .../test_grant_and_revoke/test_without_table_engine_grant.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py index 210bb8ec465..4a5dfb83f79 100644 --- a/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py +++ b/tests/integration/test_grant_and_revoke/test_without_table_engine_grant.py @@ -60,6 +60,7 @@ def test_table_engine_and_source_grant(): ) # expecting grant POSTGRES instead of grant PostgreSQL due to discrepancy between source access type and table engine + # similarly, other sources should also use their own defined name instead of the name of table engine assert "grant POSTGRES ON *.*" in instance.query_and_get_error( """ CREATE TABLE test.table1(a Integer) From 9d55553225c4c5e253e32fb0de9944a2e29b7bcf Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 23 Jul 2024 18:52:50 +0000 Subject: [PATCH 09/34] Forbid create as select even when database_replicated_allow_heavy_create is set --- src/Interpreters/InterpreterCreateQuery.cpp | 16 ++++++++++++---- 
...ed_database_forbid_create_as_select.reference | 2 ++ ...eplicated_database_forbid_create_as_select.sh | 8 ++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ea10ad59db4..2f837fe4d2b 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1329,8 +1329,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; - if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) + bool allow_heavy_populate = getContext()->getSettingsRef().database_replicated_allow_heavy_create && create.is_populate; + if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; if (create.storage && create.storage->engine) @@ -1342,10 +1342,18 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) + { + /// POPULATE can be enabled with setting, provide hint in error message + if (create.is_populate) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT and POPULATE is not supported with Replicated databases. 
Consider using separate CREATE and INSERT queries. " - "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + "CREATE AS SELECT is not supported with Replicated databases. Consider using separate CREATE and INSERT queries."); + } } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 15f169d880f..b587549cb60 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -18,8 +18,12 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIAL ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -# But it is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" 
--database_replicated_allow_heavy_create=1 +# POPULATE is allowed with the special setting ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv3 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --compatibility='24.6' + +# AS SELECT is forbidden even with the setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --compatibility='24.6' |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From e4b50c18c2c1918905bf44a8e1183f0cddd5a811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 24 Jul 2024 22:26:46 +0200 Subject: [PATCH 10/34] getauxval: Avoid crash under sanitizer re-exec due to high ASLR entropy --- base/glibc-compatibility/musl/getauxval.c | 38 +++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index ea5cff9fc11..86f9a546ee4 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -75,6 +75,44 @@ unsigned long NO_SANITIZE_THREAD __getauxval_procfs(unsigned long type) } static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) { +#if defined(__x86_64__) && defined(__has_feature) +# if 
__has_feature(memory_sanitizer) || __has_feature(thread_sanitizer) + /// Sanitizers are not compatible with high ASLR entropy, which is the default on modern Linux distributions, and + /// to workaround this limitation, TSAN and MSAN (couldn't see other sanitizers doing the same), re-exec the binary + /// without ASLR (see https://github.com/llvm/llvm-project/commit/0784b1eefa36d4acbb0dacd2d18796e26313b6c5) + + /// The problem we face is that, in order to re-exec, the sanitizer wants to use the original pathname in the call + /// and to get its value it uses getauxval (https://github.com/llvm/llvm-project/blob/20eff684203287828d6722fc860b9d3621429542/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L985-L988). + /// Since we provide getauxval ourselves (to minimize the version dependency on runtime glibc), we are the ones + // being called and we fail horribly: + /// + /// ==301455==ERROR: MemorySanitizer: SEGV on unknown address 0x2ffc6d721550 (pc 0x5622c1cc0073 bp 0x000000000003 sp 0x7ffc6d721530 T301455) + /// ==301455==The signal is caused by a WRITE memory access. 
+ /// #0 0x5622c1cc0073 in __auxv_init_procfs ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:129:5 + /// #1 0x5622c1cbffe9 in getauxval ./ClickHouse/base/glibc-compatibility/musl/getauxval.c:240:12 + /// #2 0x5622c0d7bfb4 in __sanitizer::ReExec() crtstuff.c + /// #3 0x5622c0df7bfc in __msan::InitShadowWithReExec(bool) crtstuff.c + /// #4 0x5622c0d95356 in __msan_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x256356) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + /// #5 0x5622c0dfe878 in msan.module_ctor main.cc + /// #6 0x5622c1cc156c in __libc_csu_init (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x118256c) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + /// #7 0x73dc05dd7ea3 in __libc_start_main /usr/src/debug/glibc/glibc/csu/../csu/libc-start.c:343:6 + /// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) + + /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as + /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliar vector. + /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very + /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise + // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). + + /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and it we used + /// __auxv_init_environ then it would SIGSEV on READing `__environ`) we capture this call for `AT_EXECFN` and + /// unconditionally return "/proc/self/exe" without any preparation. Theoretically this should be fine in + /// our case, as we don't load any libraries. That's the theory at least. 
+ if (type == AT_EXECFN) + return (unsigned long)"/proc/self/exe"; +# endif +#endif + // For debugging: // - od -t dL /proc/self/auxv // - LD_SHOW_AUX= ls From fda11dc62d81b717b9ab06c8adc8554c827764bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 24 Jul 2024 22:51:26 +0200 Subject: [PATCH 11/34] Typo --- base/glibc-compatibility/musl/getauxval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 86f9a546ee4..b5bd2f114c2 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -99,7 +99,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) /// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as - /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliar vector. + /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliary vector. /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). 
From 7fedc0ffbee9d04e0352037021a127cea93cbbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 12:26:37 +0200 Subject: [PATCH 12/34] Update base/glibc-compatibility/musl/getauxval.c Co-authored-by: Alexander Gololobov --- base/glibc-compatibility/musl/getauxval.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index b5bd2f114c2..28cb0f8d005 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -99,12 +99,12 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) /// #8 0x5622c0d6b7cd in _start (./ClickHouse/build_msan/contrib/google-protobuf-cmake/protoc+0x22c7cd) (BuildId: 6411d3c88b898ba3f7d49760555977d3e61f0741) /// The source of the issue above is that, at this point in time during __msan_init, we can't really do much as - /// most global variables aren't initialized or available yet, so we we can't initiate the auxiliary vector. + /// most global variables aren't initialized or available yet, so we can't initiate the auxiliary vector. /// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). - /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and it we used + /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and if we used /// __auxv_init_environ then it would SIGSEV on READing `__environ`) we capture this call for `AT_EXECFN` and /// unconditionally return "/proc/self/exe" without any preparation. Theoretically this should be fine in /// our case, as we don't load any libraries. 
That's the theory at least. From f2e83f092d1f677c4e0240e749f96766ff6e205c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 25 Jul 2024 21:56:42 +0200 Subject: [PATCH 13/34] Patch getauxval for tsan re-exec --- base/glibc-compatibility/CMakeLists.txt | 10 ++++++++++ base/glibc-compatibility/musl/getauxval.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index c967fa5b11b..8948e25cb8e 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -18,6 +18,16 @@ if (GLIBC_COMPATIBILITY) message (FATAL_ERROR "glibc_compatibility can only be used on x86_64 or aarch64.") endif () + if (SANITIZE STREQUAL thread) + # Disable TSAN instrumentation that conflicts with re-exec due to high ASLR entropy using getauxval + # See longer comment in __auxv_init_procfs + # In the case of tsan we need to make sure getauxval is not instrumented as that would introduce tsan + # internal calls to functions that depend on a state that isn't initialized yet + set_source_files_properties( + musl/getauxval.c + PROPERTIES COMPILE_FLAGS "-mllvm -tsan-instrument-func-entry-exit=false") + endif() + # Need to omit frame pointers to match the performance of glibc set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer") diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 28cb0f8d005..ec2cce1e4aa 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -102,7 +102,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) /// most global variables aren't initialized or available yet, so we can't initiate the auxiliary vector. 
/// Normal glibc / musl getauxval doesn't have this problem since they initiate their auxval vector at the very /// start of __libc_start_main (just keeping track of argv+argc+1), but we don't have such option (otherwise - // this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). + /// this complexity of reading "/proc/self/auxv" or using __environ would not be necessary). /// To avoid this crashes on the re-exec call (see above how it would fail when creating `aux`, and if we used /// __auxv_init_environ then it would SIGSEV on READing `__environ`) we capture this call for `AT_EXECFN` and @@ -237,7 +237,7 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_environ(unsigned long type) // - __auxv_init_procfs -> __auxv_init_environ -> __getauxval_environ static void * volatile getauxval_func = (void *)__auxv_init_procfs; -unsigned long getauxval(unsigned long type) +unsigned long NO_SANITIZE_THREAD getauxval(unsigned long type) { return ((unsigned long (*)(unsigned long))getauxval_func)(type); } From 3f70977cd660e4617d9bbd68cc229020adc57f98 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 25 Jul 2024 21:02:30 +0000 Subject: [PATCH 14/34] try to fix --- ...2572_query_views_log_background_thread.reference | 13 +++++++++---- .../02572_query_views_log_background_thread.sql | 8 ++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index 22dfaf93781..f867fd0d085 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -1,15 +1,14 @@ -- { echoOn } insert into buffer_02572 values (1); -- ensure that the flush was not direct +select * from buffer_02572; +1 select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach 
query context, so let's wait SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; -1 -select * from copy_02572; -1 +select sleepEachRow(1) from numbers(3*2) format Null; system flush logs; select count() > 0, lower(status::String), errorCodeToName(exception_code) from system.query_views_log where @@ -18,3 +17,9 @@ select count() > 0, lower(status::String), errorCodeToName(exception_code) group by 2, 3 ; 1 queryfinish OK +select * from buffer_02572; +1 +select * from data_02572; +1 +select * from copy_02572; +1 diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql index 939c189c5fe..2e9a62b71da 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql @@ -19,13 +19,13 @@ create materialized view mv_02572 to copy_02572 as select * from data_02572; -- { echoOn } insert into buffer_02572 values (1); -- ensure that the flush was not direct +select * from buffer_02572; select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; -select * from copy_02572; +select sleepEachRow(1) from numbers(3*2) format Null; system flush logs; select count() > 0, lower(status::String), errorCodeToName(exception_code) @@ -34,3 +34,7 @@ select count() > 0, lower(status::String), errorCodeToName(exception_code) view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') group by 2, 3 ; + +select * from buffer_02572; +select * from data_02572; +select * from copy_02572; \ No newline at end of file From 1ba44252cd20ab660d374970257a1ceb438236dd Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 26 Jul 
2024 18:33:50 +0000 Subject: [PATCH 15/34] turn sql to bash --- ...uery_views_log_background_thread.reference | 25 +----------- ...02572_query_views_log_background_thread.sh | 38 ++++++++++++++++++ ...2572_query_views_log_background_thread.sql | 40 ------------------- 3 files changed, 40 insertions(+), 63 deletions(-) create mode 100755 tests/queries/0_stateless/02572_query_views_log_background_thread.sh delete mode 100644 tests/queries/0_stateless/02572_query_views_log_background_thread.sql diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index f867fd0d085..d7f2272f5b4 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -1,25 +1,4 @@ --- { echoOn } -insert into buffer_02572 values (1); --- ensure that the flush was not direct -select * from buffer_02572; +OK +1 1 -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select sleepEachRow(1) from numbers(3*2) format Null; -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; 1 queryfinish OK -select * from buffer_02572; -1 -select * from data_02572; -1 -select * from copy_02572; -1 diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh new file mode 100755 index 00000000000..a3e428e75c8 --- /dev/null +++ 
b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# INSERT buffer_02572 -> data_02572 -> copy_02572 +# ^^ +# push to system.query_views_log + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572; + drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;" + +${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table data_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 3, 3, 1, 1e9, 1, 1e9);" +${CLICKHOUSE_CLIENT} --query="create materialized view mv_02572 to copy_02572 as select * from data_02572;" + +${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);" + +# ensure that the flush was not direct +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +# we cannot use OPTIMIZE, this will attach query context, so let's wait +for _ in {1..100}; do + $CLICKHOUSE_CLIENT -q "select * from data_02572;" | grep -q "1" && echo 'OK' && break + sleep 0.5 +done + + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +${CLICKHOUSE_CLIENT} --query="system flush logs;" +${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code) + from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') + group by 2, 3;" \ No newline at end of file diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql 
b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql deleted file mode 100644 index 2e9a62b71da..00000000000 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ /dev/null @@ -1,40 +0,0 @@ --- INSERT buffer_02572 -> data_02572 -> copy_02572 --- ^^ --- push to system.query_views_log - -drop table if exists buffer_02572; -drop table if exists data_02572; -drop table if exists copy_02572; -drop table if exists mv_02572; - -create table copy_02572 (key Int) engine=Memory(); -create table data_02572 (key Int) engine=Memory(); -create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, - /* never direct flush for flush from background thread */ - /* min_time= */ 3, 3, - 1, 1e9, - 1, 1e9); -create materialized view mv_02572 to copy_02572 as select * from data_02572; - --- { echoOn } -insert into buffer_02572 values (1); --- ensure that the flush was not direct -select * from buffer_02572; -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select sleepEachRow(1) from numbers(3*2) format Null; - -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; - -select * from buffer_02572; -select * from data_02572; -select * from copy_02572; \ No newline at end of file From 1ed6ea746917018fb02b00d78c5b0c4593919efd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 26 Jul 2024 10:28:01 +0000 Subject: [PATCH 16/34] Functions: Iterate over input_rows_count where appropriate --- src/Functions/CRC.cpp | 31 ++-- src/Functions/CountSubstringsImpl.h | 17 +- 
src/Functions/CustomWeekTransforms.h | 15 +- src/Functions/DateTimeTransforms.h | 17 +- src/Functions/EmptyImpl.h | 23 ++- src/Functions/ExtractString.h | 2 +- src/Functions/FunctionBitTestMany.h | 49 +++--- src/Functions/FunctionChar.cpp | 5 +- .../FunctionDateOrDateTimeAddInterval.h | 61 +++---- src/Functions/FunctionMathBinaryFloat64.h | 31 ++-- src/Functions/FunctionMathUnary.h | 20 +-- src/Functions/FunctionNumericPredicate.h | 30 ++-- src/Functions/FunctionSpaceFillingCurve.h | 2 - src/Functions/FunctionStringOrArrayToT.h | 16 +- src/Functions/FunctionStringReplace.h | 17 +- src/Functions/FunctionTokens.h | 9 +- .../FunctionsBinaryRepresentation.cpp | 16 +- src/Functions/FunctionsBitToArray.cpp | 64 ++++---- src/Functions/FunctionsBitmap.h | 24 +-- .../FunctionsCharsetClassification.cpp | 15 +- src/Functions/FunctionsCodingIP.cpp | 34 ++-- src/Functions/FunctionsCodingUUID.cpp | 35 ++-- src/Functions/FunctionsDecimalArithmetics.h | 70 ++++---- src/Functions/FunctionsEmbeddedDictionaries.h | 33 ++-- src/Functions/FunctionsHashing.h | 155 +++++++++--------- src/Functions/FunctionsJSON.cpp | 4 +- .../FunctionsLanguageClassification.cpp | 11 +- src/Functions/FunctionsLogical.cpp | 2 +- .../FunctionsMultiStringFuzzySearch.h | 8 +- src/Functions/FunctionsMultiStringSearch.h | 8 +- .../FunctionsProgrammingClassification.cpp | 9 +- src/Functions/FunctionsRandom.h | 3 +- src/Functions/FunctionsStringDistance.cpp | 17 +- src/Functions/FunctionsStringHash.cpp | 9 +- src/Functions/FunctionsStringHash.h | 14 +- .../FunctionsStringHashFixedString.cpp | 17 +- src/Functions/FunctionsStringSearch.h | 17 +- src/Functions/FunctionsStringSearchToString.h | 4 +- src/Functions/FunctionsStringSimilarity.cpp | 27 +-- src/Functions/FunctionsStringSimilarity.h | 11 +- src/Functions/FunctionsTextClassification.h | 10 +- src/Functions/FunctionsTimeWindow.cpp | 113 +++++++------ .../FunctionsTonalityClassification.cpp | 8 +- src/Functions/FunctionsVisitParam.h | 10 +- 
src/Functions/HasTokenImpl.h | 5 +- src/Functions/Kusto/KqlArraySort.cpp | 24 ++- src/Functions/MatchImpl.h | 44 +++-- src/Functions/MultiMatchAllIndicesImpl.h | 25 +-- src/Functions/MultiMatchAnyImpl.h | 29 ++-- src/Functions/MultiSearchFirstIndexImpl.h | 22 +-- src/Functions/MultiSearchFirstPositionImpl.h | 20 +-- src/Functions/MultiSearchImpl.h | 18 +- src/Functions/PolygonUtils.h | 8 +- src/Functions/PositionImpl.h | 22 ++- src/Functions/ReplaceRegexpImpl.h | 42 ++--- src/Functions/ReplaceStringImpl.h | 43 +++-- .../URL/FirstSignificantSubdomainCustomImpl.h | 14 +- src/Functions/URL/cutURLParameter.cpp | 9 +- src/Functions/URL/extractURLParameter.cpp | 7 +- src/Functions/URL/port.cpp | 10 +- src/Functions/UTCTimestampTransform.cpp | 12 +- src/Functions/array/arrayAUC.cpp | 13 +- src/Functions/array/arrayConcat.cpp | 3 +- src/Functions/array/arrayEnumerateRanked.h | 2 +- src/Functions/array/length.cpp | 20 +-- src/Functions/ascii.cpp | 22 +-- src/Functions/extract.cpp | 7 +- src/Functions/isValidUTF8.cpp | 24 +-- .../keyvaluepair/extractKeyValuePairs.cpp | 8 +- src/Functions/lengthUTF8.cpp | 24 +-- src/Functions/reinterpretAs.cpp | 44 +++-- src/Functions/visibleWidth.cpp | 5 +- src/Functions/widthBucket.cpp | 10 +- 73 files changed, 778 insertions(+), 851 deletions(-) diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index 0ba976669a3..73318d523b5 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -81,46 +81,43 @@ struct CRCFunctionWrapper static constexpr auto is_fixed_to_constant = true; using ReturnType = typename Impl::ReturnType; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; 
++i) { res[i] = doCRC(data, prev_offset, offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } - static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = doCRC(data, 0, n); } - - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res, size_t) { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - { - res[i] = doCRC(data, i * n, n); - } + res = doCRC(data, 0, n); } - [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) + res[i] = doCRC(data, i * n, n); + } + + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", std::string(Impl::name)); } - [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", std::string(Impl::name)); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", std::string(Impl::name)); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t, 
PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", std::string(Impl::name)); } diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index 9ff3e4e1f2a..8e91bc3aeb4 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -37,7 +37,8 @@ struct CountSubstringsImpl const std::string & needle, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t /*input_rows_count*/) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -150,7 +151,8 @@ struct CountSubstringsImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -158,9 +160,7 @@ struct CountSubstringsImpl ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; - size_t size = haystack_offsets.size(); - - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; @@ -207,7 +207,8 @@ struct CountSubstringsImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -215,9 +216,7 @@ struct CountSubstringsImpl /// NOTE You could use haystack indexing. But this is a rare case. 
ColumnString::Offset prev_needle_offset = 0; - size_t size = needle_offsets.size(); - - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = 0; auto start = start_pos != nullptr ? std::max(start_pos->getUInt(i), UInt64(1)) : UInt64(1); diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 75fb3c32f16..5b4de765362 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ -32,13 +32,12 @@ struct WeekTransformer {} template - void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone) const + void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone, size_t input_rows_count) const { using ValueType = typename ToVectorType::value_type; - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (is_extended_result) vec_to[i] = static_cast(transform.executeExtendedResult(vec_from[i], week_mode, time_zone)); @@ -56,7 +55,7 @@ template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {}) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count, Transform transform = {}) { const auto op = WeekTransformer{transform}; @@ -77,9 +76,9 @@ struct CustomWeekTransformImpl const auto * sources = checkAndGetColumn(source_col.get()); auto col_to = ToDataType::ColumnType::create(); - col_to->getData().resize(sources->size()); + col_to->getData().resize(input_rows_count); - for (size_t i = 0; i < sources->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { DateTime64 dt64; ReadBufferFromString buf(sources->getDataAt(i).toView()); @@ -92,7 +91,7 @@ struct 
CustomWeekTransformImpl else if (const auto * sources = checkAndGetColumn(source_col.get())) { auto col_to = ToDataType::ColumnType::create(); - op.vector(sources->getData(), col_to->getData(), week_mode, time_zone); + op.vector(sources->getData(), col_to->getData(), week_mode, time_zone, input_rows_count); return col_to; } else diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 5f745f3ccad..a7bd398cdaa 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -24,7 +24,7 @@ namespace DB static constexpr auto millisecond_multiplier = 1'000; static constexpr auto microsecond_multiplier = 1'000'000; -static constexpr auto nanosecond_multiplier = 1'000'000'000; +static constexpr auto nanosecond_multiplier = 1'000'000'000; static constexpr FormatSettings::DateTimeOverflowBehavior default_date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore; @@ -2134,13 +2134,12 @@ struct Transformer { template static void vector(const FromTypeVector & vec_from, ToTypeVector & vec_to, const DateLUTImpl & time_zone, const Transform & transform, - [[maybe_unused]] ColumnUInt8::Container * vec_null_map_to) + [[maybe_unused]] ColumnUInt8::Container * vec_null_map_to, size_t input_rows_count) { using ValueType = typename ToTypeVector::value_type; - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::is_same_v || std::is_same_v) { @@ -2178,7 +2177,7 @@ struct DateTimeTransformImpl { template static ColumnPtr execute( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {}) + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const Transform & transform = {}) { using Op = Transformer; @@ -2200,7 +2199,7 @@ struct 
DateTimeTransformImpl if (result_data_type.isDateTime() || result_data_type.isDateTime64()) { const auto & time_zone = dynamic_cast(*result_type).getTimeZone(); - Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to); + Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count); } else { @@ -2209,15 +2208,13 @@ struct DateTimeTransformImpl time_zone_argument_position = 2; const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0); - Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to); + Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count); } if constexpr (std::is_same_v) { if (vec_null_map_to) - { return ColumnNullable::create(std::move(mutable_result_col), std::move(col_null_map_to)); - } } return mutable_result_col; diff --git a/src/Functions/EmptyImpl.h b/src/Functions/EmptyImpl.h index d3b2dda024b..03675254eb6 100644 --- a/src/Functions/EmptyImpl.h +++ b/src/Functions/EmptyImpl.h @@ -21,11 +21,10 @@ struct EmptyImpl /// If the function will return constant value for FixedString data type. static constexpr auto is_fixed_to_constant = false; - static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); ColumnString::Offset prev_offset = 1; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = negative ^ (offsets[i] == prev_offset); prev_offset = offsets[i] + 1; @@ -33,42 +32,40 @@ struct EmptyImpl } /// Only make sense if is_fixed_to_constant. 
- static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) + static void vectorFixedToConstant(const ColumnString::Chars &, size_t, UInt8 &, size_t) { throw Exception(ErrorCodes::LOGICAL_ERROR, "'vectorFixedToConstant method' is called"); } - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) { - size_t size = data.size() / n; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) res[i] = negative ^ memoryIsZeroSmallAllowOverflow15(data.data() + i * n, n); } - static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = negative ^ (offsets[i] == prev_offset); prev_offset = offsets[i]; } } - static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray & res) + static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray & res, size_t) { for (size_t i = 0; i < n; ++i) res[i] = negative ^ (container[i].toUnderType() == 0); } - static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray & res) + static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray & res, size_t) { for (size_t i = 0; i < n; ++i) res[i] = negative ^ (container[i].toUnderType() == 0); } - static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray & res) + static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray & res, size_t) { for (size_t i = 0; i < n; ++i) res[i] = negative ^ (container[i].toUnderType() == 0); diff --git 
a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index 6beb8be830a..9f039f86b18 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -20,7 +20,7 @@ namespace DB // includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word struct ExtractStringImpl { - static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) + static const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) { // jump separators while (pos < end && isUTF8Sep(*pos)) diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 950e4ab4ea8..514b78ce59f 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -46,7 +46,7 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", first_arg->getName(), getName()); - for (const auto i : collections::range(1, arguments.size())) + for (size_t i = 1; i < arguments.size(); ++i) { const auto & pos_arg = arguments[i]; @@ -57,19 +57,19 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto * value_col = arguments.front().column.get(); ColumnPtr res; - if (!((res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)) - || (res = execute(arguments, result_type, value_col)))) + if (!((res = execute(arguments, result_type, value_col, input_rows_count)) 
+ || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)) + || (res = execute(arguments, result_type, value_col, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", value_col->getName(), getName()); return res; @@ -79,28 +79,28 @@ private: template ColumnPtr execute( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, - const IColumn * const value_col_untyped) const + const IColumn * const value_col_untyped, + size_t input_rows_count) const { if (const auto value_col = checkAndGetColumn>(value_col_untyped)) { - const auto size = value_col->size(); bool is_const; const auto const_mask = createConstMaskIfConst(arguments, is_const); const auto & val = value_col->getData(); - auto out_col = ColumnVector::create(size); + auto out_col = ColumnVector::create(input_rows_count); auto & out = out_col->getData(); if (is_const) { - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out[i] = Impl::apply(val[i], const_mask); } else { - const auto mask = createMask(size, arguments); + const auto mask = createMask(input_rows_count, arguments); - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out[i] = Impl::apply(val[i], mask[i]); } @@ -108,23 +108,22 @@ private: } else if (const auto value_col_const = checkAndGetColumnConst>(value_col_untyped)) { - const auto size = value_col_const->size(); bool is_const; const auto const_mask = createConstMaskIfConst(arguments, is_const); const auto val = value_col_const->template 
getValue(); if (is_const) { - return result_type->createColumnConst(size, toField(Impl::apply(val, const_mask))); + return result_type->createColumnConst(input_rows_count, toField(Impl::apply(val, const_mask))); } else { - const auto mask = createMask(size, arguments); - auto out_col = ColumnVector::create(size); + const auto mask = createMask(input_rows_count, arguments); + auto out_col = ColumnVector::create(input_rows_count); auto & out = out_col->getData(); - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out[i] = Impl::apply(val, mask[i]); return out_col; @@ -140,7 +139,7 @@ private: out_is_const = true; ValueType mask = 0; - for (const auto i : collections::range(1, arguments.size())) + for (size_t i = 1; i < arguments.size(); ++i) { if (auto pos_col_const = checkAndGetColumnConst>(arguments[i].column.get())) { @@ -166,7 +165,7 @@ private: { PaddedPODArray mask(size, ValueType{}); - for (const auto i : collections::range(1, arguments.size())) + for (size_t i = 1; i < arguments.size(); ++i) { const auto * pos_col = arguments[i].column.get(); @@ -187,7 +186,7 @@ private: { const auto & pos = pos_col->getData(); - for (const auto i : collections::range(0, mask.size())) + for (size_t i = 0; i < mask.size(); ++i) if (pos[i] < 8 * sizeof(ValueType)) mask[i] = mask[i] | (ValueType(1) << pos[i]); else @@ -205,7 +204,7 @@ private: const auto new_mask = ValueType(1) << pos; - for (const auto i : collections::range(0, mask.size())) + for (size_t i = 0; i < mask.size(); ++i) mask[i] = mask[i] | new_mask; return true; diff --git a/src/Functions/FunctionChar.cpp b/src/Functions/FunctionChar.cpp index 79e346a3ea4..dbdb7692177 100644 --- a/src/Functions/FunctionChar.cpp +++ b/src/Functions/FunctionChar.cpp @@ -103,14 +103,11 @@ private: const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data); if (!src_data_concrete) - { return false; - } for (size_t row = 0; row < rows; ++row) - { out_vec[row * size_per_row + 
column_idx] = static_cast(src_data_concrete->getInt(row)); - } + return true; } }; diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index f50b1415622..25231b8887b 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -428,19 +428,17 @@ struct Processor {} template - void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const + void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); if constexpr (std::is_same_v) { - const auto & offsets_from = col_from.getOffsets(); auto & vec_to = col_to.getData(); - size_t size = offsets_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0 ; i < size; ++i) + for (size_t i = 0 ; i < input_rows_count; ++i) { std::string_view from = col_from.getDataAt(i).toView(); vec_to[i] = transform.execute(from, checkOverflow(delta), time_zone, utc_time_zone, scale); @@ -451,32 +449,31 @@ struct Processor const auto & vec_from = col_from.getData(); auto & vec_to = col_to.getData(); - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, utc_time_zone, scale); } } template - void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const + void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { castTypeToEither< 
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale); return true; }); + &delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale, input_rows_count); return true; }); } template - void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const + void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { castTypeToEither< ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale); return true; }); + &delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale, input_rows_count); return true; }); } private: @@ -491,19 +488,17 @@ private: template NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector( - const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const + const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); if constexpr (std::is_same_v) { - const auto & offsets_from = col_from.getOffsets(); auto & vec_to = col_to.getData(); - size_t size = offsets_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0 ; i < size; ++i) + for (size_t i = 0 ; i < input_rows_count; ++i) { std::string_view from = col_from.getDataAt(i).toView(); vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), 
time_zone, utc_time_zone, scale); @@ -514,26 +509,24 @@ private: const auto & vec_from = col_from.getData(); auto & vec_to = col_to.getData(); - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); } } template NO_INLINE NO_SANITIZE_UNDEFINED void constantVector( - const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const + const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const { static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); auto & vec_to = col_to.getData(); - size_t size = delta.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); } }; @@ -542,7 +535,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale, size_t input_rows_count) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -561,15 +554,15 @@ struct DateTimeAddIntervalImpl if (const auto * sources = checkAndGetColumn(&source_column)) { if (const auto * delta_const_column = typeid_cast(&delta_column)) - processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale); + processor.vectorConstant(*sources, *col_to, 
delta_const_column->getInt(0), time_zone, scale, input_rows_count); else - processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale); + processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale, input_rows_count); } else if (const auto * sources_const = checkAndGetColumnConst(&source_column)) { processor.constantVector( sources_const->template getValue(), - *col_to, delta_column, time_zone, scale); + *col_to, delta_column, time_zone, scale, input_rows_count); } else { @@ -708,25 +701,25 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); if (which.isDate()) - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count); else if (which.isDate32()) - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count); else if (which.isDateTime()) - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count); else if (which.isDateTime64()) { const auto * datetime64_type = assert_cast(from_type); auto from_scale = datetime64_type->getScale(); - return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 
from_scale); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, from_scale, input_rows_count); } else if (which.isString()) - return DateTimeAddIntervalImpl::execute(Transform{}, arguments, result_type, 3); + return DateTimeAddIntervalImpl::execute(Transform{}, arguments, result_type, 3, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", arguments[0].type->getName(), getName()); } diff --git a/src/Functions/FunctionMathBinaryFloat64.h b/src/Functions/FunctionMathBinaryFloat64.h index 1b75ee688f4..82ff6ae7fbf 100644 --- a/src/Functions/FunctionMathBinaryFloat64.h +++ b/src/Functions/FunctionMathBinaryFloat64.h @@ -54,7 +54,7 @@ private: } template - static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg) + static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg, size_t input_rows_count) { if (const auto right_arg_typed = checkAndGetColumn>(right_arg)) { @@ -63,12 +63,11 @@ private: LeftType left_src_data[Impl::rows_per_iteration]; std::fill(std::begin(left_src_data), std::end(left_src_data), left_arg->template getValue()); const auto & right_src_data = right_arg_typed->getData(); - const auto src_size = right_src_data.size(); auto & dst_data = dst->getData(); - dst_data.resize(src_size); + dst_data.resize(input_rows_count); - const auto rows_remaining = src_size % Impl::rows_per_iteration; - const auto rows_size = src_size - rows_remaining; + const auto rows_remaining = input_rows_count % Impl::rows_per_iteration; + const auto rows_size = input_rows_count - rows_remaining; for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration) Impl::execute(left_src_data, &right_src_data[i], &dst_data[i]); @@ -92,7 +91,7 @@ private: } template - static ColumnPtr executeTyped(const ColumnVector * left_arg, const IColumn * right_arg) + static ColumnPtr executeTyped(const ColumnVector * left_arg, 
const IColumn * right_arg, size_t input_rows_count) { if (const auto right_arg_typed = checkAndGetColumn>(right_arg)) { @@ -100,12 +99,11 @@ private: const auto & left_src_data = left_arg->getData(); const auto & right_src_data = right_arg_typed->getData(); - const auto src_size = left_src_data.size(); auto & dst_data = dst->getData(); - dst_data.resize(src_size); + dst_data.resize(input_rows_count); - const auto rows_remaining = src_size % Impl::rows_per_iteration; - const auto rows_size = src_size - rows_remaining; + const auto rows_remaining = input_rows_count % Impl::rows_per_iteration; + const auto rows_size = input_rows_count - rows_remaining; for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration) Impl::execute(&left_src_data[i], &right_src_data[i], &dst_data[i]); @@ -136,12 +134,11 @@ private: const auto & left_src_data = left_arg->getData(); RightType right_src_data[Impl::rows_per_iteration]; std::fill(std::begin(right_src_data), std::end(right_src_data), right_arg_typed->template getValue()); - const auto src_size = left_src_data.size(); auto & dst_data = dst->getData(); - dst_data.resize(src_size); + dst_data.resize(input_rows_count); - const auto rows_remaining = src_size % Impl::rows_per_iteration; - const auto rows_size = src_size - rows_remaining; + const auto rows_remaining = input_rows_count % Impl::rows_per_iteration; + const auto rows_size = input_rows_count - rows_remaining; for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration) Impl::execute(&left_src_data[i], right_src_data, &dst_data[i]); @@ -165,7 +162,7 @@ private: return nullptr; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_left = arguments[0]; const ColumnWithTypeAndName & col_right = arguments[1]; @@ -202,7 +199,7 @@ 
private: if (const auto left_arg_typed = checkAndGetColumn(left_arg)) { - if ((res = executeTyped(left_arg_typed, right_arg))) + if ((res = executeTyped(left_arg_typed, right_arg, input_rows_count))) return true; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}", @@ -210,7 +207,7 @@ private: } if (const auto left_arg_typed = checkAndGetColumnConst(left_arg)) { - if ((res = executeTyped(left_arg_typed, right_arg))) + if ((res = executeTyped(left_arg_typed, right_arg, input_rows_count))) return true; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}", diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index 9f400932356..3cc8bf391b4 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -106,42 +106,40 @@ private: } template - static ColumnPtr execute(const ColumnVector * col) + static ColumnPtr execute(const ColumnVector * col, size_t input_rows_count) { const auto & src_data = col->getData(); - const size_t size = src_data.size(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); - dst_data.resize(size); + dst_data.resize(input_rows_count); - executeInIterations(src_data.data(), dst_data.data(), size); + executeInIterations(src_data.data(), dst_data.data(), input_rows_count); return dst; } template - static ColumnPtr execute(const ColumnDecimal * col) + static ColumnPtr execute(const ColumnDecimal * col, size_t input_rows_count) { const auto & src_data = col->getData(); - const size_t size = src_data.size(); UInt32 scale = col->getScale(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); - dst_data.resize(size); + dst_data.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) dst_data[i] = DecimalUtils::convertTo(src_data[i], scale); - executeInIterations(dst_data.data(), dst_data.data(), size); + 
executeInIterations(dst_data.data(), dst_data.data(), input_rows_count); return dst; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col = arguments[0]; ColumnPtr res; @@ -156,7 +154,7 @@ private: const auto col_vec = checkAndGetColumn(col.column.get()); if (col_vec == nullptr) return false; - return (res = execute(col_vec)) != nullptr; + return (res = execute(col_vec, input_rows_count)) != nullptr; }; if (!callOnBasicType(col.type->getTypeId(), call)) diff --git a/src/Functions/FunctionNumericPredicate.h b/src/Functions/FunctionNumericPredicate.h index fd495d7e8e7..97a3639734b 100644 --- a/src/Functions/FunctionNumericPredicate.h +++ b/src/Functions/FunctionNumericPredicate.h @@ -53,39 +53,37 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto * in = arguments.front().column.get(); ColumnPtr res; - if (!((res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)) - || (res = execute(in)))) + if (!((res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || 
(res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)) + || (res = execute(in, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); return res; } template - ColumnPtr execute(const IColumn * in_untyped) const + ColumnPtr execute(const IColumn * in_untyped, size_t input_rows_count) const { if (const auto in = checkAndGetColumn>(in_untyped)) { - const auto size = in->size(); - - auto out = ColumnUInt8::create(size); + auto out = ColumnUInt8::create(input_rows_count); const auto & in_data = in->getData(); auto & out_data = out->getData(); - for (const auto i : collections::range(0, size)) + for (size_t i = 0; i < input_rows_count; ++i) out_data[i] = Impl::execute(in_data[i]); return out; diff --git a/src/Functions/FunctionSpaceFillingCurve.h b/src/Functions/FunctionSpaceFillingCurve.h index ac9215f88e1..76f6678e847 100644 --- a/src/Functions/FunctionSpaceFillingCurve.h +++ b/src/Functions/FunctionSpaceFillingCurve.h @@ -132,9 +132,7 @@ public: } DataTypes types(tuple_size); for (size_t i = 0; i < tuple_size; i++) - { types[i] = std::make_shared(); - } return std::make_shared(types); } }; diff --git a/src/Functions/FunctionStringOrArrayToT.h b/src/Functions/FunctionStringOrArrayToT.h index 26c740f1fac..40f780d82a8 100644 --- a/src/Functions/FunctionStringOrArrayToT.h +++ b/src/Functions/FunctionStringOrArrayToT.h @@ -71,7 +71,7 @@ public: typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col->size()); - Impl::vector(col->getChars(), col->getOffsets(), vec_res); + Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count); return col_res; } @@ -80,7 +80,7 @@ public: if (Impl::is_fixed_to_constant) { ResultType res = 0; - Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res); + Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res, input_rows_count); return 
result_type->createColumnConst(col_fixed->size(), toField(res)); } @@ -90,7 +90,7 @@ public: typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_fixed->size()); - Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res); + Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res, input_rows_count); return col_res; } @@ -101,7 +101,7 @@ public: typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_arr->size()); - Impl::array(col_arr->getOffsets(), vec_res); + Impl::array(col_arr->getOffsets(), vec_res, input_rows_count); return col_res; } @@ -112,7 +112,7 @@ public: vec_res.resize(col_map->size()); const auto & col_nested = col_map->getNestedColumn(); - Impl::array(col_nested.getOffsets(), vec_res); + Impl::array(col_nested.getOffsets(), vec_res, input_rows_count); return col_res; } else if (const ColumnUUID * col_uuid = checkAndGetColumn(column.get())) @@ -120,7 +120,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_uuid->size()); - Impl::uuid(col_uuid->getData(), input_rows_count, vec_res); + Impl::uuid(col_uuid->getData(), input_rows_count, vec_res, input_rows_count); return col_res; } else if (const ColumnIPv6 * col_ipv6 = checkAndGetColumn(column.get())) @@ -128,7 +128,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_ipv6->size()); - Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res); + Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res, input_rows_count); return col_res; } else if (const ColumnIPv4 * col_ipv4 = checkAndGetColumn(column.get())) @@ -136,7 +136,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(col_ipv4->size()); - Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res); + 
Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res, input_rows_count); return col_res; } else diff --git a/src/Functions/FunctionStringReplace.h b/src/Functions/FunctionStringReplace.h index b4bcfa514a8..432e03bfe9e 100644 --- a/src/Functions/FunctionStringReplace.h +++ b/src/Functions/FunctionStringReplace.h @@ -45,7 +45,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr column_haystack = arguments[0].column; column_haystack = column_haystack->convertToFullColumnIfConst(); @@ -70,7 +70,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_const->getValue(), col_replacement_const->getValue(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack && col_needle_vector && col_replacement_const) @@ -79,7 +80,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), col_replacement_const->getValue(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack && col_needle_const && col_replacement_vector) @@ -88,7 +90,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_const->getValue(), col_replacement_vector->getChars(), col_replacement_vector->getOffsets(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack && col_needle_vector && col_replacement_vector) @@ -97,7 +100,8 @@ public: col_haystack->getChars(), col_haystack->getOffsets(), col_needle_vector->getChars(), 
col_needle_vector->getOffsets(), col_replacement_vector->getChars(), col_replacement_vector->getOffsets(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else if (col_haystack_fixed && col_needle_const && col_replacement_const) @@ -106,7 +110,8 @@ public: col_haystack_fixed->getChars(), col_haystack_fixed->getN(), col_needle_const->getValue(), col_replacement_const->getValue(), - col_res->getChars(), col_res->getOffsets()); + col_res->getChars(), col_res->getOffsets(), + input_rows_count); return col_res; } else diff --git a/src/Functions/FunctionTokens.h b/src/Functions/FunctionTokens.h index f1435ca5651..b6d8e9ee589 100644 --- a/src/Functions/FunctionTokens.h +++ b/src/Functions/FunctionTokens.h @@ -84,7 +84,7 @@ public: return std::make_shared(std::make_shared()); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { Generator generator; generator.init(arguments, max_substrings_includes_remaining_string); @@ -107,18 +107,17 @@ public: const ColumnString::Chars & src_chars = col_str->getChars(); const ColumnString::Offsets & src_offsets = col_str->getOffsets(); - res_offsets.reserve(src_offsets.size()); - res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random. + res_offsets.reserve(input_rows_count); + res_strings_offsets.reserve(input_rows_count * 5); /// Constant 5 - at random. 
res_strings_chars.reserve(src_chars.size()); Pos token_begin = nullptr; Pos token_end = nullptr; - size_t size = src_offsets.size(); ColumnString::Offset current_src_offset = 0; ColumnArray::Offset current_dst_offset = 0; ColumnString::Offset current_dst_strings_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Pos pos = reinterpret_cast(&src_chars[current_src_offset]); current_src_offset = src_offsets[i]; diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index f77d2f1f350..b3861f0394d 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -632,7 +632,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; @@ -646,11 +646,10 @@ public: const ColumnString::Chars & in_vec = col->getChars(); const ColumnString::Offsets & in_offsets = col->getOffsets(); - size_t size = in_offsets.size(); - out_offsets.resize(size); + out_offsets.resize(input_rows_count); size_t max_out_len = 0; - for (size_t i = 0; i < in_offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const size_t len = in_offsets[i] - (i == 0 ? 
0 : in_offsets[i - 1]) - /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1; @@ -662,7 +661,7 @@ public: char * pos = begin; size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t new_offset = in_offsets[i]; @@ -691,15 +690,14 @@ public: const ColumnString::Chars & in_vec = col_fix_string->getChars(); const size_t n = col_fix_string->getN(); - size_t size = col_fix_string->size(); - out_offsets.resize(size); - out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size); + out_offsets.resize(input_rows_count); + out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * input_rows_count); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t new_offset = prev_offset + n; diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index beaaccad6db..0a14516268a 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -60,17 +60,17 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)))) + if (!((res = executeType(arguments, input_rows_count)) + || 
(res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -98,7 +98,7 @@ private: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & columns) const + ColumnPtr executeType(const ColumnsWithTypeAndName & columns, size_t input_rows_count) const { if (const ColumnVector * col_from = checkAndGetColumn>(columns[0].column.get())) { @@ -107,13 +107,12 @@ private: const typename ColumnVector::Container & vec_from = col_from->getData(); ColumnString::Chars & data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - data_to.resize(size * 2); - offsets_to.resize(size); + data_to.resize(input_rows_count * 2); + offsets_to.resize(input_rows_count); WriteBufferFromVector buf_to(data_to); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { writeBitmask(vec_from[i], buf_to); writeChar(0, buf_to); @@ -244,7 +243,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } template - ColumnPtr executeType(const IColumn * column) const + ColumnPtr executeType(const IColumn * column, size_t input_rows_count) const { const ColumnVector * col_from = checkAndGetColumn>(column); if (!col_from) @@ -257,13 +256,12 @@ public: auto & result_array_offsets_data = result_array_offsets->getData(); auto & vec_from = col_from->getData(); - size_t size = vec_from.size(); - result_array_offsets_data.resize(size); - result_array_values_data.reserve(size * 2); + result_array_offsets_data.resize(input_rows_count); + 
result_array_values_data.reserve(input_rows_count * 2); using UnsignedType = make_unsigned_t; - for (size_t row = 0; row < size; ++row) + for (size_t row = 0; row < input_rows_count; ++row) { UnsignedType x = static_cast(vec_from[row]); @@ -302,24 +300,24 @@ public: return result_column; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * in_column = arguments[0].column.get(); ColumnPtr result_column; - if (!((result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)))) + if (!((result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)) + || (result_column = executeType(in_column, 
input_rows_count)) + || (result_column = executeType(in_column, input_rows_count)))) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 92ec71a3118..12b2b1a662a 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -155,7 +155,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); const auto * array_type = typeid_cast(from_type); @@ -165,21 +165,21 @@ public: WhichDataType which(nested_type); if (which.isUInt8()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isUInt16()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isUInt32()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isUInt64()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isInt8()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isInt16()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if (which.isInt32()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else if 
(which.isInt64()) - return executeBitmapData(argument_types, arguments); + return executeBitmapData(argument_types, arguments, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unexpected type {} of argument of function {}", from_type->getName(), getName()); @@ -187,7 +187,7 @@ public: private: template - ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { // input data const ColumnArray * array = typeid_cast(arguments[0].column.get()); @@ -203,10 +203,10 @@ private: AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get( AggregateFunctionGroupBitmapData::name(), action, argument_types, params_row, properties); auto col_to = ColumnAggregateFunction::create(bitmap_function); - col_to->reserve(offsets.size()); + col_to->reserve(input_rows_count); size_t pos = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { col_to->insertDefault(); AggregateFunctionGroupBitmapData & bitmap_data diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 0a332ab70a9..f01ede64cc0 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -23,7 +23,7 @@ namespace constexpr size_t max_string_size = 1UL << 15; template - ALWAYS_INLINE inline Float64 naiveBayes( + Float64 naiveBayes( const FrequencyHolder::EncodingMap & standard, const ModelMap & model, Float64 max_result) @@ -51,7 +51,7 @@ namespace /// Count how many times each bigram occurs in the text. 
template - ALWAYS_INLINE inline void calculateStats( + void calculateStats( const UInt8 * data, const size_t size, ModelMap & model) @@ -77,24 +77,25 @@ struct CharsetClassificationImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { const auto & encodings_freq = FrequencyHolder::getInstance().getEncodingsFrequency(); if constexpr (detect_language) /// 2 chars for ISO code + 1 zero byte - res_data.reserve(offsets.size() * 3); + res_data.reserve(input_rows_count * 3); else /// Mean charset length is 8 - res_data.reserve(offsets.size() * 8); + res_data.reserve(input_rows_count * 8); - res_offsets.resize(offsets.size()); + res_offsets.resize(input_rows_count); size_t current_result_offset = 0; double zero_frequency_log = log(zero_frequency); - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * str = data.data() + offsets[i - 1]; const size_t str_len = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index 0a97d029f84..0416df8f462 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -341,7 +341,7 @@ class FunctionIPv4NumToString : public IFunction { private: template - ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const + ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { using ColumnType = ColumnVector; @@ -356,12 +356,12 @@ private: ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 - offsets_res.resize(vec_in.size()); + 
vec_res.resize(input_rows_count * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 + offsets_res.resize(input_rows_count); char * begin = reinterpret_cast(vec_res.data()); char * pos = begin; - for (size_t i = 0; i < vec_in.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { DB::formatIPv4(reinterpret_cast(&vec_in[i]), sizeof(ArgType), pos, mask_tail_octets, "xxx"); offsets_res[i] = pos - begin; @@ -532,7 +532,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -542,11 +542,11 @@ public: auto col_res = ColumnIPv6::create(); auto & vec_res = col_res->getData(); - vec_res.resize(col_in->size()); + vec_res.resize(input_rows_count); const auto & vec_in = col_in->getData(); - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) mapIPv4ToIPv6(vec_in[i], reinterpret_cast(&vec_res[i].toUnderType())); return col_res; @@ -557,7 +557,7 @@ public: auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); auto & vec_res = col_res->getChars(); - vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH); + vec_res.resize(input_rows_count * IPV6_BINARY_LENGTH); const auto & vec_in = col_in->getData(); @@ -742,7 +742,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; @@ -751,13 +751,13 
@@ public: auto col_res = ColumnUInt64::create(); ColumnUInt64::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); + vec_res.resize(input_rows_count); const ColumnString::Chars & vec_src = col->getChars(); const ColumnString::Offsets & offsets_src = col->getOffsets(); size_t prev_offset = 0; - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t current_offset = offsets_src[i]; size_t string_size = current_offset - prev_offset - 1; /// mind the terminating zero byte @@ -1054,7 +1054,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnString * input_column = checkAndGetColumn(arguments[0].column.get()); @@ -1067,14 +1067,14 @@ public: auto col_res = ColumnUInt8::create(); ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(input_column->size()); + vec_res.resize(input_rows_count); const ColumnString::Chars & vec_src = input_column->getChars(); const ColumnString::Offsets & offsets_src = input_column->getOffsets(); size_t prev_offset = 0; UInt32 result = 0; - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { vec_res[i] = DB::parseIPv4whole(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&result)); prev_offset = offsets_src[i]; @@ -1110,7 +1110,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnString * input_column = checkAndGetColumn(arguments[0].column.get()); if (!input_column) @@ -1122,14 +1122,14 @@ 
public: auto col_res = ColumnUInt8::create(); ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(input_column->size()); + vec_res.resize(input_rows_count); const ColumnString::Chars & vec_src = input_column->getChars(); const ColumnString::Offsets & offsets_src = input_column->getOffsets(); size_t prev_offset = 0; char buffer[IPV6_BINARY_LENGTH]; - for (size_t i = 0; i < vec_res.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { vec_res[i] = DB::parseIPv6whole(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&vec_src[offsets_src[i] - 1]), diff --git a/src/Functions/FunctionsCodingUUID.cpp b/src/Functions/FunctionsCodingUUID.cpp index 83fdcbc4af9..179ba1bf97a 100644 --- a/src/Functions/FunctionsCodingUUID.cpp +++ b/src/Functions/FunctionsCodingUUID.cpp @@ -177,7 +177,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -189,21 +189,20 @@ public: "Illegal type {} of column {} argument of function {}, expected FixedString({})", col_type_name.type->getName(), col_in->getName(), getName(), uuid_bytes_length); - const auto size = col_in->size(); const auto & vec_in = col_in->getChars(); auto col_res = ColumnString::create(); ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(size * (uuid_text_length + 1)); - offsets_res.resize(size); + vec_res.resize(input_rows_count * (uuid_text_length + 1)); + offsets_res.resize(input_rows_count); size_t src_offset = 0; size_t 
dst_offset = 0; const UUIDSerializer uuid_serializer(variant); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { uuid_serializer.deserialize(&vec_in[src_offset], &vec_res[dst_offset]); src_offset += uuid_bytes_length; @@ -256,7 +255,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -266,17 +265,16 @@ public: { const auto & vec_in = col_in->getChars(); const auto & offsets_in = col_in->getOffsets(); - const size_t size = offsets_in.size(); auto col_res = ColumnFixedString::create(uuid_bytes_length); ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); + vec_res.resize(input_rows_count * uuid_bytes_length); size_t src_offset = 0; size_t dst_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { /// If string has incorrect length - then return zero UUID. /// If string has correct length but contains something not like UUID - implementation specific behaviour. 
@@ -300,18 +298,17 @@ public: "Illegal type {} of column {} argument of function {}, expected FixedString({})", col_type_name.type->getName(), col_in_fixed->getName(), getName(), uuid_text_length); - const auto size = col_in_fixed->size(); const auto & vec_in = col_in_fixed->getChars(); auto col_res = ColumnFixedString::create(uuid_bytes_length); ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); + vec_res.resize(input_rows_count * uuid_bytes_length); size_t src_offset = 0; size_t dst_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { uuid_serializer.serialize(&vec_in[src_offset], &vec_res[dst_offset]); src_offset += uuid_text_length; @@ -359,7 +356,7 @@ public: return std::make_shared(uuid_bytes_length); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -370,16 +367,15 @@ public: { const auto & vec_in = col_in->getData(); const UUID * uuids = vec_in.data(); - const size_t size = vec_in.size(); auto col_res = ColumnFixedString::create(uuid_bytes_length); ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); + vec_res.resize(input_rows_count * uuid_bytes_length); size_t dst_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]); uint64_t loBytes = DB::UUIDHelpers::getLowBytes(uuids[i]); @@ -448,7 +444,7 @@ public: return std::make_shared(datetime_scale, timezone); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr 
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & col_type_name = arguments[0]; const ColumnPtr & column = col_type_name.column; @@ -457,12 +453,11 @@ public: { const auto & vec_in = col_in->getData(); const UUID * uuids = vec_in.data(); - const size_t size = vec_in.size(); - auto col_res = ColumnDateTime64::create(size, datetime_scale); + auto col_res = ColumnDateTime64::create(input_rows_count, datetime_scale); auto & vec_res = col_res->getData(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]); const uint64_t ms = ((hiBytes & 0xf000) == 0x7000) ? (hiBytes >> 16) : 0; diff --git a/src/Functions/FunctionsDecimalArithmetics.h b/src/Functions/FunctionsDecimalArithmetics.h index e26ad7362b3..9b9045f7c69 100644 --- a/src/Functions/FunctionsDecimalArithmetics.h +++ b/src/Functions/FunctionsDecimalArithmetics.h @@ -151,36 +151,36 @@ struct Processor template void NO_INLINE vectorConstant(const FirstArgVectorType & vec_first, const SecondArgType second_value, - PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const + PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale, + size_t input_rows_count) const { - size_t size = vec_first.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_first[i], second_value, scale_a, scale_b, result_scale); } template void NO_INLINE vectorVector(const FirstArgVectorType & vec_first, const SecondArgVectorType & vec_second, - PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const + PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale, + size_t input_rows_count) const { - size_t size = vec_first.size(); - 
vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(vec_first[i], vec_second[i], scale_a, scale_b, result_scale); } template void NO_INLINE constantVector(const FirstArgType & first_value, const SecondArgVectorType & vec_second, - PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const + PaddedPODArray & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale, + size_t input_rows_count) const { - size_t size = vec_second.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = transform.execute(first_value, vec_second[i], scale_a, scale_b, result_scale); } }; @@ -189,7 +189,7 @@ struct Processor template struct DecimalArithmeticsImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) { using FirstArgValueType = typename FirstArgType::FieldType; using FirstArgColumnType = typename FirstArgType::ColumnType; @@ -214,13 +214,13 @@ struct DecimalArithmeticsImpl if (first_col) { if (second_col_const) - op.vectorConstant(first_col->getData(), second_col_const->template getValue(), col_to->getData(), scale_a, scale_b, result_scale); + op.vectorConstant(first_col->getData(), second_col_const->template getValue(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count); else - op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale); + op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count); } else if (first_col_const) { - op.constantVector(first_col_const->template 
getValue(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale); + op.constantVector(first_col_const->template getValue(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count); } else { @@ -293,14 +293,14 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { - return resolveOverload(arguments, result_type); + return resolveOverload(arguments, result_type, input_rows_count); } private: // long resolver to call proper templated func - ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const + ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const { WhichDataType which_dividend(arguments[0].type.get()); WhichDataType which_divisor(arguments[1].type.get()); @@ -309,26 +309,26 @@ private: { using DividendType = DataTypeDecimal32; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - 
return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); } else if (which_dividend.isDecimal64()) { using DividendType = DataTypeDecimal64; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); } @@ -336,13 +336,13 @@ private: { using DividendType = DataTypeDecimal128; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, 
arguments, result_type, input_rows_count); } @@ -350,13 +350,13 @@ private: { using DividendType = DataTypeDecimal256; if (which_divisor.isDecimal32()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal64()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal128()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); else if (which_divisor.isDecimal256()) - return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type); + return DecimalArithmeticsImpl::execute(Transform{}, arguments, result_type, input_rows_count); } diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index 2f270bf999a..f27934ce5a9 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -181,7 +181,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { /// The dictionary key that defines the "point of view". 
std::string dict_key; @@ -205,10 +205,9 @@ public: const typename ColumnVector::Container & vec_from = col_from->getData(); typename ColumnVector::Container & vec_to = col_to->getData(); - size_t size = vec_from.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(vec_from[i], dict); return col_to; @@ -273,7 +272,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -303,10 +302,9 @@ public: const typename ColumnVector::Container & vec_from1 = col_vec1->getData(); const typename ColumnVector::Container & vec_from2 = col_vec2->getData(); typename ColumnUInt8::Container & vec_to = col_to->getData(); - size_t size = vec_from1.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(vec_from1[i], vec_from2[i], dict); return col_to; @@ -318,10 +316,9 @@ public: const typename ColumnVector::Container & vec_from1 = col_vec1->getData(); const T const_from2 = col_const2->template getValue(); typename ColumnUInt8::Container & vec_to = col_to->getData(); - size_t size = vec_from1.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(vec_from1[i], const_from2, dict); return col_to; @@ -333,10 +330,9 @@ public: const T const_from1 = col_const1->template getValue(); const typename ColumnVector::Container & vec_from2 = col_vec2->getData(); typename 
ColumnUInt8::Container & vec_to = col_to->getData(); - size_t size = vec_from2.size(); - vec_to.resize(size); + vec_to.resize(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Transform::apply(const_from1, vec_from2[i], dict); return col_to; @@ -405,7 +401,7 @@ public: bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { /// The dictionary key that defines the "point of view". std::string dict_key; @@ -432,11 +428,10 @@ public: auto & res_values = col_values->getData(); const typename ColumnVector::Container & vec_from = col_from->getData(); - size_t size = vec_from.size(); - res_offsets.resize(size); - res_values.reserve(size * 4); + res_offsets.resize(input_rows_count); + res_values.reserve(input_rows_count * 4); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { T cur = vec_from[i]; for (size_t depth = 0; cur && depth < DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH; ++depth) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 95c54ac9528..91e49bf5035 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -715,7 +715,7 @@ private: using ToType = typename Impl::ReturnType; template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { using ColVecType = ColumnVectorOrDecimal; @@ -726,9 +726,8 @@ private: const typename ColVecType::Container & vec_from = col_from->getData(); typename ColumnVector::Container & vec_to = col_to->getData(); - size_t size = vec_from.size(); - vec_to.resize(size); - for (size_t i = 0; i < size; ++i) + 
vec_to.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) vec_to[i] = Impl::apply(vec_from[i]); return col_to; @@ -759,39 +758,39 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); if (which.isUInt8()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isUInt16()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isUInt32()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isUInt64()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isInt8()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isInt16()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isInt32()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isInt64()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isDate()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isDate32()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isDateTime()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isDecimal32()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isDecimal64()) - 
return executeType(arguments); + return executeType(arguments, input_rows_count); else if (which.isIPv4()) - return executeType(arguments); + return executeType(arguments, input_rows_count); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0].type->getName(), getName()); @@ -843,7 +842,7 @@ private: using ToType = typename Impl::ReturnType; template - void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const { using ColVecType = ColumnVectorOrDecimal; KeyType key{}; @@ -853,8 +852,7 @@ private: if (const ColVecType * col_from = checkAndGetColumn(column)) { const typename ColVecType::Container & vec_from = col_from->getData(); - const size_t size = vec_from.size(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { ToType hash; @@ -894,7 +892,7 @@ private: if (!key_cols.is_const) { ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeIntType(key_cols, full_column.get(), vec_to); + return executeIntType(key_cols, full_column.get(), vec_to, input_rows_count); } } auto value = col_from_const->template getValue(); @@ -938,7 +936,7 @@ private: } template - void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const { using ColVecType = ColumnVectorOrDecimal; KeyType key{}; @@ -958,8 +956,7 @@ private: if (const ColVecType * col_from = checkAndGetColumn(column)) { const typename ColVecType::Container & vec_from = col_from->getData(); - size_t size = vec_from.size(); - for (size_t i = 0; i < size; ++i) + for 
(size_t i = 0; i < input_rows_count; ++i) { ToType hash; if constexpr (Keyed) @@ -987,7 +984,7 @@ private: if (!key_cols.is_const) { ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeBigIntType(key_cols, full_column.get(), vec_to); + return executeBigIntType(key_cols, full_column.get(), vec_to, input_rows_count); } } auto value = col_from_const->template getValue(); @@ -1014,12 +1011,12 @@ private: } template - void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const { KeyType key{}; if constexpr (Keyed) key = Impl::getKey(key_cols, 0); - for (size_t i = 0, size = column->size(); i < size; ++i) + for (size_t i = 0, size = input_rows_count; i < size; ++i) { if constexpr (Keyed) if (!key_cols.is_const && i != 0) @@ -1034,7 +1031,7 @@ private: } template - void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const { KeyType key{}; if constexpr (Keyed) @@ -1043,10 +1040,9 @@ private: { const typename ColumnString::Chars & data = col_from->getChars(); const typename ColumnString::Offsets & offsets = col_from->getOffsets(); - size_t size = offsets.size(); ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (Keyed) if (!key_cols.is_const && i != 0) @@ -1067,9 +1063,8 @@ private: { const typename ColumnString::Chars & data = col_from_fixed->getChars(); size_t n = col_from_fixed->getN(); - size_t size = data.size() / n; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (Keyed) if 
(!key_cols.is_const && i != 0) @@ -1088,7 +1083,7 @@ private: if (!key_cols.is_const) { ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeString(key_cols, full_column.get(), vec_to); + return executeString(key_cols, full_column.get(), vec_to, input_rows_count); } } String value = col_from_const->getValue(); @@ -1114,7 +1109,7 @@ private: } template - void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const { const IDataType * nested_type = typeid_cast(*type).getNestedType().get(); @@ -1131,18 +1126,16 @@ private: { KeyColumnsType key_cols_tmp{key_cols}; key_cols_tmp.offsets = &offsets; - executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first); + executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first, input_rows_count); } else - executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); - - const size_t size = offsets.size(); + executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first, input_rows_count); ColumnArray::Offset current_offset = 0; KeyType key{}; if constexpr (Keyed) key = Impl::getKey(key_cols, 0); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (Keyed) if (!key_cols.is_const && i != 0) @@ -1170,7 +1163,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. 
ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key_cols, type, full_column.get(), vec_to); + executeArray(key_cols, type, full_column.get(), vec_to, input_rows_count); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1178,7 +1171,7 @@ private: } template - void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const + void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to, size_t input_rows_count) const { WhichDataType which(from_type); @@ -1190,43 +1183,43 @@ private: if (key_cols.size() != vec_to.size() && key_cols.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName()); - if (which.isUInt8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isUInt256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isInt8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isInt256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isUUID()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isIPv4()) executeIntType(key_cols, icolumn, vec_to); - else if 
(which.isIPv6()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isEnum8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isEnum16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDate()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDate32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDateTime()) executeIntType(key_cols, icolumn, vec_to); + if (which.isUInt8()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isUInt16()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isUInt32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isUInt64()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isUInt128()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isUInt256()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isInt8()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isInt16()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isInt32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isInt64()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isInt128()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isInt256()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isUUID()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isIPv4()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isIPv6()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isEnum8()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isEnum16()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isDate()) 
executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isDate32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isDateTime()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); /// TODO: executeIntType() for Decimal32/64 leads to incompatible result - else if (which.isDecimal32()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal64()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isFloat32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isFloat64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isString()) executeString(key_cols, icolumn, vec_to); - else if (which.isFixedString()) executeString(key_cols, icolumn, vec_to); - else if (which.isArray()) executeArray(key_cols, from_type, icolumn, vec_to); - else executeGeneric(key_cols, icolumn, vec_to); + else if (which.isDecimal32()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isDecimal64()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isDecimal128()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isDecimal256()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isFloat32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isFloat64()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isString()) executeString(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isFixedString()) executeString(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isArray()) executeArray(key_cols, from_type, icolumn, vec_to, input_rows_count); + else executeGeneric(key_cols, icolumn, vec_to, input_rows_count); } /// Return a fixed 
random-looking magic number when input is empty. static constexpr auto filler = 0xe28dbde7fe22e41c; - void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const + void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first, size_t input_rows_count) const { /// Flattening of tuples. if (const ColumnTuple * tuple = typeid_cast(column)) @@ -1240,7 +1233,7 @@ private: hash = static_cast(filler); for (size_t i = 0; i < tuple_size; ++i) - executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); + executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first, input_rows_count); } else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData(column)) { @@ -1255,24 +1248,24 @@ private: for (size_t i = 0; i < tuple_size; ++i) { auto tmp = ColumnConst::create(tuple_columns[i], column->size()); - executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first); + executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first, input_rows_count); } } else if (const auto * map = checkAndGetColumn(column)) { const auto & type_map = assert_cast(*type); - executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first, input_rows_count); } else if (const auto * const_map = checkAndGetColumnConst(column)) { - executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); + executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first, input_rows_count); } else { if (is_first) - executeAny(key_cols, type, column, vec_to); + 
executeAny(key_cols, type, column, vec_to, input_rows_count); else - executeAny(key_cols, type, column, vec_to); + executeAny(key_cols, type, column, vec_to, input_rows_count); } is_first = false; @@ -1325,7 +1318,7 @@ public: for (size_t i = first_data_argument; i < arguments.size(); ++i) { const auto & col = arguments[i]; - executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument); + executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument, input_rows_count); } } diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 848856c500f..c35df8ba72d 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -125,7 +125,7 @@ public: } String error; - for (const auto i : collections::range(0, input_rows_count)) + for (size_t i = 0; i < input_rows_count; ++i) { if (!col_json_const) { @@ -314,7 +314,7 @@ private: static size_t calculateMaxSize(const ColumnString::Offsets & offsets) { size_t max_size = 0; - for (const auto i : collections::range(0, offsets.size())) + for (size_t i = 0; i < offsets.size(); ++i) { size_t size = offsets[i] - offsets[i - 1]; max_size = std::max(max_size, size); diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 55485d41ce0..410eea0f437 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED; struct FunctionDetectLanguageImpl { - static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string) + static std::string_view codeISO(std::string_view code_string) { if (code_string.ends_with("-Latn")) code_string.remove_suffix(code_string.size() - 5); @@ -63,16 +63,17 @@ struct FunctionDetectLanguageImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & 
res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { /// Constant 3 is based on the fact that in general we need 2 characters for ISO code + 1 zero byte - res_data.reserve(offsets.size() * 3); - res_offsets.resize(offsets.size()); + res_data.reserve(input_rows_count * 3); + res_offsets.resize(input_rows_count); bool is_reliable; size_t res_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * str = data.data() + offsets[i - 1]; const size_t str_len = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index e1814150da6..65d7473b945 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -48,7 +48,7 @@ using UInt8Container = ColumnUInt8::Container; using UInt8ColumnPtrs = std::vector; -MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, const bool make_nullable) +MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, bool make_nullable) { const size_t rows_count = ternary_data.size(); diff --git a/src/Functions/FunctionsMultiStringFuzzySearch.h b/src/Functions/FunctionsMultiStringFuzzySearch.h index a92a6570279..8346380c35d 100644 --- a/src/Functions/FunctionsMultiStringFuzzySearch.h +++ b/src/Functions/FunctionsMultiStringFuzzySearch.h @@ -71,7 +71,7 @@ public: return Impl::getReturnType(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & haystack_ptr = arguments[0].column; const ColumnPtr & edit_distance_ptr = arguments[1].column; @@ -114,14 +114,16 @@ public: col_needles_const->getValue(), vec_res, offsets_res, edit_distance, - allow_hyperscan, max_hyperscan_regexp_length, 
max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, + input_rows_count); else Impl::vectorVector( col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needles_vector->getData(), col_needles_vector->getOffsets(), vec_res, offsets_res, edit_distance, - allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, + input_rows_count); // the combination of const haystack + const needle is not implemented because // useDefaultImplementationForConstants() == true makes upper layers convert both to diff --git a/src/Functions/FunctionsMultiStringSearch.h b/src/Functions/FunctionsMultiStringSearch.h index 03db2651fd0..6bcc8581a38 100644 --- a/src/Functions/FunctionsMultiStringSearch.h +++ b/src/Functions/FunctionsMultiStringSearch.h @@ -81,7 +81,7 @@ public: return Impl::getReturnType(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & haystack_ptr = arguments[0].column; const ColumnPtr & needles_ptr = arguments[1].column; @@ -110,13 +110,15 @@ public: col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needles_const->getValue(), vec_res, offsets_res, - allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, + input_rows_count); else Impl::vectorVector( col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), 
col_needles_vector->getData(), col_needles_vector->getOffsets(), vec_res, offsets_res, - allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, + input_rows_count); // the combination of const haystack + const needle is not implemented because // useDefaultImplementationForConstants() == true makes upper layers convert both to diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index c01e47ad0d7..bbedef024d5 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -40,17 +40,18 @@ struct FunctionDetectProgrammingLanguageImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { const auto & programming_freq = FrequencyHolder::getInstance().getProgrammingFrequency(); /// Constant 5 is arbitrary - res_data.reserve(offsets.size() * 5); - res_offsets.resize(offsets.size()); + res_data.reserve(input_rows_count * 5); + res_offsets.resize(input_rows_count); size_t res_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * str = data.data() + offsets[i - 1]; const size_t str_len = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 36448c6f689..83075ca01cb 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -80,8 +80,7 @@ public: auto col_to = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_to->getData(); - size_t size = input_rows_count; - vec_to.resize(size); + vec_to.resize(input_rows_count); 
RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(ToType)); return col_to; diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index 48f4aaf4e09..5aae92a8141 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -37,12 +37,12 @@ struct FunctionStringDistanceImpl const ColumnString::Offsets & haystack_offsets, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { - size_t size = res.size(); const char * haystack = reinterpret_cast(haystack_data.data()); const char * needle = reinterpret_cast(needle_data.data()); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = Op::process( haystack + haystack_offsets[i - 1], @@ -56,13 +56,13 @@ struct FunctionStringDistanceImpl const String & haystack, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { const char * haystack_data = haystack.data(); size_t haystack_size = haystack.size(); const char * needle = reinterpret_cast(needle_data.data()); - size_t size = res.size(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = Op::process(haystack_data, haystack_size, needle + needle_offsets[i - 1], needle_offsets[i] - needle_offsets[i - 1] - 1); @@ -73,9 +73,10 @@ struct FunctionStringDistanceImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const String & needle, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { - constantVector(needle, data, offsets, res); + constantVector(needle, data, offsets, res, input_rows_count); } }; diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index 0bf6e39e651..bd7d45d781a 100644 
--- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -315,9 +315,9 @@ struct SimHashImpl return getSimHash(finger_vec); } - static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray & res) + static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray & res, size_t input_rows_count) { - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * one_data = &data[offsets[i - 1]]; const size_t data_size = offsets[i] - offsets[i - 1] - 1; @@ -543,12 +543,13 @@ struct MinHashImpl PaddedPODArray * res1, PaddedPODArray * res2, ColumnTuple * res1_strings, - ColumnTuple * res2_strings) + ColumnTuple * res2_strings, + size_t input_rows_count) { MinHeap min_heap; MaxHeap max_heap; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * one_data = &data[offsets[i - 1]]; const size_t data_size = offsets[i] - offsets[i - 1] - 1; diff --git a/src/Functions/FunctionsStringHash.h b/src/Functions/FunctionsStringHash.h index fcd4c970a47..f790c660e21 100644 --- a/src/Functions/FunctionsStringHash.h +++ b/src/Functions/FunctionsStringHash.h @@ -135,7 +135,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; @@ -152,9 +152,9 @@ public: { auto col_res = ColumnVector::create(); auto & vec_res = col_res->getData(); - vec_res.resize(column->size()); + vec_res.resize(input_rows_count); const ColumnString & col_str_vector = checkAndGetColumn(*column); - Impl::apply(col_str_vector.getChars(), 
col_str_vector.getOffsets(), shingle_size, vec_res); + Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, vec_res, input_rows_count); return col_res; } else if constexpr (is_arg) // Min hash arg @@ -171,7 +171,7 @@ public: auto max_tuple = ColumnTuple::create(std::move(max_columns)); const ColumnString & col_str_vector = checkAndGetColumn(*column); - Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get()); + Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get(), input_rows_count); MutableColumns tuple_columns; tuple_columns.emplace_back(std::move(min_tuple)); @@ -184,10 +184,10 @@ public: auto col_h2 = ColumnVector::create(); auto & vec_h1 = col_h1->getData(); auto & vec_h2 = col_h2->getData(); - vec_h1.resize(column->size()); - vec_h2.resize(column->size()); + vec_h1.resize(input_rows_count); + vec_h2.resize(input_rows_count); const ColumnString & col_str_vector = checkAndGetColumn(*column); - Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr); + Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr, input_rows_count); MutableColumns tuple_columns; tuple_columns.emplace_back(std::move(col_h1)); tuple_columns.emplace_back(std::move(col_h2)); diff --git a/src/Functions/FunctionsStringHashFixedString.cpp b/src/Functions/FunctionsStringHashFixedString.cpp index 01e989a7f2c..9474fe2629e 100644 --- a/src/Functions/FunctionsStringHashFixedString.cpp +++ b/src/Functions/FunctionsStringHashFixedString.cpp @@ -224,7 +224,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & 
arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) { @@ -233,11 +233,10 @@ public: const typename ColumnString::Chars & data = col_from->getChars(); const typename ColumnString::Offsets & offsets = col_from->getOffsets(); auto & chars_to = col_to->getChars(); - const auto size = offsets.size(); - chars_to.resize(size * Impl::length); + chars_to.resize(input_rows_count * Impl::length); ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Impl::apply( reinterpret_cast(&data[current_offset]), @@ -253,11 +252,10 @@ public: { auto col_to = ColumnFixedString::create(Impl::length); const typename ColumnFixedString::Chars & data = col_from_fix->getChars(); - const auto size = col_from_fix->size(); auto & chars_to = col_to->getChars(); const auto length = col_from_fix->getN(); - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) + chars_to.resize(input_rows_count * Impl::length); + for (size_t i = 0; i < input_rows_count; ++i) { Impl::apply( reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); @@ -268,11 +266,10 @@ public: { auto col_to = ColumnFixedString::create(Impl::length); const typename ColumnIPv6::Container & data = col_from_ip->getData(); - const auto size = col_from_ip->size(); auto & chars_to = col_to->getChars(); const auto length = sizeof(IPv6::UnderlyingType); - chars_to.resize(size * Impl::length); - for (size_t i = 0; i < size; ++i) + chars_to.resize(input_rows_count * Impl::length); + for (size_t i = 0; i < input_rows_count; ++i) { Impl::apply( reinterpret_cast(&data[i]), length, reinterpret_cast(&chars_to[i * Impl::length])); diff --git a/src/Functions/FunctionsStringSearch.h 
b/src/Functions/FunctionsStringSearch.h index fba6336ebff..7ec0076e395 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -163,7 +163,7 @@ public: return return_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const ColumnPtr & column_haystack = (argument_order == ArgumentOrder::HaystackNeedle) ? arguments[0].column : arguments[1].column; const ColumnPtr & column_needle = (argument_order == ArgumentOrder::HaystackNeedle) ? arguments[1].column : arguments[0].column; @@ -236,7 +236,8 @@ public: col_needle_vector->getOffsets(), column_start_pos, vec_res, - null_map.get()); + null_map.get(), + input_rows_count); else if (col_haystack_vector && col_needle_const) Impl::vectorConstant( col_haystack_vector->getChars(), @@ -244,7 +245,8 @@ public: col_needle_const->getValue(), column_start_pos, vec_res, - null_map.get()); + null_map.get(), + input_rows_count); else if (col_haystack_vector_fixed && col_needle_vector) Impl::vectorFixedVector( col_haystack_vector_fixed->getChars(), @@ -253,14 +255,16 @@ public: col_needle_vector->getOffsets(), column_start_pos, vec_res, - null_map.get()); + null_map.get(), + input_rows_count); else if (col_haystack_vector_fixed && col_needle_const) Impl::vectorFixedConstant( col_haystack_vector_fixed->getChars(), col_haystack_vector_fixed->getN(), col_needle_const->getValue(), vec_res, - null_map.get()); + null_map.get(), + input_rows_count); else if (col_haystack_const && col_needle_vector) Impl::constantVector( col_haystack_const->getValue(), @@ -268,7 +272,8 @@ public: col_needle_vector->getOffsets(), column_start_pos, vec_res, - null_map.get()); + null_map.get(), + input_rows_count); else throw Exception( ErrorCodes::ILLEGAL_COLUMN, diff --git 
a/src/Functions/FunctionsStringSearchToString.h b/src/Functions/FunctionsStringSearchToString.h index 978a84de472..c889cf062a3 100644 --- a/src/Functions/FunctionsStringSearchToString.h +++ b/src/Functions/FunctionsStringSearchToString.h @@ -60,7 +60,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column = arguments[0].column; const ColumnPtr column_needle = arguments[1].column; @@ -75,7 +75,7 @@ public: ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - Impl::vector(col->getChars(), col->getOffsets(), col_needle->getValue(), vec_res, offsets_res); + Impl::vector(col->getChars(), col->getOffsets(), col_needle->getValue(), vec_res, offsets_res, input_rows_count); return col_res; } diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 7b3f2337c89..5e26e4ad482 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -90,7 +90,7 @@ struct NgramDistanceImpl ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...); } - static ALWAYS_INLINE size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end) + static size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end) { /// Offset before which we copy some data. 
constexpr size_t padding_offset = default_padding - N + 1; @@ -120,7 +120,7 @@ struct NgramDistanceImpl return default_padding; } - static ALWAYS_INLINE size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end) + static size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end) { /// The same copying as described in the function above. memcpy(code_points, code_points + default_padding - N + 1, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(CodePoint)); @@ -195,7 +195,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline size_t calculateNeedleStats( + static inline size_t calculateNeedleStats( const char * data, const size_t size, NgramCount * ngram_stats, @@ -228,7 +228,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric( + static inline UInt64 calculateHaystackStatsAndMetric( const char * data, const size_t size, NgramCount * ngram_stats, @@ -318,9 +318,9 @@ struct NgramDistanceImpl const ColumnString::Offsets & haystack_offsets, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { - const size_t haystack_offsets_size = haystack_offsets.size(); size_t prev_haystack_offset = 0; size_t prev_needle_offset = 0; @@ -331,7 +331,7 @@ struct NgramDistanceImpl std::unique_ptr needle_ngram_storage(new UInt16[max_string_size]); std::unique_ptr haystack_ngram_storage(new UInt16[max_string_size]); - for (size_t i = 0; i < haystack_offsets_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * haystack = reinterpret_cast(&haystack_data[prev_haystack_offset]); const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; @@ -391,12 +391,13 @@ struct NgramDistanceImpl std::string haystack, const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, - PaddedPODArray & res) + 
PaddedPODArray & res, + size_t input_rows_count) { /// For symmetric version it is better to use vector_constant if constexpr (symmetric) { - vectorConstant(needle_data, needle_offsets, std::move(haystack), res); + vectorConstant(needle_data, needle_offsets, std::move(haystack), res, input_rows_count); } else { @@ -404,7 +405,6 @@ struct NgramDistanceImpl haystack.resize(haystack_size + default_padding); /// For logic explanation see vector_vector function. - const size_t needle_offsets_size = needle_offsets.size(); size_t prev_offset = 0; std::unique_ptr common_stats{new NgramCount[map_size]{}}; @@ -412,7 +412,7 @@ struct NgramDistanceImpl std::unique_ptr needle_ngram_storage(new UInt16[max_string_size]); std::unique_ptr haystack_ngram_storage(new UInt16[max_string_size]); - for (size_t i = 0; i < needle_offsets_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const char * needle = reinterpret_cast(&needle_data[prev_offset]); const size_t needle_size = needle_offsets[i] - prev_offset - 1; @@ -456,7 +456,8 @@ struct NgramDistanceImpl const ColumnString::Chars & data, const ColumnString::Offsets & offsets, std::string needle, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { /// zeroing our map std::unique_ptr common_stats{new NgramCount[map_size]{}}; @@ -472,7 +473,7 @@ struct NgramDistanceImpl size_t distance = needle_stats_size; size_t prev_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const UInt8 * haystack = &data[prev_offset]; const size_t haystack_size = offsets[i] - prev_offset - 1; diff --git a/src/Functions/FunctionsStringSimilarity.h b/src/Functions/FunctionsStringSimilarity.h index e148730054d..c2cf7137286 100644 --- a/src/Functions/FunctionsStringSimilarity.h +++ b/src/Functions/FunctionsStringSimilarity.h @@ -57,7 +57,7 @@ public: return std::make_shared>(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & 
result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { using ResultType = typename Impl::ResultType; @@ -90,7 +90,7 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(column_haystack->size()); + vec_res.resize(input_rows_count); const ColumnString * col_haystack_vector = checkAndGetColumn(&*column_haystack); const ColumnString * col_needle_vector = checkAndGetColumn(&*column_needle); @@ -110,7 +110,7 @@ public: Impl::max_string_size); } } - Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res); + Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res, input_rows_count); } else if (col_haystack_vector && col_needle_vector) { @@ -119,7 +119,8 @@ public: col_haystack_vector->getOffsets(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), - vec_res); + vec_res, + input_rows_count); } else if (col_haystack_const && col_needle_vector) { @@ -136,7 +137,7 @@ public: Impl::max_string_size); } } - Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res); + Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res, input_rows_count); } else { diff --git a/src/Functions/FunctionsTextClassification.h b/src/Functions/FunctionsTextClassification.h index 90e8af06ccc..d5cba690f81 100644 --- a/src/Functions/FunctionsTextClassification.h +++ b/src/Functions/FunctionsTextClassification.h @@ -55,7 +55,7 @@ public: return arguments[0]; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const 
DataTypePtr & /*result_type*/, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; const ColumnString * col = checkAndGetColumn(column.get()); @@ -65,7 +65,7 @@ public: arguments[0].column->getName(), getName()); auto col_res = ColumnString::create(); - Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); + Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } }; @@ -104,7 +104,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { const ColumnPtr & column = arguments[0].column; const ColumnString * col = checkAndGetColumn(column.get()); @@ -115,9 +115,9 @@ public: auto col_res = ColumnVector::create(); ColumnVector::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); + vec_res.resize(input_rows_count); - Impl::vector(col->getChars(), col->getOffsets(), vec_res); + Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count); return col_res; } }; diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index f93a885ee65..77d740803be 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -130,7 +130,7 @@ struct TimeWindowImpl static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name); - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name); + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count); }; template @@ -196,7 +196,7 @@ struct TimeWindowImpl 
return std::make_shared(DataTypes{data_type, data_type}); } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; @@ -214,38 +214,37 @@ struct TimeWindowImpl { /// TODO: add proper support for fractional seconds case IntervalKind::Kind::Second: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Minute: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Hour: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Day: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Week: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Month: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Quarter: - return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); case IntervalKind::Kind::Year: - return executeTumble(*time_column_vec, std::get<1>(interval), 
time_zone); + return executeTumble(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count); default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } } template - static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone) + static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone, size_t input_rows_count) { const auto & time_data = time_column.getData(); - size_t size = time_column.size(); auto start = ColumnVector::create(); auto end = ColumnVector::create(); auto & start_data = start->getData(); auto & end_data = end->getData(); - start_data.resize(size); - end_data.resize(size); - for (size_t i = 0; i != size; ++i) + start_data.resize(input_rows_count); + end_data.resize(input_rows_count); + for (size_t i = 0; i != input_rows_count; ++i) { start_data[i] = ToStartOfTransform::execute(time_data[i], num_units, time_zone); end_data[i] = AddTime::execute(start_data[i], num_units, time_zone); @@ -283,7 +282,7 @@ struct TimeWindowImpl } } - [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -296,7 +295,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 0, function_name); } }; @@ -311,7 +310,7 @@ struct TimeWindowImpl return TimeWindowImpl::getReturnType(arguments, function_name); } - [[maybe_unused]] static ColumnPtr 
dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name) + [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -324,7 +323,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 1, function_name); } }; @@ -372,7 +371,7 @@ struct TimeWindowImpl return std::make_shared(DataTypes{data_type, data_type}); } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto & hop_interval_column = arguments[1]; @@ -396,28 +395,28 @@ struct TimeWindowImpl /// TODO: add proper support for fractional seconds case IntervalKind::Kind::Second: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Minute: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Hour: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Day: return executeHop( - 
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Week: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Month: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Quarter: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Year: return executeHop( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } @@ -425,18 +424,17 @@ struct TimeWindowImpl template static ColumnPtr - executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count) { const auto & time_data = time_column.getData(); - size_t size = time_column.size(); auto start = ColumnVector::create(); auto end = ColumnVector::create(); auto & start_data = start->getData(); auto & end_data = end->getData(); - start_data.resize(size); - end_data.resize(size); + start_data.resize(input_rows_count); + end_data.resize(input_rows_count); - for 
(size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { ToType wstart = ToStartOfTransform::execute(time_data[i], hop_num_units, time_zone); ToType wend = AddTime::execute(wstart, hop_num_units, time_zone); @@ -509,7 +507,7 @@ struct TimeWindowImpl return std::make_shared(); } - static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto & hop_interval_column = arguments[1]; @@ -533,28 +531,28 @@ struct TimeWindowImpl /// TODO: add proper support for fractional seconds case IntervalKind::Kind::Second: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Minute: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Hour: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Day: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Week: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); 
case IntervalKind::Kind::Month: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Quarter: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); case IntervalKind::Kind::Year: return executeHopSlice( - *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count); default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } @@ -563,17 +561,16 @@ struct TimeWindowImpl template static ColumnPtr - executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone) + executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count) { Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units); const auto & time_data = time_column.getData(); - size_t size = time_column.size(); auto end = ColumnVector::create(); auto & end_data = end->getData(); - end_data.resize(size); - for (size_t i = 0; i < size; ++i) + end_data.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) { ToType wstart = ToStartOfTransform::execute(time_data[i], hop_num_units, time_zone); ToType wend = AddTime::execute(wstart, hop_num_units, time_zone); @@ -593,23 +590,23 @@ struct TimeWindowImpl return end; } - static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, 
const String & function_name, size_t input_rows_count) { - ColumnPtr column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + ColumnPtr column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(column, 1, function_name); } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { if (arguments.size() == 2) - return dispatchForTumbleColumns(arguments, function_name); + return dispatchForTumbleColumns(arguments, function_name, input_rows_count); else { const auto & third_column = arguments[2]; if (arguments.size() == 3 && WhichDataType(third_column.type).isString()) - return dispatchForTumbleColumns(arguments, function_name); + return dispatchForTumbleColumns(arguments, function_name, input_rows_count); else - return dispatchForHopColumns(arguments, function_name); + return dispatchForHopColumns(arguments, function_name, input_rows_count); } } }; @@ -639,7 +636,7 @@ struct TimeWindowImpl } } - static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -652,7 +649,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 0, function_name); } }; @@ -667,7 +664,7 @@ struct TimeWindowImpl return TimeWindowImpl::getReturnType(arguments, function_name); } - static ColumnPtr dispatchForColumns(const 
ColumnsWithTypeAndName & arguments, const String & function_name) + static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count) { const auto & time_column = arguments[0]; const auto which_type = WhichDataType(time_column.type); @@ -680,7 +677,7 @@ struct TimeWindowImpl result_column = time_column.column; } else - result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name, input_rows_count); return executeWindowBound(result_column, 1, function_name); } @@ -693,9 +690,9 @@ DataTypePtr FunctionTimeWindow::getReturnTypeImpl(const ColumnsWithTypeAnd } template -ColumnPtr FunctionTimeWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const +ColumnPtr FunctionTimeWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const { - return TimeWindowImpl::dispatchForColumns(arguments, name); + return TimeWindowImpl::dispatchForColumns(arguments, name, input_rows_count); } } diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index a9321819a26..7627c68c057 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -18,7 +18,7 @@ namespace DB */ struct FunctionDetectTonalityImpl { - static ALWAYS_INLINE inline Float32 detectTonality( + static Float32 detectTonality( const UInt8 * str, const size_t str_len, const FrequencyHolder::Map & emotional_dict) @@ -63,13 +63,13 @@ struct FunctionDetectTonalityImpl static void vector( const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - PaddedPODArray & res) + PaddedPODArray & res, + size_t input_rows_count) { const auto & emotional_dict = 
FrequencyHolder::getInstance().getEmotionalDict(); - size_t size = offsets.size(); size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = detectTonality(data.data() + prev_offset, offsets[i] - 1 - prev_offset, emotional_dict); prev_offset = offsets[i]; diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index 5e13fbbad5c..dbe291fcb93 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -93,7 +93,8 @@ struct ExtractParamImpl std::string needle, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t /*input_rows_count*/) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -168,11 +169,12 @@ struct ExtractParamToStringImpl { static void vector(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, std::string needle, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { /// Constant 5 is taken from a function that performs a similar task FunctionsStringSearch.h::ExtractImpl - res_data.reserve(haystack_data.size() / 5); - res_offsets.resize(haystack_offsets.size()); + res_data.reserve(input_rows_count / 5); + res_offsets.resize(input_rows_count); /// We are looking for a parameter simply as a substring of the form "name" needle = "\"" + needle + "\":"; diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index a4ff49859cc..4943bf708c5 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -35,12 +35,13 @@ struct HasTokenImpl const std::string & pattern, const ColumnPtr & start_pos, PaddedPODArray & res, - ColumnUInt8 * res_null) + ColumnUInt8 * res_null, + size_t input_rows_count) { if 
(start_pos != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); - if (haystack_offsets.empty()) + if (input_rows_count == 0) return; const UInt8 * const begin = haystack_data.data(); diff --git a/src/Functions/Kusto/KqlArraySort.cpp b/src/Functions/Kusto/KqlArraySort.cpp index 11157aa53e6..fb3e6259ee4 100644 --- a/src/Functions/Kusto/KqlArraySort.cpp +++ b/src/Functions/Kusto/KqlArraySort.cpp @@ -73,13 +73,11 @@ public: size_t array_count = arguments.size(); const auto & last_arg = arguments[array_count - 1]; - size_t input_rows_count_local = input_rows_count; - bool null_last = true; if (!isArray(last_arg.type)) { --array_count; - null_last = check_condition(last_arg, context, input_rows_count_local); + null_last = check_condition(last_arg, context, input_rows_count); } ColumnsWithTypeAndName new_args; @@ -119,11 +117,11 @@ public: } auto zipped - = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local); + = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count); ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared(result_type), "zipped"}}); auto sorted_tuple - = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local); + = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count); auto null_type = std::make_shared(std::make_shared()); @@ -139,10 +137,10 @@ public: = std::make_shared(makeNullable(nested_types[i])); ColumnsWithTypeAndName null_array_arg({ - {null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"}, + {null_type->createColumnConstWithDefaultValue(input_rows_count), null_type, "NULL"}, }); - tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, 
arg_type, input_rows_count_local); + tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count); tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst(); } else @@ -153,7 +151,7 @@ public: auto tuple_coulmn = FunctionFactory::instance() .get("tupleElement", context) ->build(untuple_args) - ->execute(untuple_args, result_type, input_rows_count_local); + ->execute(untuple_args, result_type, input_rows_count); auto out_tmp = ColumnArray::create(nested_types[i]->createColumn()); @@ -183,7 +181,7 @@ public: auto inside_null_type = nested_types[0]; ColumnsWithTypeAndName indexof_args({ arg_of_index, - {inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"}, + {inside_null_type->createColumnConstWithDefaultValue(input_rows_count), inside_null_type, "NULL"}, }); auto null_index_datetype = std::make_shared(); @@ -192,7 +190,7 @@ public: slice_index.column = FunctionFactory::instance() .get("indexOf", context) ->build(indexof_args) - ->execute(indexof_args, result_type, input_rows_count_local); + ->execute(indexof_args, result_type, input_rows_count); auto null_index_in_array = slice_index.column->get64(0); if (null_index_in_array > 0) @@ -220,15 +218,15 @@ public: ColumnsWithTypeAndName slice_args_right( {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index}); ColumnWithTypeAndName arr_left{ - fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""}; + fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count), arg_type, ""}; ColumnWithTypeAndName arr_right{ - fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""}; + fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count), arg_type, ""}; ColumnsWithTypeAndName arr_cancat({arr_right, arr_left}); auto out_tmp = 
FunctionFactory::instance() .get("arrayConcat", context) ->build(arr_cancat) - ->execute(arr_cancat, arg_type, input_rows_count_local); + ->execute(arr_cancat, arg_type, input_rows_count); adjusted_columns[i] = std::move(out_tmp); } } diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 55b2fee5400..dd1dec9bdff 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -127,14 +127,13 @@ struct MatchImpl const String & needle, [[maybe_unused]] const ColumnPtr & start_pos_, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); - const size_t haystack_size = haystack_offsets.size(); - - assert(haystack_size == res.size()); + assert(input_rows_count == res.size()); assert(start_pos_ == nullptr); if (haystack_offsets.empty()) @@ -202,11 +201,11 @@ struct MatchImpl if (required_substring.empty()) { if (!regexp.getRE2()) /// An empty regexp. Always matches. - memset(res.data(), !negate, haystack_size * sizeof(res[0])); + memset(res.data(), !negate, input_rows_count * sizeof(res[0])); else { size_t prev_offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const bool match = regexp.getRE2()->Match( {reinterpret_cast(&haystack_data[prev_offset]), haystack_offsets[i] - prev_offset - 1}, @@ -291,14 +290,13 @@ struct MatchImpl size_t N, const String & needle, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
assert(!res_null); - const size_t haystack_size = haystack.size() / N; - - assert(haystack_size == res.size()); + assert(input_rows_count == res.size()); if (haystack.empty()) return; @@ -370,11 +368,11 @@ struct MatchImpl if (required_substring.empty()) { if (!regexp.getRE2()) /// An empty regexp. Always matches. - memset(res.data(), !negate, haystack_size * sizeof(res[0])); + memset(res.data(), !negate, input_rows_count * sizeof(res[0])); else { size_t offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const bool match = regexp.getRE2()->Match( {reinterpret_cast(&haystack[offset]), N}, @@ -464,15 +462,14 @@ struct MatchImpl const ColumnString::Offsets & needle_offset, [[maybe_unused]] const ColumnPtr & start_pos_, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
assert(!res_null); - const size_t haystack_size = haystack_offsets.size(); - - assert(haystack_size == needle_offset.size()); - assert(haystack_size == res.size()); + assert(input_rows_count == needle_offset.size()); + assert(input_rows_count == res.size()); assert(start_pos_ == nullptr); if (haystack_offsets.empty()) @@ -488,7 +485,7 @@ struct MatchImpl Regexps::LocalCacheTable cache; Regexps::RegexpPtr regexp; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -573,15 +570,14 @@ struct MatchImpl const ColumnString::Offsets & needle_offset, [[maybe_unused]] const ColumnPtr & start_pos_, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
assert(!res_null); - const size_t haystack_size = haystack.size()/N; - - assert(haystack_size == needle_offset.size()); - assert(haystack_size == res.size()); + assert(input_rows_count == needle_offset.size()); + assert(input_rows_count == res.size()); assert(start_pos_ == nullptr); if (haystack.empty()) @@ -597,7 +593,7 @@ struct MatchImpl Regexps::LocalCacheTable cache; Regexps::RegexpPtr regexp; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack[prev_haystack_offset]; const size_t cur_haystack_length = N; diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index 3e9c8fba215..17232bbc366 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -52,9 +52,10 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool reject_expensive_hyperscan_regexps, + size_t input_rows_count) { - vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); } static void vectorConstant( @@ -67,7 +68,8 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool reject_expensive_hyperscan_regexps, + size_t input_rows_count) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); @@ -87,7 
+89,7 @@ struct MultiMatchAllIndicesImpl throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Regular expression evaluation in vectorscan will be too slow. To ignore this error, disable setting 'reject_expensive_hyperscan_regexps'."); } - offsets.resize(haystack_offsets.size()); + offsets.resize(input_rows_count); if (needles_arr.empty()) { @@ -114,9 +116,8 @@ struct MultiMatchAllIndicesImpl static_cast*>(context)->push_back(id); return 0; }; - const size_t haystack_offsets_size = haystack_offsets.size(); UInt64 offset = 0; - for (size_t i = 0; i < haystack_offsets_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { UInt64 length = haystack_offsets[i] - offset - 1; /// vectorscan restriction. @@ -160,9 +161,10 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool reject_expensive_hyperscan_regexps, + size_t input_rows_count) { - vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); } static void vectorVector( @@ -176,12 +178,13 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool reject_expensive_hyperscan_regexps, + size_t input_rows_count) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); #if USE_VECTORSCAN - offsets.resize(haystack_offsets.size()); + 
offsets.resize(input_rows_count); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -189,7 +192,7 @@ struct MultiMatchAllIndicesImpl std::vector needles; - for (size_t i = 0; i < haystack_offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 20b2150048b..b5872579ebb 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -66,9 +66,10 @@ struct MultiMatchAnyImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool reject_expensive_hyperscan_regexps, + size_t input_rows_count) { - vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); } static void vectorConstant( @@ -81,7 +82,8 @@ struct MultiMatchAnyImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool reject_expensive_hyperscan_regexps, + size_t input_rows_count) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); @@ -101,7 +103,7 @@ struct MultiMatchAnyImpl throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Regular expression evaluation in vectorscan will be too slow. 
To ignore this error, disable setting 'reject_expensive_hyperscan_regexps'."); } - res.resize(haystack_offsets.size()); + res.resize(input_rows_count); if (needles_arr.empty()) { @@ -133,9 +135,8 @@ struct MultiMatchAnyImpl /// Once we hit the callback, there is no need to search for others. return 1; }; - const size_t haystack_offsets_size = haystack_offsets.size(); UInt64 offset = 0; - for (size_t i = 0; i < haystack_offsets_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { UInt64 length = haystack_offsets[i] - offset - 1; /// vectorscan restriction. @@ -162,7 +163,7 @@ struct MultiMatchAnyImpl PaddedPODArray accum(res.size()); memset(res.data(), 0, res.size() * sizeof(res.front())); memset(accum.data(), 0, accum.size()); - for (size_t j = 0; j < needles.size(); ++j) + for (size_t j = 0; j < input_rows_count; ++j) { MatchImpl::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum, nullptr); for (size_t i = 0; i < res.size(); ++i) @@ -186,9 +187,10 @@ struct MultiMatchAnyImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool reject_expensive_hyperscan_regexps, + size_t input_rows_count) { - vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); + vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); } static void vectorVector( @@ -202,12 +204,13 @@ struct MultiMatchAnyImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps) + bool 
reject_expensive_hyperscan_regexps, + size_t input_rows_count) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); - res.resize(haystack_offsets.size()); + res.resize(input_rows_count); #if USE_VECTORSCAN size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -216,7 +219,7 @@ struct MultiMatchAnyImpl std::vector needles; - for (size_t i = 0; i < haystack_offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); @@ -306,7 +309,7 @@ struct MultiMatchAnyImpl std::vector needles; - for (size_t i = 0; i < haystack_offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; diff --git a/src/Functions/MultiSearchFirstIndexImpl.h b/src/Functions/MultiSearchFirstIndexImpl.h index 36a5fd514d9..b80d9d3a124 100644 --- a/src/Functions/MultiSearchFirstIndexImpl.h +++ b/src/Functions/MultiSearchFirstIndexImpl.h @@ -33,7 +33,8 @@ struct MultiSearchFirstIndexImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/) + bool /*reject_expensive_hyperscan_regexps*/, + size_t input_rows_count) { // For performance of Volnitsky search, it is crucial to save only one byte for pattern number. 
if (needles_arr.size() > std::numeric_limits::max()) @@ -48,14 +49,13 @@ struct MultiSearchFirstIndexImpl auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - const size_t haystack_size = haystack_offsets.size(); - res.resize(haystack_size); + res.resize(input_rows_count); size_t iteration = 0; while (searcher.hasMoreToSearch()) { size_t prev_haystack_offset = 0; - for (size_t j = 0; j < haystack_size; ++j) + for (size_t j = 0; j < input_rows_count; ++j) { const auto * haystack = &haystack_data[prev_haystack_offset]; const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1; @@ -80,10 +80,10 @@ struct MultiSearchFirstIndexImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/) + bool /*reject_expensive_hyperscan_regexps*/, + size_t input_rows_count) { - const size_t haystack_size = haystack_offsets.size(); - res.resize(haystack_size); + res.resize(input_rows_count); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -92,14 +92,12 @@ struct MultiSearchFirstIndexImpl std::vector needles; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) - { needles.emplace_back(needles_data_string.getDataAt(j).toView()); - } auto searcher = Impl::createMultiSearcherInBigHaystack(needles); // sub-optimal @@ -110,15 +108,11 @@ struct MultiSearchFirstIndexImpl while (searcher.hasMoreToSearch()) { if (iteration == 0 || res[i] == 0) - { res[i] = searcher.searchOneFirstIndex(haystack, haystack_end); - } ++iteration; } if (iteration == 0) - { res[i] = 0; - } prev_haystack_offset = haystack_offsets[i]; prev_needles_offset = needles_offsets[i]; diff --git a/src/Functions/MultiSearchFirstPositionImpl.h b/src/Functions/MultiSearchFirstPositionImpl.h index 
ccdd82a0ee5..cd4e585e99b 100644 --- a/src/Functions/MultiSearchFirstPositionImpl.h +++ b/src/Functions/MultiSearchFirstPositionImpl.h @@ -33,7 +33,8 @@ struct MultiSearchFirstPositionImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/) + bool /*reject_expensive_hyperscan_regexps*/, + size_t input_rows_count) { // For performance of Volnitsky search, it is crucial to save only one byte for pattern number. if (needles_arr.size() > std::numeric_limits::max()) @@ -52,14 +53,13 @@ struct MultiSearchFirstPositionImpl }; auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - const size_t haystack_size = haystack_offsets.size(); - res.resize(haystack_size); + res.resize(input_rows_count); size_t iteration = 0; while (searcher.hasMoreToSearch()) { size_t prev_haystack_offset = 0; - for (size_t j = 0; j < haystack_size; ++j) + for (size_t j = 0; j < input_rows_count; ++j) { const auto * haystack = &haystack_data[prev_haystack_offset]; const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1; @@ -89,10 +89,10 @@ struct MultiSearchFirstPositionImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/) + bool /*reject_expensive_hyperscan_regexps*/, + size_t input_rows_count) { - const size_t haystack_size = haystack_offsets.size(); - res.resize(haystack_size); + res.resize(input_rows_count); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -106,14 +106,12 @@ struct MultiSearchFirstPositionImpl return 1 + Impl::countChars(reinterpret_cast(start), reinterpret_cast(end)); }; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) - { 
needles.emplace_back(needles_data_string.getDataAt(j).toView()); - } auto searcher = Impl::createMultiSearcherInBigHaystack(needles); // sub-optimal @@ -138,9 +136,7 @@ struct MultiSearchFirstPositionImpl ++iteration; } if (iteration == 0) - { res[i] = 0; - } prev_haystack_offset = haystack_offsets[i]; prev_needles_offset = needles_offsets[i]; diff --git a/src/Functions/MultiSearchImpl.h b/src/Functions/MultiSearchImpl.h index 467cc96a95f..909425f5a93 100644 --- a/src/Functions/MultiSearchImpl.h +++ b/src/Functions/MultiSearchImpl.h @@ -33,7 +33,8 @@ struct MultiSearchImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/) + bool /*reject_expensive_hyperscan_regexps*/, + size_t input_rows_count) { // For performance of Volnitsky search, it is crucial to save only one byte for pattern number. if (needles_arr.size() > std::numeric_limits::max()) @@ -48,14 +49,13 @@ struct MultiSearchImpl auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - const size_t haystack_size = haystack_offsets.size(); - res.resize(haystack_size); + res.resize(input_rows_count); size_t iteration = 0; while (searcher.hasMoreToSearch()) { size_t prev_haystack_offset = 0; - for (size_t j = 0; j < haystack_size; ++j) + for (size_t j = 0; j < input_rows_count; ++j) { const auto * haystack = &haystack_data[prev_haystack_offset]; const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1; @@ -79,10 +79,10 @@ struct MultiSearchImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/) + bool /*reject_expensive_hyperscan_regexps*/, + size_t input_rows_count) { - const size_t haystack_size = haystack_offsets.size(); - res.resize(haystack_size); + res.resize(input_rows_count); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -91,14 
+91,12 @@ struct MultiSearchImpl std::vector needles; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) - { needles.emplace_back(needles_data_string.getDataAt(j).toView()); - } const auto * const haystack = &haystack_data[prev_haystack_offset]; const size_t haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 57f1243537d..bf8241774a6 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const + inline bool contains(CoordinateType x, CoordinateType y) const { Point point(x, y); @@ -167,7 +167,7 @@ public: UInt64 getAllocatedBytes() const; - inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const; + bool contains(CoordinateType x, CoordinateType y) const; private: enum class CellType : uint8_t @@ -199,7 +199,7 @@ private: } /// Inner part of the HalfPlane is the left side of initialized vector. - bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; } + bool contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; } }; struct Cell @@ -233,7 +233,7 @@ private: void calcGridAttributes(Box & box); template - T ALWAYS_INLINE getCellIndex(T row, T col) const { return row * grid_size + col; } + T getCellIndex(T row, T col) const { return row * grid_size + col; } /// Complex case. Will check intersection directly. 
inline void addComplexPolygonCell(size_t index, const Box & box); diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index eeb9d8b6a59..e525b5fab57 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -193,7 +193,8 @@ struct PositionImpl const std::string & needle, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -214,13 +215,12 @@ struct PositionImpl } ColumnString::Offset prev_offset = 0; - size_t rows = haystack_offsets.size(); if (const ColumnConst * start_pos_const = typeid_cast(&*start_pos)) { /// Needle is empty and start_pos is constant UInt64 start = std::max(start_pos_const->getUInt(0), static_cast(1)); - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t haystack_size = Impl::countChars( reinterpret_cast(pos), reinterpret_cast(pos + haystack_offsets[i] - prev_offset - 1)); @@ -234,7 +234,7 @@ struct PositionImpl else { /// Needle is empty and start_pos is not constant - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t haystack_size = Impl::countChars( reinterpret_cast(pos), reinterpret_cast(pos + haystack_offsets[i] - prev_offset - 1)); @@ -359,7 +359,8 @@ struct PositionImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
assert(!res_null); @@ -367,9 +368,7 @@ struct PositionImpl ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; - size_t size = haystack_offsets.size(); - - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; @@ -423,7 +422,8 @@ struct PositionImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null) + [[maybe_unused]] ColumnUInt8 * res_null, + size_t input_rows_count) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -431,9 +431,7 @@ struct PositionImpl /// NOTE You could use haystack indexing. But this is a rare case. ColumnString::Offset prev_needle_offset = 0; - size_t size = needle_offsets.size(); - - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index f5fb08f71d2..14f5a2d7932 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -201,15 +201,15 @@ struct ReplaceRegexpImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write 
error messages to stderr @@ -232,13 +232,13 @@ struct ReplaceRegexpImpl case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All; } }; - ReplaceStringImpl::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets); + ReplaceStringImpl::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets, input_rows_count); return; } Instructions instructions = createInstructions(replacement, num_captures); - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -257,19 +257,19 @@ struct ReplaceRegexpImpl const ColumnString::Offsets & needle_offsets, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { assert(haystack_offsets.size() == needle_offsets.size()); ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t hs_from = i > 0 ? 
haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); @@ -302,7 +302,8 @@ struct ReplaceRegexpImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { assert(haystack_offsets.size() == replacement_offsets.size()); @@ -311,8 +312,7 @@ struct ReplaceRegexpImpl ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr @@ -323,7 +323,7 @@ struct ReplaceRegexpImpl int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); @@ -349,20 +349,20 @@ struct ReplaceRegexpImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { assert(haystack_offsets.size() == needle_offsets.size()); assert(needle_offsets.size() == replacement_offsets.size()); ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t hs_from = i > 0 ? 
haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); @@ -399,15 +399,15 @@ struct ReplaceRegexpImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); ColumnString::Offset res_offset = 0; - size_t haystack_size = haystack_data.size() / n; res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); re2::RE2::Options regexp_options; regexp_options.set_log_errors(false); /// don't write error messages to stderr @@ -419,7 +419,7 @@ struct ReplaceRegexpImpl int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); Instructions instructions = createInstructions(replacement, num_captures); - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from = i * n; const char * hs_data = reinterpret_cast(haystack_data.data() + from); diff --git a/src/Functions/ReplaceStringImpl.h b/src/Functions/ReplaceStringImpl.h index de3942acbd8..7c56d657b3e 100644 --- a/src/Functions/ReplaceStringImpl.h +++ b/src/Functions/ReplaceStringImpl.h @@ -35,7 +35,8 @@ struct ReplaceStringImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); @@ -46,8 +47,7 @@ struct ReplaceStringImpl ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); - const size_t haystack_size = haystack_offsets.size(); - res_offsets.resize(haystack_size); + 
res_offsets.resize(input_rows_count); /// The current index in the array of strings. size_t i = 0; @@ -124,21 +124,20 @@ struct ReplaceStringImpl const ColumnString::Offsets & needle_offsets, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { chassert(haystack_offsets.size() == needle_offsets.size()); - const size_t haystack_size = haystack_offsets.size(); - res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); ColumnString::Offset res_offset = 0; size_t prev_haystack_offset = 0; size_t prev_needle_offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -195,24 +194,23 @@ struct ReplaceStringImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { chassert(haystack_offsets.size() == replacement_offsets.size()); if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); - const size_t haystack_size = haystack_offsets.size(); - res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); ColumnString::Offset res_offset = 0; size_t prev_haystack_offset = 0; size_t prev_replacement_offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -267,15 
+265,14 @@ struct ReplaceStringImpl const ColumnString::Chars & replacement_data, const ColumnString::Offsets & replacement_offsets, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { chassert(haystack_offsets.size() == needle_offsets.size()); chassert(needle_offsets.size() == replacement_offsets.size()); - const size_t haystack_size = haystack_offsets.size(); - res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); ColumnString::Offset res_offset = 0; @@ -283,7 +280,7 @@ struct ReplaceStringImpl size_t prev_needle_offset = 0; size_t prev_replacement_offset = 0; - for (size_t i = 0; i < haystack_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -345,7 +342,8 @@ struct ReplaceStringImpl const String & needle, const String & replacement, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { if (needle.empty()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); @@ -355,9 +353,8 @@ struct ReplaceStringImpl const UInt8 * pos = begin; ColumnString::Offset res_offset = 0; - size_t haystack_size = haystack_data.size() / n; res_data.reserve(haystack_data.size()); - res_offsets.resize(haystack_size); + res_offsets.resize(input_rows_count); /// The current index in the string array. size_t i = 0; @@ -384,13 +381,13 @@ struct ReplaceStringImpl /// Copy skipped strings without any changes but /// add zero byte to the end of each string. 
- while (i < haystack_size && begin + n * (i + 1) <= match) + while (i < input_rows_count && begin + n * (i + 1) <= match) { COPY_REST_OF_CURRENT_STRING(); } /// If you have reached the end, it's time to stop - if (i == haystack_size) + if (i == input_rows_count) break; /// Copy unchanged part of current string. diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 68582198ea3..22fec17654c 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -64,7 +64,7 @@ public: return arguments[0].type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get()); FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue()); @@ -72,7 +72,7 @@ public: if (const ColumnString * col = checkAndGetColumn(&*arguments[0].column)) { auto col_res = ColumnString::create(); - vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets()); + vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count); return col_res; } else @@ -82,11 +82,11 @@ public: static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { - size_t size = offsets.size(); - res_offsets.resize(size); - res_data.reserve(size * 
Extractor::getReserveLengthForElement()); + res_offsets.resize(input_rows_count); + res_data.reserve(input_rows_count * Extractor::getReserveLengthForElement()); size_t prev_offset = 0; size_t res_offset = 0; @@ -95,7 +95,7 @@ public: Pos start; size_t length; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Extractor::execute(tld_lookup, reinterpret_cast(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length); diff --git a/src/Functions/URL/cutURLParameter.cpp b/src/Functions/URL/cutURLParameter.cpp index 7a2b96ec874..3ab9cad1ea7 100644 --- a/src/Functions/URL/cutURLParameter.cpp +++ b/src/Functions/URL/cutURLParameter.cpp @@ -44,7 +44,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnPtr column = arguments[0].column; const ColumnPtr column_needle = arguments[1].column; @@ -71,7 +71,7 @@ public: ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res); + vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res, input_rows_count); return col_res; } else @@ -130,7 +130,8 @@ public: const ColumnString::Offsets & offsets, const ColumnConst * col_needle, const ColumnArray * col_needle_const_array, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { res_data.reserve(data.size()); res_offsets.resize(offsets.size()); @@ -141,7 +142,7 @@ public: size_t res_offset = 0; size_t cur_res_offset; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i 
= 0; i < input_rows_count; ++i) { cur_offset = offsets[i]; cur_len = cur_offset - prev_offset; diff --git a/src/Functions/URL/extractURLParameter.cpp b/src/Functions/URL/extractURLParameter.cpp index f75875e0200..590c2779d9c 100644 --- a/src/Functions/URL/extractURLParameter.cpp +++ b/src/Functions/URL/extractURLParameter.cpp @@ -10,10 +10,11 @@ struct ExtractURLParameterImpl static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, std::string pattern, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, + size_t input_rows_count) { res_data.reserve(data.size() / 5); - res_offsets.resize(offsets.size()); + res_offsets.resize(input_rows_count); pattern += '='; const char * param_str = pattern.c_str(); @@ -22,7 +23,7 @@ struct ExtractURLParameterImpl ColumnString::Offset prev_offset = 0; ColumnString::Offset res_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { ColumnString::Offset cur_offset = offsets[i]; diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index fac46281604..7492ebcb4e9 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -46,7 +46,7 @@ struct FunctionPortImpl : public IFunction } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { UInt16 default_port = 0; if (arguments.size() == 2) @@ -64,7 +64,7 @@ struct FunctionPortImpl : public IFunction typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(url_column->size()); - vector(default_port, url_strs->getChars(), url_strs->getOffsets(), vec_res); + vector(default_port, url_strs->getChars(), url_strs->getOffsets(), vec_res, input_rows_count); return col_res; } else @@ -73,12 
+73,10 @@ struct FunctionPortImpl : public IFunction } private: - static void vector(UInt16 default_port, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(UInt16 default_port, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = extractPort(default_port, data, prev_offset, offsets[i] - prev_offset - 1); prev_offset = offsets[i]; diff --git a/src/Functions/UTCTimestampTransform.cpp b/src/Functions/UTCTimestampTransform.cpp index 36ec520068f..35015188078 100644 --- a/src/Functions/UTCTimestampTransform.cpp +++ b/src/Functions/UTCTimestampTransform.cpp @@ -67,7 +67,7 @@ namespace return date_time_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { if (arguments.size() != 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name); @@ -81,11 +81,10 @@ namespace if (WhichDataType(arg1.type).isDateTime()) { const auto & date_time_col = checkAndGetColumn(*arg1.column); - size_t col_size = date_time_col.size(); using ColVecTo = DataTypeDateTime::ColumnType; - typename ColVecTo::MutablePtr result_column = ColVecTo::create(col_size); + typename ColVecTo::MutablePtr result_column = ColVecTo::create(input_rows_count); typename ColVecTo::Container & result_data = result_column->getData(); - for (size_t i = 0; i < col_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { UInt32 date_time_val = date_time_col.getElement(i); LocalDateTime date_time(date_time_val, Name::to ? 
utc_time_zone : DateLUT::instance(time_zone_val)); @@ -97,14 +96,13 @@ namespace else if (WhichDataType(arg1.type).isDateTime64()) { const auto & date_time_col = checkAndGetColumn(*arg1.column); - size_t col_size = date_time_col.size(); const DataTypeDateTime64 * date_time_type = static_cast(arg1.type.get()); UInt32 col_scale = date_time_type->getScale(); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(col_scale); using ColDecimalTo = DataTypeDateTime64::ColumnType; - typename ColDecimalTo::MutablePtr result_column = ColDecimalTo::create(col_size, col_scale); + typename ColDecimalTo::MutablePtr result_column = ColDecimalTo::create(input_rows_count, col_scale); typename ColDecimalTo::Container & result_data = result_column->getData(); - for (size_t i = 0; i < col_size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { DateTime64 date_time_val = date_time_col.getElement(i); Int64 seconds = date_time_val.value / scale_multiplier; diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 3e2a3bf6863..7a61c9d368f 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -143,13 +143,13 @@ private: const IColumn & scores, const IColumn & labels, const ColumnArray::Offsets & offsets, - PaddedPODArray & result) + PaddedPODArray & result, + size_t input_rows_count) { - size_t size = offsets.size(); - result.resize(size); + result.resize(input_rows_count); ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto next_offset = offsets[i]; result[i] = apply(scores, labels, current_offset, next_offset); @@ -179,7 +179,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr col1 = 
arguments[0].column->convertToFullColumnIfConst(); ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst(); @@ -203,7 +203,8 @@ public: col_array1->getData(), col_array2->getData(), col_array1->getOffsets(), - col_res->getData()); + col_res->getData(), + input_rows_count); return col_res; } diff --git a/src/Functions/array/arrayConcat.cpp b/src/Functions/array/arrayConcat.cpp index cdb361b73b9..768877bac99 100644 --- a/src/Functions/array/arrayConcat.cpp +++ b/src/Functions/array/arrayConcat.cpp @@ -40,7 +40,6 @@ ColumnPtr FunctionArrayConcat::executeImpl(const ColumnsWithTypeAndName & argume if (result_type->onlyNull()) return result_type->createColumnConstWithDefaultValue(input_rows_count); - size_t rows = input_rows_count; size_t num_args = arguments.size(); Columns preprocessed_columns(num_args); @@ -69,7 +68,7 @@ ColumnPtr FunctionArrayConcat::executeImpl(const ColumnsWithTypeAndName & argume } if (const auto * argument_column_array = typeid_cast(argument_column.get())) - sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows)); + sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, input_rows_count)); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName()); } diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index ad325fe542a..269a7db6e92 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -132,7 +132,7 @@ private: /// Hash a set of keys into a UInt128 value. 
-static inline UInt128 ALWAYS_INLINE hash128depths(const std::vector & indices, const ColumnRawPtrs & key_columns) +static UInt128 hash128depths(const std::vector & indices, const ColumnRawPtrs & key_columns) { SipHash hash; for (size_t j = 0, keys_size = key_columns.size(); j < keys_size; ++j) diff --git a/src/Functions/array/length.cpp b/src/Functions/array/length.cpp index d81c071b55e..760506194fa 100644 --- a/src/Functions/array/length.cpp +++ b/src/Functions/array/length.cpp @@ -16,40 +16,38 @@ struct LengthImpl { static constexpr auto is_fixed_to_constant = true; - static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) res[i] = offsets[i] - 1 - offsets[i - 1]; } - static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res) + static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t n, UInt64 & res, size_t) { res = n; } - static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray & /*res*/) + static void vectorFixedToVector(const ColumnString::Chars & /*data*/, size_t /*n*/, PaddedPODArray & /*res*/, size_t) { } - static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void array(const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) res[i] = offsets[i] - offsets[i - 1]; } - [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &, size_t) { throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to UUID argument"); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv6 argument"); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv4 argument"); } diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index 0d50e5d203b..e65996a90e1 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -23,47 +23,43 @@ struct AsciiImpl using ReturnType = Int32; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = doAscii(data, prev_offset, offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } - [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/) + [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars &, size_t, Int32 &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "vectorFixedToConstant not implemented for function {}", AsciiName::name); } - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t 
input_rows_count) { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) res[i] = doAscii(data, i * n, n); } - [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", AsciiName::name); } - [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", AsciiName::name); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", AsciiName::name); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", AsciiName::name); } diff --git a/src/Functions/extract.cpp b/src/Functions/extract.cpp index 6bbdaff0e3f..c78ee9898b7 100644 --- a/src/Functions/extract.cpp +++ b/src/Functions/extract.cpp @@ -16,10 +16,11 @@ struct ExtractImpl const ColumnString::Offsets & offsets, const std::string & pattern, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) + ColumnString::Offsets & res_offsets, + size_t input_rows_count) { res_data.reserve(data.size() / 5); - res_offsets.resize(offsets.size()); + 
res_offsets.resize(input_rows_count); const OptimizedRegularExpression regexp = Regexps::createRegexp(pattern); @@ -29,7 +30,7 @@ struct ExtractImpl size_t prev_offset = 0; size_t res_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t cur_offset = offsets[i]; diff --git a/src/Functions/isValidUTF8.cpp b/src/Functions/isValidUTF8.cpp index d5f5e6a8986..1959502af06 100644 --- a/src/Functions/isValidUTF8.cpp +++ b/src/Functions/isValidUTF8.cpp @@ -219,42 +219,42 @@ SOFTWARE. static constexpr bool is_fixed_to_constant = false; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); size_t prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = isValidUTF8(data.data() + prev_offset, offsets[i] - 1 - prev_offset); prev_offset = offsets[i]; } } - static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) {} - - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToConstant(const ColumnString::Chars &, size_t, UInt8 &, size_t) { - size_t size = data.size() / n; - for (size_t i = 0; i < size; ++i) + } + + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) + { + for (size_t i = 0; i < input_rows_count; ++i) res[i] = isValidUTF8(data.data() + i * n, n); } - [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to Array argument"); } - 
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to UUID argument"); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv6 argument"); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv4 argument"); } diff --git a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp index 1c5164e132d..cc9e57ac186 100644 --- a/src/Functions/keyvaluepair/extractKeyValuePairs.cpp +++ b/src/Functions/keyvaluepair/extractKeyValuePairs.cpp @@ -54,7 +54,7 @@ class ExtractKeyValuePairs : public IFunction return builder.build(); } - ColumnPtr extract(ColumnPtr data_column, std::shared_ptr extractor) const + ColumnPtr extract(ColumnPtr data_column, std::shared_ptr extractor, size_t input_rows_count) const { auto offsets = ColumnUInt64::create(); @@ -63,7 +63,7 @@ class ExtractKeyValuePairs : public IFunction uint64_t offset = 0u; - for (auto i = 0u; i < data_column->size(); i++) + for (auto i = 0u; i < input_rows_count; i++) { auto row = data_column->getDataAt(i).toView(); @@ -97,13 +97,13 @@ public: return std::make_shared(context); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t 
input_rows_count) const override { auto parsed_arguments = ArgumentExtractor::extract(arguments); auto extractor = getExtractor(parsed_arguments); - return extract(parsed_arguments.data_column, extractor); + return extract(parsed_arguments.data_column, extractor, input_rows_count); } DataTypePtr getReturnTypeImpl(const DataTypes &) const override diff --git a/src/Functions/lengthUTF8.cpp b/src/Functions/lengthUTF8.cpp index 59a0d532602..97a42816674 100644 --- a/src/Functions/lengthUTF8.cpp +++ b/src/Functions/lengthUTF8.cpp @@ -23,48 +23,42 @@ struct LengthUTF8Impl { static constexpr auto is_fixed_to_constant = false; - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res, size_t input_rows_count) { - size_t size = offsets.size(); - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { res[i] = UTF8::countCodePoints(&data[prev_offset], offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } - static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/) + static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/, size_t) { } - static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res, size_t input_rows_count) { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - { + for (size_t i = 0; i < input_rows_count; ++i) res[i] = UTF8::countCodePoints(&data[i * n], n); - } } - [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) + [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &, size_t) { throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to Array argument"); } - [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to UUID argument"); } - [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv6 argument"); } - [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &, size_t) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv4 argument"); } diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index 5293b688678..81c9d20ce82 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -114,7 +114,7 @@ public: return to_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { auto from_type = arguments[0].type; @@ -136,9 +136,9 @@ public: ColumnFixedString * dst_concrete = assert_cast(dst.get()); if (src.isFixedAndContiguous() && src.sizeOfValueIfFixed() == dst_concrete->getN()) - executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN()); + executeContiguousToFixedString(src, *dst_concrete, dst_concrete->getN(), input_rows_count); else - executeToFixedString(src, *dst_concrete, dst_concrete->getN()); + 
executeToFixedString(src, *dst_concrete, dst_concrete->getN(), input_rows_count); result = std::move(dst); @@ -156,7 +156,7 @@ public: MutableColumnPtr dst = result_type->createColumn(); ColumnString * dst_concrete = assert_cast(dst.get()); - executeToString(src, *dst_concrete); + executeToString(src, *dst_concrete, input_rows_count); result = std::move(dst); @@ -174,12 +174,11 @@ public: const auto & data_from = col_from->getChars(); const auto & offsets_from = col_from->getOffsets(); - size_t size = offsets_from.size(); auto & vec_res = col_res->getData(); - vec_res.resize_fill(size); + vec_res.resize_fill(input_rows_count); size_t offset = 0; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t copy_size = std::min(static_cast(sizeof(ToFieldType)), offsets_from[i] - offset - 1); if constexpr (std::endian::native == std::endian::little) @@ -209,7 +208,6 @@ public: const auto& data_from = col_from_fixed->getChars(); size_t step = col_from_fixed->getN(); - size_t size = data_from.size() / step; auto & vec_res = col_res->getData(); size_t offset = 0; @@ -217,11 +215,11 @@ public: size_t index = data_from.size() - copy_size; if (sizeof(ToFieldType) <= step) - vec_res.resize(size); + vec_res.resize(input_rows_count); else - vec_res.resize_fill(size); + vec_res.resize_fill(input_rows_count); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::endian::native == std::endian::little) memcpy(&vec_res[i], &data_from[offset], copy_size); @@ -251,12 +249,11 @@ public: auto & from = column_from->getData(); auto & to = column_to->getData(); - size_t size = from.size(); - to.resize_fill(size); + to.resize_fill(input_rows_count); static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To)); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::endian::native == std::endian::little) memcpy(static_cast(&to[i]), 
static_cast(&from[i]), copy_size); @@ -307,14 +304,13 @@ private: type.isDecimal(); } - static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) + static void NO_INLINE executeToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n, size_t input_rows_count) { - size_t rows = src.size(); ColumnFixedString::Chars & data_to = dst.getChars(); - data_to.resize_fill(n * rows); + data_to.resize_fill(n * input_rows_count); ColumnFixedString::Offset offset = 0; - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { std::string_view data = src.getDataAt(i).toView(); @@ -327,11 +323,10 @@ private: } } - static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n) + static void NO_INLINE executeContiguousToFixedString(const IColumn & src, ColumnFixedString & dst, size_t n, size_t input_rows_count) { - size_t rows = src.size(); ColumnFixedString::Chars & data_to = dst.getChars(); - data_to.resize(n * rows); + data_to.resize(n * input_rows_count); #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ memcpy(data_to.data(), src.getRawData().data(), data_to.size()); @@ -340,15 +335,14 @@ private: #endif } - static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst) + static void NO_INLINE executeToString(const IColumn & src, ColumnString & dst, size_t input_rows_count) { - size_t rows = src.size(); ColumnString::Chars & data_to = dst.getChars(); ColumnString::Offsets & offsets_to = dst.getOffsets(); - offsets_to.resize(rows); + offsets_to.resize(input_rows_count); ColumnString::Offset offset = 0; - for (size_t i = 0; i < rows; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { StringRef data = src.getDataAt(i); diff --git a/src/Functions/visibleWidth.cpp b/src/Functions/visibleWidth.cpp index ebd4a1ff713..3e70418a456 100644 --- a/src/Functions/visibleWidth.cpp +++ b/src/Functions/visibleWidth.cpp @@ -66,9 +66,8 @@ public: ColumnPtr 
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & src = arguments[0]; - size_t size = input_rows_count; - auto res_col = ColumnUInt64::create(size); + auto res_col = ColumnUInt64::create(input_rows_count); auto & res_data = assert_cast(*res_col).getData(); /// For simplicity reasons, the function is implemented by serializing into temporary buffer. @@ -76,7 +75,7 @@ public: String tmp; FormatSettings format_settings; auto serialization = src.type->getDefaultSerialization(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { { WriteBufferFromString out(tmp); diff --git a/src/Functions/widthBucket.cpp b/src/Functions/widthBucket.cpp index d007cc968f0..ba24362034f 100644 --- a/src/Functions/widthBucket.cpp +++ b/src/Functions/widthBucket.cpp @@ -166,12 +166,12 @@ class FunctionWidthBucket : public IFunction result_column->reserve(1); auto & result_data = result_column->getData(); - for (const auto row_index : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { - const auto operand = getValue(operands_col_const, operands_vec, row_index); - const auto low = getValue(lows_col_const, lows_vec, row_index); - const auto high = getValue(highs_col_const, highs_vec, row_index); - const auto count = getValue(counts_col_const, counts_vec, row_index); + const auto operand = getValue(operands_col_const, operands_vec, row); + const auto low = getValue(lows_col_const, lows_vec, row); + const auto high = getValue(highs_col_const, highs_vec, row); + const auto count = getValue(counts_col_const, counts_vec, row); result_data.push_back(calculate(operand, low, high, count)); } From ca3b21a2be3b798bfad59b2a2bec01f364f8c7e5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 29 Jul 2024 06:41:51 +0200 Subject: [PATCH 17/34] Faster and less flaky 01246_buffer_flush (by using HTTP over clickhouse-client) clickhouse-client is 
incredibly slow with sanitizers: Here are two consecutive queries that should be executed one right after another: 2024.07.27 19:18:49.371354 [ 11070 ] {ywjiyfmvjd} executeQuery: (from [::1]:47746) (comment: 01246_buffer_flush.sh) insert into buffer_01256 select * from system.numbers limit 5 (stage: Complete) 2024.07.27 19:18:49.374647 [ 11070 ] {ywjiyfmvjd} TCPHandler: Processed in 0.004721391 sec. 2024.07.27 19:18:54.293488 [ 11070 ] {30d1f5f7-9594-41e3-9d54-18e1ddfe72af} executeQuery: (from [::1]:47782) (comment: 01246_buffer_flush.sh) select count() from data_01256 (stage: Complete) Yet there is a delay of 5 seconds between them! Refs: https://github.com/ClickHouse/ClickHouse/issues/65745 Signed-off-by: Azat Khuzhin --- .../queries/0_stateless/01246_buffer_flush.sh | 81 ++++++++++--------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh index 27c3f01f216..aea91a0bf6b 100755 --- a/tests/queries/0_stateless/01246_buffer_flush.sh +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -7,6 +7,16 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e +function query() +{ + local query_id + if [[ $1 == --query_id ]]; then + query_id="&query_id=$2" + shift 2 + fi + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}$query_id" -d "$*" +} + function wait_until() { local expr=$1 && shift @@ -17,73 +27,68 @@ function wait_until() function get_buffer_delay() { local buffer_insert_id=$1 && shift - $CLICKHOUSE_CLIENT -nm -q " - SYSTEM FLUSH LOGS; - WITH - (SELECT event_time_microseconds FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_, - (SELECT max(event_time) FROM data_01256) AS end_ - SELECT dateDiff('seconds', begin_, end_)::UInt64; + query "SYSTEM FLUSH LOGS" + query " + WITH + (SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND
type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_, + (SELECT max(event_time) FROM data_01256) AS end_ + SELECT dateDiff('seconds', begin_, end_)::UInt64 " } -$CLICKHOUSE_CLIENT -nm -q " - drop table if exists data_01256; - drop table if exists buffer_01256; - - create table data_01256 (key UInt64, event_time DateTime(6) MATERIALIZED now64(6)) Engine=Memory(); -" +query "drop table if exists data_01256" +query "drop table if exists buffer_01256" +query "create table data_01256 (key UInt64, event_time DateTime(6) MATERIALIZED now64(6)) Engine=Memory()" echo "min" -$CLICKHOUSE_CLIENT -q " - create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1, +query " + create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1, 2, 100, /* time */ 4, 100, /* rows */ 1, 1e6 /* bytes */ ) " min_query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --query_id="$min_query_id" -q "insert into buffer_01256 select * from system.numbers limit 5" -$CLICKHOUSE_CLIENT -q "select count() from data_01256" -wait_until '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]' +query --query_id "$min_query_id" "insert into buffer_01256 select * from system.numbers limit 5" +query "select count() from data_01256" +wait_until '[[ $(query "select count() from data_01256") -eq 5 ]]' sec=$(get_buffer_delay "$min_query_id") [[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" [[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" -$CLICKHOUSE_CLIENT -q "select count() from data_01256" -$CLICKHOUSE_CLIENT -q "drop table buffer_01256" +query "select count() from data_01256" +query "drop table buffer_01256" echo "max" -$CLICKHOUSE_CLIENT -q " - create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1, +query " + create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1, 100, 2, /* time */ 0, 100, 
/* rows */ 0, 1e6 /* bytes */ - ); + ) " max_query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --query_id="$max_query_id" -q "insert into buffer_01256 select * from system.numbers limit 5" -$CLICKHOUSE_CLIENT -q "select count() from data_01256" -wait_until '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]' +query --query_id "$max_query_id" "insert into buffer_01256 select * from system.numbers limit 5" +query "select count() from data_01256" +wait_until '[[ $(query "select count() from data_01256") -eq 10 ]]' sec=$(get_buffer_delay "$max_query_id") [[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" -$CLICKHOUSE_CLIENT -q "select count() from data_01256" -$CLICKHOUSE_CLIENT -q "drop table buffer_01256" +query "select count() from data_01256" +query "drop table buffer_01256" echo "direct" -$CLICKHOUSE_CLIENT -nm -q " - create table buffer_01256 (key UInt64) Engine=Buffer(currentDatabase(), data_01256, 1, +query " + create table buffer_01256 (key UInt64) Engine=Buffer($CLICKHOUSE_DATABASE, data_01256, 1, 100, 100, /* time */ 0, 9, /* rows */ 0, 1e6 /* bytes */ - ); - insert into buffer_01256 select * from system.numbers limit 10; - select count() from data_01256; + ) " +query "insert into buffer_01256 select * from system.numbers limit 10" +query "select count() from data_01256" echo "drop" -$CLICKHOUSE_CLIENT -nm -q " - insert into buffer_01256 select * from system.numbers limit 10; - drop table if exists buffer_01256; - select count() from data_01256; -" +query "insert into buffer_01256 select * from system.numbers limit 10" +query "drop table if exists buffer_01256" +query "select count() from data_01256" -$CLICKHOUSE_CLIENT -q "drop table data_01256" +query "drop table data_01256" From e8c95bb130161bfd02e48b79d00976c3e51cf05b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Jun 2024 07:16:39 +0200 Subject: [PATCH 18/34] Fix 02444_async_broken_outdated_part_loading flakiness It uses already removed path 
to detect the absolute path: 2024.06.06 22:36:55.845743 [ 1055 ] {} test_858tcd7j.rmt (8d64bafa-bedf-4015-9673-8911de129a8f): Removing 2 parts from memory: Parts: [all_0_0_0, all_1_1_0] 2024.06.06 22:36:56.608589 [ 2065 ] {41c3a91d-2ee5-4006-bb79-92e7e8f005bb} executeQuery: (from [::1]:48792) (comment: 02444_async_broken_outdated_part_loading.sh) select path from system.parts where database='test_858tcd7j' and table='rmt' and name='all_1_1_0' (stage: Complete) CI: https://s3.amazonaws.com/clickhouse-test-reports/64856/d10027cc3b7737c524f4cfce262d46753fd03036/stateless_tests__debug__[5_5].html Signed-off-by: Azat Khuzhin --- .../0_stateless/02444_async_broken_outdated_part_loading.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh b/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh index d24c6afcef3..a1313ba16e5 100755 --- a/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh +++ b/tests/queries/0_stateless/02444_async_broken_outdated_part_loading.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "drop table if exists rmt sync;" -$CLICKHOUSE_CLIENT -q "create table rmt (n int) engine=ReplicatedMergeTree('/test/02444/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') order by n" +$CLICKHOUSE_CLIENT -q "create table rmt (n int) engine=ReplicatedMergeTree('/test/02444/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') order by n settings old_parts_lifetime=600" $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt values (1);" $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt values (2);" From 1ae3a1b1256a2ffda2e7d549189b60a0caf35554 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 29 Jul 2024 09:03:43 +0000 Subject: [PATCH 19/34] Fix build --- src/Functions/MultiMatchAllIndicesImpl.h | 2 ++ src/Functions/MultiMatchAnyImpl.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index 17232bbc366..3aeac808880 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -147,6 +147,7 @@ struct MultiMatchAllIndicesImpl (void)max_hyperscan_regexp_length; (void)max_hyperscan_regexp_total_length; (void)reject_expensive_hyperscan_regexps; + (void)input_rows_count; throw Exception(ErrorCodes::NOT_IMPLEMENTED, "multi-search all indices is not implemented when vectorscan is off"); #endif // USE_VECTORSCAN } @@ -274,6 +275,7 @@ struct MultiMatchAllIndicesImpl (void)max_hyperscan_regexp_length; (void)max_hyperscan_regexp_total_length; (void)reject_expensive_hyperscan_regexps; + (void)input_rows_count; throw Exception(ErrorCodes::NOT_IMPLEMENTED, "multi-search all indices is not implemented when vectorscan is off"); #endif // USE_VECTORSCAN } diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index b5872579ebb..03b17321eea 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ 
b/src/Functions/MultiMatchAnyImpl.h @@ -165,7 +165,7 @@ struct MultiMatchAnyImpl memset(accum.data(), 0, accum.size()); for (size_t j = 0; j < input_rows_count; ++j) { - MatchImpl::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum, nullptr); + MatchImpl::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum, nullptr, input_rows_count); for (size_t i = 0; i < res.size(); ++i) { if constexpr (FindAny) From f7c7d0b79bc4e24428264c33ed9074905ee684ea Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 29 Jul 2024 10:25:49 +0000 Subject: [PATCH 20/34] Fix tests --- src/Functions/CountSubstringsImpl.h | 17 +- src/Functions/FunctionsHashing.h | 155 +++++++++--------- .../FunctionsMultiStringFuzzySearch.h | 8 +- src/Functions/FunctionsMultiStringSearch.h | 8 +- src/Functions/FunctionsStringSearch.h | 17 +- src/Functions/FunctionsStringSearchToString.h | 4 +- src/Functions/FunctionsVisitParam.h | 10 +- src/Functions/HasTokenImpl.h | 5 +- src/Functions/MatchImpl.h | 44 ++--- src/Functions/MultiMatchAllIndicesImpl.h | 27 ++- src/Functions/MultiMatchAnyImpl.h | 31 ++-- src/Functions/MultiSearchFirstIndexImpl.h | 22 ++- src/Functions/MultiSearchFirstPositionImpl.h | 20 ++- src/Functions/MultiSearchImpl.h | 18 +- src/Functions/PositionImpl.h | 22 +-- src/Functions/URL/extractURLParameter.cpp | 7 +- src/Functions/extract.cpp | 7 +- 17 files changed, 213 insertions(+), 209 deletions(-) diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index 8e91bc3aeb4..9ff3e4e1f2a 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -37,8 +37,7 @@ struct CountSubstringsImpl const std::string & needle, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t /*input_rows_count*/) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output 
parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -151,8 +150,7 @@ struct CountSubstringsImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -160,7 +158,9 @@ struct CountSubstringsImpl ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + size_t size = haystack_offsets.size(); + + for (size_t i = 0; i < size; ++i) { size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; @@ -207,8 +207,7 @@ struct CountSubstringsImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -216,7 +215,9 @@ struct CountSubstringsImpl /// NOTE You could use haystack indexing. But this is a rare case. ColumnString::Offset prev_needle_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + size_t size = needle_offsets.size(); + + for (size_t i = 0; i < size; ++i) { res[i] = 0; auto start = start_pos != nullptr ? 
std::max(start_pos->getUInt(i), UInt64(1)) : UInt64(1); diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 91e49bf5035..95c54ac9528 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -715,7 +715,7 @@ private: using ToType = typename Impl::ReturnType; template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const { using ColVecType = ColumnVectorOrDecimal; @@ -726,8 +726,9 @@ private: const typename ColVecType::Container & vec_from = col_from->getData(); typename ColumnVector::Container & vec_to = col_to->getData(); - vec_to.resize(input_rows_count); - for (size_t i = 0; i < input_rows_count; ++i) + size_t size = vec_from.size(); + vec_to.resize(size); + for (size_t i = 0; i < size; ++i) vec_to[i] = Impl::apply(vec_from[i]); return col_to; @@ -758,39 +759,39 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IDataType * from_type = arguments[0].type.get(); WhichDataType which(from_type); if (which.isUInt8()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isUInt16()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isUInt32()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isUInt64()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isInt8()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if 
(which.isInt16()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isInt32()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isInt64()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isDate()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isDate32()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isDateTime()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isDecimal32()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isDecimal64()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else if (which.isIPv4()) - return executeType(arguments, input_rows_count); + return executeType(arguments); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0].type->getName(), getName()); @@ -842,7 +843,7 @@ private: using ToType = typename Impl::ReturnType; template - void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const + void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; KeyType key{}; @@ -852,7 +853,8 @@ private: if (const ColVecType * col_from = checkAndGetColumn(column)) { const typename ColVecType::Container & vec_from = col_from->getData(); - for (size_t i = 0; i < input_rows_count; ++i) + const size_t size = vec_from.size(); + for (size_t i = 0; i < size; ++i) { ToType hash; @@ -892,7 +894,7 @@ private: if (!key_cols.is_const) { ColumnPtr full_column = col_from_const->convertToFullColumn(); - return 
executeIntType(key_cols, full_column.get(), vec_to, input_rows_count); + return executeIntType(key_cols, full_column.get(), vec_to); } } auto value = col_from_const->template getValue(); @@ -936,7 +938,7 @@ private: } template - void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const + void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; KeyType key{}; @@ -956,7 +958,8 @@ private: if (const ColVecType * col_from = checkAndGetColumn(column)) { const typename ColVecType::Container & vec_from = col_from->getData(); - for (size_t i = 0; i < input_rows_count; ++i) + size_t size = vec_from.size(); + for (size_t i = 0; i < size; ++i) { ToType hash; if constexpr (Keyed) @@ -984,7 +987,7 @@ private: if (!key_cols.is_const) { ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeBigIntType(key_cols, full_column.get(), vec_to, input_rows_count); + return executeBigIntType(key_cols, full_column.get(), vec_to); } } auto value = col_from_const->template getValue(); @@ -1011,12 +1014,12 @@ private: } template - void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const + void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { KeyType key{}; if constexpr (Keyed) key = Impl::getKey(key_cols, 0); - for (size_t i = 0, size = input_rows_count; i < size; ++i) + for (size_t i = 0, size = column->size(); i < size; ++i) { if constexpr (Keyed) if (!key_cols.is_const && i != 0) @@ -1031,7 +1034,7 @@ private: } template - void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const + void 
executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const { KeyType key{}; if constexpr (Keyed) @@ -1040,9 +1043,10 @@ private: { const typename ColumnString::Chars & data = col_from->getChars(); const typename ColumnString::Offsets & offsets = col_from->getOffsets(); + size_t size = offsets.size(); ColumnString::Offset current_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < size; ++i) { if constexpr (Keyed) if (!key_cols.is_const && i != 0) @@ -1063,8 +1067,9 @@ private: { const typename ColumnString::Chars & data = col_from_fixed->getChars(); size_t n = col_from_fixed->getN(); + size_t size = data.size() / n; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < size; ++i) { if constexpr (Keyed) if (!key_cols.is_const && i != 0) @@ -1083,7 +1088,7 @@ private: if (!key_cols.is_const) { ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeString(key_cols, full_column.get(), vec_to, input_rows_count); + return executeString(key_cols, full_column.get(), vec_to); } } String value = col_from_const->getValue(); @@ -1109,7 +1114,7 @@ private: } template - void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, size_t input_rows_count) const + void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const { const IDataType * nested_type = typeid_cast(*type).getNestedType().get(); @@ -1126,16 +1131,18 @@ private: { KeyColumnsType key_cols_tmp{key_cols}; key_cols_tmp.offsets = &offsets; - executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first, input_rows_count); + executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first); } else - executeForArgument(key_cols, nested_type, nested_column, vec_temp, 
nested_is_first, input_rows_count); + executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); + + const size_t size = offsets.size(); ColumnArray::Offset current_offset = 0; KeyType key{}; if constexpr (Keyed) key = Impl::getKey(key_cols, 0); - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < size; ++i) { if constexpr (Keyed) if (!key_cols.is_const && i != 0) @@ -1163,7 +1170,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key_cols, type, full_column.get(), vec_to, input_rows_count); + executeArray(key_cols, type, full_column.get(), vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1171,7 +1178,7 @@ private: } template - void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to, size_t input_rows_count) const + void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const { WhichDataType which(from_type); @@ -1183,43 +1190,43 @@ private: if (key_cols.size() != vec_to.size() && key_cols.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName()); - if (which.isUInt8()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isUInt16()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isUInt32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isUInt64()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isUInt128()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isUInt256()) 
executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isInt8()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isInt16()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isInt32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isInt64()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isInt128()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isInt256()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isUUID()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isIPv4()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isIPv6()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isEnum8()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isEnum16()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isDate()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isDate32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isDateTime()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); + if (which.isUInt8()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt16()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt64()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isUInt128()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isUInt256()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isInt8()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt16()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt64()) 
executeIntType(key_cols, icolumn, vec_to); + else if (which.isInt128()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isInt256()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isUUID()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isIPv4()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isIPv6()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isEnum8()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isEnum16()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isDate()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isDate32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isDateTime()) executeIntType(key_cols, icolumn, vec_to); /// TODO: executeIntType() for Decimal32/64 leads to incompatible result - else if (which.isDecimal32()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isDecimal64()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isDecimal128()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isDecimal256()) executeBigIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isFloat32()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isFloat64()) executeIntType(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isString()) executeString(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isFixedString()) executeString(key_cols, icolumn, vec_to, input_rows_count); - else if (which.isArray()) executeArray(key_cols, from_type, icolumn, vec_to, input_rows_count); - else executeGeneric(key_cols, icolumn, vec_to, input_rows_count); + else if (which.isDecimal32()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isDecimal64()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isDecimal128()) executeBigIntType(key_cols, icolumn, vec_to); + else 
if (which.isDecimal256()) executeBigIntType(key_cols, icolumn, vec_to); + else if (which.isFloat32()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isFloat64()) executeIntType(key_cols, icolumn, vec_to); + else if (which.isString()) executeString(key_cols, icolumn, vec_to); + else if (which.isFixedString()) executeString(key_cols, icolumn, vec_to); + else if (which.isArray()) executeArray(key_cols, from_type, icolumn, vec_to); + else executeGeneric(key_cols, icolumn, vec_to); } /// Return a fixed random-looking magic number when input is empty. static constexpr auto filler = 0xe28dbde7fe22e41c; - void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first, size_t input_rows_count) const + void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const { /// Flattening of tuples. 
if (const ColumnTuple * tuple = typeid_cast(column)) @@ -1233,7 +1240,7 @@ private: hash = static_cast(filler); for (size_t i = 0; i < tuple_size; ++i) - executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first, input_rows_count); + executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); } else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData(column)) { @@ -1248,24 +1255,24 @@ private: for (size_t i = 0; i < tuple_size; ++i) { auto tmp = ColumnConst::create(tuple_columns[i], column->size()); - executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first, input_rows_count); + executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first); } } else if (const auto * map = checkAndGetColumn(column)) { const auto & type_map = assert_cast(*type); - executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first, input_rows_count); + executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } else if (const auto * const_map = checkAndGetColumnConst(column)) { - executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first, input_rows_count); + executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); } else { if (is_first) - executeAny(key_cols, type, column, vec_to, input_rows_count); + executeAny(key_cols, type, column, vec_to); else - executeAny(key_cols, type, column, vec_to, input_rows_count); + executeAny(key_cols, type, column, vec_to); } is_first = false; @@ -1318,7 +1325,7 @@ public: for (size_t i = first_data_argument; i < arguments.size(); ++i) { const auto & col = arguments[i]; - executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument, input_rows_count); + executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, 
is_first_argument); } } diff --git a/src/Functions/FunctionsMultiStringFuzzySearch.h b/src/Functions/FunctionsMultiStringFuzzySearch.h index 8346380c35d..a92a6570279 100644 --- a/src/Functions/FunctionsMultiStringFuzzySearch.h +++ b/src/Functions/FunctionsMultiStringFuzzySearch.h @@ -71,7 +71,7 @@ public: return Impl::getReturnType(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & haystack_ptr = arguments[0].column; const ColumnPtr & edit_distance_ptr = arguments[1].column; @@ -114,16 +114,14 @@ public: col_needles_const->getValue(), vec_res, offsets_res, edit_distance, - allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, - input_rows_count); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); else Impl::vectorVector( col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needles_vector->getData(), col_needles_vector->getOffsets(), vec_res, offsets_res, edit_distance, - allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, - input_rows_count); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); // the combination of const haystack + const needle is not implemented because // useDefaultImplementationForConstants() == true makes upper layers convert both to diff --git a/src/Functions/FunctionsMultiStringSearch.h b/src/Functions/FunctionsMultiStringSearch.h index 6bcc8581a38..03db2651fd0 100644 --- a/src/Functions/FunctionsMultiStringSearch.h +++ b/src/Functions/FunctionsMultiStringSearch.h @@ -81,7 +81,7 @@ public: return Impl::getReturnType(); } - ColumnPtr 
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & haystack_ptr = arguments[0].column; const ColumnPtr & needles_ptr = arguments[1].column; @@ -110,15 +110,13 @@ public: col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needles_const->getValue(), vec_res, offsets_res, - allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, - input_rows_count); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); else Impl::vectorVector( col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needles_vector->getData(), col_needles_vector->getOffsets(), vec_res, offsets_res, - allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, - input_rows_count); + allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); // the combination of const haystack + const needle is not implemented because // useDefaultImplementationForConstants() == true makes upper layers convert both to diff --git a/src/Functions/FunctionsStringSearch.h b/src/Functions/FunctionsStringSearch.h index 7ec0076e395..fba6336ebff 100644 --- a/src/Functions/FunctionsStringSearch.h +++ b/src/Functions/FunctionsStringSearch.h @@ -163,7 +163,7 @@ public: return return_type; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { const ColumnPtr & column_haystack = (argument_order == 
ArgumentOrder::HaystackNeedle) ? arguments[0].column : arguments[1].column; const ColumnPtr & column_needle = (argument_order == ArgumentOrder::HaystackNeedle) ? arguments[1].column : arguments[0].column; @@ -236,8 +236,7 @@ public: col_needle_vector->getOffsets(), column_start_pos, vec_res, - null_map.get(), - input_rows_count); + null_map.get()); else if (col_haystack_vector && col_needle_const) Impl::vectorConstant( col_haystack_vector->getChars(), @@ -245,8 +244,7 @@ public: col_needle_const->getValue(), column_start_pos, vec_res, - null_map.get(), - input_rows_count); + null_map.get()); else if (col_haystack_vector_fixed && col_needle_vector) Impl::vectorFixedVector( col_haystack_vector_fixed->getChars(), @@ -255,16 +253,14 @@ public: col_needle_vector->getOffsets(), column_start_pos, vec_res, - null_map.get(), - input_rows_count); + null_map.get()); else if (col_haystack_vector_fixed && col_needle_const) Impl::vectorFixedConstant( col_haystack_vector_fixed->getChars(), col_haystack_vector_fixed->getN(), col_needle_const->getValue(), vec_res, - null_map.get(), - input_rows_count); + null_map.get()); else if (col_haystack_const && col_needle_vector) Impl::constantVector( col_haystack_const->getValue(), @@ -272,8 +268,7 @@ public: col_needle_vector->getOffsets(), column_start_pos, vec_res, - null_map.get(), - input_rows_count); + null_map.get()); else throw Exception( ErrorCodes::ILLEGAL_COLUMN, diff --git a/src/Functions/FunctionsStringSearchToString.h b/src/Functions/FunctionsStringSearchToString.h index c889cf062a3..978a84de472 100644 --- a/src/Functions/FunctionsStringSearchToString.h +++ b/src/Functions/FunctionsStringSearchToString.h @@ -60,7 +60,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const 
ColumnPtr column = arguments[0].column; const ColumnPtr column_needle = arguments[1].column; @@ -75,7 +75,7 @@ public: ColumnString::Chars & vec_res = col_res->getChars(); ColumnString::Offsets & offsets_res = col_res->getOffsets(); - Impl::vector(col->getChars(), col->getOffsets(), col_needle->getValue(), vec_res, offsets_res, input_rows_count); + Impl::vector(col->getChars(), col->getOffsets(), col_needle->getValue(), vec_res, offsets_res); return col_res; } diff --git a/src/Functions/FunctionsVisitParam.h b/src/Functions/FunctionsVisitParam.h index dbe291fcb93..5e13fbbad5c 100644 --- a/src/Functions/FunctionsVisitParam.h +++ b/src/Functions/FunctionsVisitParam.h @@ -93,8 +93,7 @@ struct ExtractParamImpl std::string needle, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t /*input_rows_count*/) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -169,12 +168,11 @@ struct ExtractParamToStringImpl { static void vector(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, std::string needle, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - size_t input_rows_count) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { /// Constant 5 is taken from a function that performs a similar task FunctionsStringSearch.h::ExtractImpl - res_data.reserve(input_rows_count / 5); - res_offsets.resize(input_rows_count); + res_data.reserve(haystack_data.size() / 5); + res_offsets.resize(haystack_offsets.size()); /// We are looking for a parameter simply as a substring of the form "name" needle = "\"" + needle + "\":"; diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 4943bf708c5..a4ff49859cc 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -35,13 +35,12 @@ struct HasTokenImpl const std::string & 
pattern, const ColumnPtr & start_pos, PaddedPODArray & res, - ColumnUInt8 * res_null, - size_t input_rows_count) + ColumnUInt8 * res_null) { if (start_pos != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); - if (input_rows_count == 0) + if (haystack_offsets.empty()) return; const UInt8 * const begin = haystack_data.data(); diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index dd1dec9bdff..55b2fee5400 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -127,13 +127,14 @@ struct MatchImpl const String & needle, [[maybe_unused]] const ColumnPtr & start_pos_, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); - assert(input_rows_count == res.size()); + const size_t haystack_size = haystack_offsets.size(); + + assert(haystack_size == res.size()); assert(start_pos_ == nullptr); if (haystack_offsets.empty()) @@ -201,11 +202,11 @@ struct MatchImpl if (required_substring.empty()) { if (!regexp.getRE2()) /// An empty regexp. Always matches. - memset(res.data(), !negate, input_rows_count * sizeof(res[0])); + memset(res.data(), !negate, haystack_size * sizeof(res[0])); else { size_t prev_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_size; ++i) { const bool match = regexp.getRE2()->Match( {reinterpret_cast(&haystack_data[prev_offset]), haystack_offsets[i] - prev_offset - 1}, @@ -290,13 +291,14 @@ struct MatchImpl size_t N, const String & needle, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
assert(!res_null); - assert(input_rows_count == res.size()); + const size_t haystack_size = haystack.size() / N; + + assert(haystack_size == res.size()); if (haystack.empty()) return; @@ -368,11 +370,11 @@ struct MatchImpl if (required_substring.empty()) { if (!regexp.getRE2()) /// An empty regexp. Always matches. - memset(res.data(), !negate, input_rows_count * sizeof(res[0])); + memset(res.data(), !negate, haystack_size * sizeof(res[0])); else { size_t offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_size; ++i) { const bool match = regexp.getRE2()->Match( {reinterpret_cast(&haystack[offset]), N}, @@ -462,14 +464,15 @@ struct MatchImpl const ColumnString::Offsets & needle_offset, [[maybe_unused]] const ColumnPtr & start_pos_, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
assert(!res_null); - assert(input_rows_count == needle_offset.size()); - assert(input_rows_count == res.size()); + const size_t haystack_size = haystack_offsets.size(); + + assert(haystack_size == needle_offset.size()); + assert(haystack_size == res.size()); assert(start_pos_ == nullptr); if (haystack_offsets.empty()) @@ -485,7 +488,7 @@ struct MatchImpl Regexps::LocalCacheTable cache; Regexps::RegexpPtr regexp; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_size; ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; @@ -570,14 +573,15 @@ struct MatchImpl const ColumnString::Offsets & needle_offset, [[maybe_unused]] const ColumnPtr & start_pos_, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. 
assert(!res_null); - assert(input_rows_count == needle_offset.size()); - assert(input_rows_count == res.size()); + const size_t haystack_size = haystack.size()/N; + + assert(haystack_size == needle_offset.size()); + assert(haystack_size == res.size()); assert(start_pos_ == nullptr); if (haystack.empty()) @@ -593,7 +597,7 @@ struct MatchImpl Regexps::LocalCacheTable cache; Regexps::RegexpPtr regexp; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_size; ++i) { const auto * const cur_haystack_data = &haystack[prev_haystack_offset]; const size_t cur_haystack_length = N; diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index 3aeac808880..3e9c8fba215 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -52,10 +52,9 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { - vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); + vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); } static void vectorConstant( @@ -68,8 +67,7 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); @@ -89,7 
+87,7 @@ struct MultiMatchAllIndicesImpl throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Regular expression evaluation in vectorscan will be too slow. To ignore this error, disable setting 'reject_expensive_hyperscan_regexps'."); } - offsets.resize(input_rows_count); + offsets.resize(haystack_offsets.size()); if (needles_arr.empty()) { @@ -116,8 +114,9 @@ struct MultiMatchAllIndicesImpl static_cast*>(context)->push_back(id); return 0; }; + const size_t haystack_offsets_size = haystack_offsets.size(); UInt64 offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_offsets_size; ++i) { UInt64 length = haystack_offsets[i] - offset - 1; /// vectorscan restriction. @@ -147,7 +146,6 @@ struct MultiMatchAllIndicesImpl (void)max_hyperscan_regexp_length; (void)max_hyperscan_regexp_total_length; (void)reject_expensive_hyperscan_regexps; - (void)input_rows_count; throw Exception(ErrorCodes::NOT_IMPLEMENTED, "multi-search all indices is not implemented when vectorscan is off"); #endif // USE_VECTORSCAN } @@ -162,10 +160,9 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { - vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); + vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); } static void vectorVector( @@ -179,13 +176,12 @@ struct MultiMatchAllIndicesImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool 
reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); #if USE_VECTORSCAN - offsets.resize(input_rows_count); + offsets.resize(haystack_offsets.size()); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -193,7 +189,7 @@ struct MultiMatchAllIndicesImpl std::vector needles; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_offsets.size(); ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); @@ -275,7 +271,6 @@ struct MultiMatchAllIndicesImpl (void)max_hyperscan_regexp_length; (void)max_hyperscan_regexp_total_length; (void)reject_expensive_hyperscan_regexps; - (void)input_rows_count; throw Exception(ErrorCodes::NOT_IMPLEMENTED, "multi-search all indices is not implemented when vectorscan is off"); #endif // USE_VECTORSCAN } diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 03b17321eea..20b2150048b 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -66,10 +66,9 @@ struct MultiMatchAnyImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { - vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); + vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); } static void vectorConstant( @@ -82,8 +81,7 @@ struct MultiMatchAnyImpl bool allow_hyperscan, 
size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); @@ -103,7 +101,7 @@ struct MultiMatchAnyImpl throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Regular expression evaluation in vectorscan will be too slow. To ignore this error, disable setting 'reject_expensive_hyperscan_regexps'."); } - res.resize(input_rows_count); + res.resize(haystack_offsets.size()); if (needles_arr.empty()) { @@ -135,8 +133,9 @@ struct MultiMatchAnyImpl /// Once we hit the callback, there is no need to search for others. return 1; }; + const size_t haystack_offsets_size = haystack_offsets.size(); UInt64 offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_offsets_size; ++i) { UInt64 length = haystack_offsets[i] - offset - 1; /// vectorscan restriction. 
@@ -163,9 +162,9 @@ struct MultiMatchAnyImpl PaddedPODArray accum(res.size()); memset(res.data(), 0, res.size() * sizeof(res.front())); memset(accum.data(), 0, accum.size()); - for (size_t j = 0; j < input_rows_count; ++j) + for (size_t j = 0; j < needles.size(); ++j) { - MatchImpl::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum, nullptr, input_rows_count); + MatchImpl::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum, nullptr); for (size_t i = 0; i < res.size(); ++i) { if constexpr (FindAny) @@ -187,10 +186,9 @@ struct MultiMatchAnyImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { - vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps, input_rows_count); + vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps); } static void vectorVector( @@ -204,13 +202,12 @@ struct MultiMatchAnyImpl bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, - bool reject_expensive_hyperscan_regexps, - size_t input_rows_count) + bool reject_expensive_hyperscan_regexps) { if (!allow_hyperscan) throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0"); - res.resize(input_rows_count); + res.resize(haystack_offsets.size()); #if USE_VECTORSCAN size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -219,7 +216,7 @@ struct MultiMatchAnyImpl 
std::vector needles; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_offsets.size(); ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); @@ -309,7 +306,7 @@ struct MultiMatchAnyImpl std::vector needles; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_offsets.size(); ++i) { const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset]; const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; diff --git a/src/Functions/MultiSearchFirstIndexImpl.h b/src/Functions/MultiSearchFirstIndexImpl.h index b80d9d3a124..36a5fd514d9 100644 --- a/src/Functions/MultiSearchFirstIndexImpl.h +++ b/src/Functions/MultiSearchFirstIndexImpl.h @@ -33,8 +33,7 @@ struct MultiSearchFirstIndexImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/, - size_t input_rows_count) + bool /*reject_expensive_hyperscan_regexps*/) { // For performance of Volnitsky search, it is crucial to save only one byte for pattern number. 
if (needles_arr.size() > std::numeric_limits::max()) @@ -49,13 +48,14 @@ struct MultiSearchFirstIndexImpl auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - res.resize(input_rows_count); + const size_t haystack_size = haystack_offsets.size(); + res.resize(haystack_size); size_t iteration = 0; while (searcher.hasMoreToSearch()) { size_t prev_haystack_offset = 0; - for (size_t j = 0; j < input_rows_count; ++j) + for (size_t j = 0; j < haystack_size; ++j) { const auto * haystack = &haystack_data[prev_haystack_offset]; const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1; @@ -80,10 +80,10 @@ struct MultiSearchFirstIndexImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/, - size_t input_rows_count) + bool /*reject_expensive_hyperscan_regexps*/) { - res.resize(input_rows_count); + const size_t haystack_size = haystack_offsets.size(); + res.resize(haystack_size); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -92,12 +92,14 @@ struct MultiSearchFirstIndexImpl std::vector needles; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_size; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) + { needles.emplace_back(needles_data_string.getDataAt(j).toView()); + } auto searcher = Impl::createMultiSearcherInBigHaystack(needles); // sub-optimal @@ -108,11 +110,15 @@ struct MultiSearchFirstIndexImpl while (searcher.hasMoreToSearch()) { if (iteration == 0 || res[i] == 0) + { res[i] = searcher.searchOneFirstIndex(haystack, haystack_end); + } ++iteration; } if (iteration == 0) + { res[i] = 0; + } prev_haystack_offset = haystack_offsets[i]; prev_needles_offset = needles_offsets[i]; diff --git a/src/Functions/MultiSearchFirstPositionImpl.h b/src/Functions/MultiSearchFirstPositionImpl.h index 
cd4e585e99b..ccdd82a0ee5 100644 --- a/src/Functions/MultiSearchFirstPositionImpl.h +++ b/src/Functions/MultiSearchFirstPositionImpl.h @@ -33,8 +33,7 @@ struct MultiSearchFirstPositionImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/, - size_t input_rows_count) + bool /*reject_expensive_hyperscan_regexps*/) { // For performance of Volnitsky search, it is crucial to save only one byte for pattern number. if (needles_arr.size() > std::numeric_limits::max()) @@ -53,13 +52,14 @@ struct MultiSearchFirstPositionImpl }; auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - res.resize(input_rows_count); + const size_t haystack_size = haystack_offsets.size(); + res.resize(haystack_size); size_t iteration = 0; while (searcher.hasMoreToSearch()) { size_t prev_haystack_offset = 0; - for (size_t j = 0; j < input_rows_count; ++j) + for (size_t j = 0; j < haystack_size; ++j) { const auto * haystack = &haystack_data[prev_haystack_offset]; const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1; @@ -89,10 +89,10 @@ struct MultiSearchFirstPositionImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/, - size_t input_rows_count) + bool /*reject_expensive_hyperscan_regexps*/) { - res.resize(input_rows_count); + const size_t haystack_size = haystack_offsets.size(); + res.resize(haystack_size); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -106,12 +106,14 @@ struct MultiSearchFirstPositionImpl return 1 + Impl::countChars(reinterpret_cast(start), reinterpret_cast(end)); }; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_size; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) + { 
needles.emplace_back(needles_data_string.getDataAt(j).toView()); + } auto searcher = Impl::createMultiSearcherInBigHaystack(needles); // sub-optimal @@ -136,7 +138,9 @@ struct MultiSearchFirstPositionImpl ++iteration; } if (iteration == 0) + { res[i] = 0; + } prev_haystack_offset = haystack_offsets[i]; prev_needles_offset = needles_offsets[i]; diff --git a/src/Functions/MultiSearchImpl.h b/src/Functions/MultiSearchImpl.h index 909425f5a93..467cc96a95f 100644 --- a/src/Functions/MultiSearchImpl.h +++ b/src/Functions/MultiSearchImpl.h @@ -33,8 +33,7 @@ struct MultiSearchImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/, - size_t input_rows_count) + bool /*reject_expensive_hyperscan_regexps*/) { // For performance of Volnitsky search, it is crucial to save only one byte for pattern number. if (needles_arr.size() > std::numeric_limits::max()) @@ -49,13 +48,14 @@ struct MultiSearchImpl auto searcher = Impl::createMultiSearcherInBigHaystack(needles); - res.resize(input_rows_count); + const size_t haystack_size = haystack_offsets.size(); + res.resize(haystack_size); size_t iteration = 0; while (searcher.hasMoreToSearch()) { size_t prev_haystack_offset = 0; - for (size_t j = 0; j < input_rows_count; ++j) + for (size_t j = 0; j < haystack_size; ++j) { const auto * haystack = &haystack_data[prev_haystack_offset]; const auto * haystack_end = haystack + haystack_offsets[j] - prev_haystack_offset - 1; @@ -79,10 +79,10 @@ struct MultiSearchImpl bool /*allow_hyperscan*/, size_t /*max_hyperscan_regexp_length*/, size_t /*max_hyperscan_regexp_total_length*/, - bool /*reject_expensive_hyperscan_regexps*/, - size_t input_rows_count) + bool /*reject_expensive_hyperscan_regexps*/) { - res.resize(input_rows_count); + const size_t haystack_size = haystack_offsets.size(); + res.resize(haystack_size); size_t prev_haystack_offset = 0; size_t prev_needles_offset = 0; @@ -91,12 
+91,14 @@ struct MultiSearchImpl std::vector needles; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < haystack_size; ++i) { needles.reserve(needles_offsets[i] - prev_needles_offset); for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j) + { needles.emplace_back(needles_data_string.getDataAt(j).toView()); + } const auto * const haystack = &haystack_data[prev_haystack_offset]; const size_t haystack_length = haystack_offsets[i] - prev_haystack_offset - 1; diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index e525b5fab57..eeb9d8b6a59 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -193,8 +193,7 @@ struct PositionImpl const std::string & needle, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -215,12 +214,13 @@ struct PositionImpl } ColumnString::Offset prev_offset = 0; + size_t rows = haystack_offsets.size(); if (const ColumnConst * start_pos_const = typeid_cast(&*start_pos)) { /// Needle is empty and start_pos is constant UInt64 start = std::max(start_pos_const->getUInt(0), static_cast(1)); - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < rows; ++i) { size_t haystack_size = Impl::countChars( reinterpret_cast(pos), reinterpret_cast(pos + haystack_offsets[i] - prev_offset - 1)); @@ -234,7 +234,7 @@ struct PositionImpl else { /// Needle is empty and start_pos is not constant - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < rows; ++i) { size_t haystack_size = Impl::countChars( reinterpret_cast(pos), reinterpret_cast(pos + haystack_offsets[i] - prev_offset - 1)); @@ -359,8 +359,7 @@ struct PositionImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - 
[[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -368,7 +367,9 @@ struct PositionImpl ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + size_t size = haystack_offsets.size(); + + for (size_t i = 0; i < size; ++i) { size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; @@ -422,8 +423,7 @@ struct PositionImpl const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, PaddedPODArray & res, - [[maybe_unused]] ColumnUInt8 * res_null, - size_t input_rows_count) + [[maybe_unused]] ColumnUInt8 * res_null) { /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. assert(!res_null); @@ -431,7 +431,9 @@ struct PositionImpl /// NOTE You could use haystack indexing. But this is a rare case. 
ColumnString::Offset prev_needle_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + size_t size = needle_offsets.size(); + + for (size_t i = 0; i < size; ++i) { size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; diff --git a/src/Functions/URL/extractURLParameter.cpp b/src/Functions/URL/extractURLParameter.cpp index 590c2779d9c..f75875e0200 100644 --- a/src/Functions/URL/extractURLParameter.cpp +++ b/src/Functions/URL/extractURLParameter.cpp @@ -10,11 +10,10 @@ struct ExtractURLParameterImpl static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, std::string pattern, - ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - size_t input_rows_count) + ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { res_data.reserve(data.size() / 5); - res_offsets.resize(input_rows_count); + res_offsets.resize(offsets.size()); pattern += '='; const char * param_str = pattern.c_str(); @@ -23,7 +22,7 @@ struct ExtractURLParameterImpl ColumnString::Offset prev_offset = 0; ColumnString::Offset res_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i < offsets.size(); ++i) { ColumnString::Offset cur_offset = offsets[i]; diff --git a/src/Functions/extract.cpp b/src/Functions/extract.cpp index c78ee9898b7..6bbdaff0e3f 100644 --- a/src/Functions/extract.cpp +++ b/src/Functions/extract.cpp @@ -16,11 +16,10 @@ struct ExtractImpl const ColumnString::Offsets & offsets, const std::string & pattern, ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets, - size_t input_rows_count) + ColumnString::Offsets & res_offsets) { res_data.reserve(data.size() / 5); - res_offsets.resize(input_rows_count); + res_offsets.resize(offsets.size()); const OptimizedRegularExpression regexp = Regexps::createRegexp(pattern); @@ -30,7 +29,7 @@ struct ExtractImpl size_t prev_offset = 0; size_t res_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + for (size_t i = 0; i 
< offsets.size(); ++i) { size_t cur_offset = offsets[i]; From 837b0e053d841bcd00ed15769a22853bb58f1a84 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 29 Jul 2024 10:50:41 +0000 Subject: [PATCH 21/34] Add JSONAsObject format to documentation --- docs/en/interfaces/formats.md | 62 +++++++++++++++++++ .../aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 63 insertions(+) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 9c4c082bc3a..8892c6d8d3f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -32,6 +32,7 @@ The supported formats are: | [Vertical](#vertical) | ✗ | ✔ | | [JSON](#json) | ✔ | ✔ | | [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONAsObject](#jsonasobject) | ✔ | ✗ | | [JSONStrings](#jsonstrings) | ✔ | ✔ | | [JSONColumns](#jsoncolumns) | ✔ | ✔ | | [JSONColumnsWithMetadata](#jsoncolumnsmonoblock) | ✔ | ✔ | @@ -822,6 +823,67 @@ Result: └────────────────────────────┘ ``` +## JSONAsObject {#jsonasobject} + +In this format, a single JSON object is interpreted as a single [Object('json')](/docs/en/sql-reference/data-types/json.md) value. If the input has several JSON objects (comma separated), they are interpreted as separate rows. If the input data is enclosed in square brackets, it is interpreted as an array of JSONs. + +This format can only be parsed for a table with a single field of type [Object('json')](/docs/en/sql-reference/data-types/json.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized). 
+ +**Examples** + +Query: + +``` sql +SET allow_experimental_object_type = 1; +CREATE TABLE json_as_object (json Object('json')) ENGINE = Memory; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {"foo":{"bar":{"x":"y"},"baz":1}},{},{"any json structure":1} +SELECT * FROM json_as_object FORMAT JSONEachRow; +``` + +Result: + +``` response +{"json":{"any json structure":0,"foo":{"bar":{"x":"y"},"baz":1}}} +{"json":{"any json structure":0,"foo":{"bar":{"x":""},"baz":0}}} +{"json":{"any json structure":1,"foo":{"bar":{"x":""},"baz":0}}} +``` + +**An array of JSON objects** + +Query: + +``` sql +SET allow_experimental_object_type = 1; +CREATE TABLE json_square_brackets (field Object('json')) ENGINE = Memory; +INSERT INTO json_square_brackets FORMAT JSONAsObject [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}]; + +SELECT * FROM json_square_brackets FORMAT JSONEachRow; +``` + +Result: + +```response +{"field":{"id":1,"name":"name1"}} +{"field":{"id":2,"name":"name2"}} +``` + +**Columns with default values** + +```sql +SET allow_experimental_object_type = 1; +CREATE TABLE json_as_object (json Object('json'), time DateTime MATERIALIZED now()) ENGINE = Memory; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {"foo":{"bar":{"x":"y"},"baz":1}}; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {}; +INSERT INTO json_as_object (json) FORMAT JSONAsObject {"any json structure":1} +SELECT * FROM json_as_object FORMAT JSONEachRow +``` + +```response +{"json":{"any json structure":0,"foo":{"bar":{"x":"y"},"baz":1}},"time":"2024-07-25 17:02:45"} +{"json":{"any json structure":0,"foo":{"bar":{"x":""},"baz":0}},"time":"2024-07-25 17:02:47"} +{"json":{"any json structure":1,"foo":{"bar":{"x":""},"baz":0}},"time":"2024-07-25 17:02:50"} +``` + ## JSONCompact {#jsoncompact} Differs from JSON only in that data rows are output in arrays, not in objects. 
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8e4e4fafe29..1efb7b23d1b 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1868,6 +1868,7 @@ joinGet json jsonMergePatch jsonasstring +jsonasobject jsoncolumns jsoncolumnsmonoblock jsoncompact From 8f6a8b7ad6eac57774bf8a05b71a9837c42dca0d Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 28 Jul 2024 16:16:43 +0200 Subject: [PATCH 22/34] Fix test test_backup_restore_on_cluster/test_disallow_concurrency --- .../test_disallow_concurrency.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index cd0f2032559..7ce28cbdc96 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -111,10 +111,10 @@ def create_and_fill_table(): nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)") -def wait_for_fail_backup(node, backup_id, backup_name): +def wait_for_fail_backup(node, backup_id): expected_errors = [ "Concurrent backups not supported", - f"Backup {backup_name} already exists", + "BACKUP_ALREADY_EXISTS", ] status = node.query( f"SELECT status FROM system.backups WHERE id == '{backup_id}'" @@ -137,7 +137,7 @@ def wait_for_fail_backup(node, backup_id, backup_name): error = node.query( f"SELECT error FROM system.backups WHERE id == '{backup_id}'" ).rstrip("\n") - assert re.search(f"Backup {backup_name} already exists", error) + assert any([expected_error in error for expected_error in expected_errors]) return else: assert False, "Concurrent backups both passed, when one is expected to fail" @@ -207,10 +207,10 @@ def test_concurrent_backups_on_same_node(): 
expected_errors = [ "Concurrent backups not supported", - f"Backup {backup_name} already exists", + "BACKUP_ALREADY_EXISTS", ] if not error: - wait_for_fail_backup(nodes[0], id, backup_name) + wait_for_fail_backup(nodes[0], id) assert any([expected_error in error for expected_error in expected_errors]) @@ -257,11 +257,11 @@ def test_concurrent_backups_on_different_nodes(): expected_errors = [ "Concurrent backups not supported", - f"Backup {backup_name} already exists", + "BACKUP_ALREADY_EXISTS", ] if not error: - wait_for_fail_backup(nodes[1], id, backup_name) + wait_for_fail_backup(nodes[1], id) assert any([expected_error in error for expected_error in expected_errors]) From 6c104e7c73c454d5c4158365e5625be94ed3c5eb Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 28 Jul 2024 16:16:53 +0200 Subject: [PATCH 23/34] Simplify test test_backup_restore_on_cluster/test_disallow_concurrency --- .../test_disallow_concurrency.py | 256 ++++++++---------- 1 file changed, 117 insertions(+), 139 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 7ce28cbdc96..f3b6993c4fd 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -9,13 +9,13 @@ import re cluster = ClickHouseCluster(__file__) -num_nodes = 10 +num_nodes = 2 def generate_cluster_def(): path = os.path.join( os.path.dirname(os.path.realpath(__file__)), - "./_gen/cluster_for_concurrency_test.xml", + "./_gen/cluster_for_test_disallow_concurrency.xml", ) os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w") as f: @@ -111,22 +111,56 @@ def create_and_fill_table(): nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)") -def wait_for_fail_backup(node, backup_id): +def get_status_and_error(node, backup_or_restore_id): + return ( + 
node.query( + f"SELECT status, error FROM system.backups WHERE id == '{backup_or_restore_id}'" + ) + .rstrip("\n") + .split("\t") + ) + + +def wait_for_backup(node, backup_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id = '{backup_id}'", + "BACKUP_CREATED", + sleep_time=2, + retry_count=50, + ) + + +def wait_for_restore(node, restore_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id == '{restore_id}'", + "RESTORED", + sleep_time=2, + retry_count=50, + ) + + +def check_backup_error(error): expected_errors = [ "Concurrent backups not supported", "BACKUP_ALREADY_EXISTS", ] - status = node.query( - f"SELECT status FROM system.backups WHERE id == '{backup_id}'" - ).rstrip("\n") + assert any([expected_error in error for expected_error in expected_errors]) + + +def check_restore_error(error): + expected_errors = [ + "Concurrent restores not supported", + "Cannot restore the table default.tbl because it already contains some data", + ] + assert any([expected_error in error for expected_error in expected_errors]) + + +def wait_for_backup_failure(node, backup_id): + status, error = get_status_and_error(node, backup_id) # It is possible that the second backup was picked up first, and then the async backup - if status == "BACKUP_FAILED": - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{backup_id}'" - ).rstrip("\n") - assert any([expected_error in error for expected_error in expected_errors]) - return - elif status == "CREATING_BACKUP": + if status == "CREATING_BACKUP": assert_eq_with_retry( node, f"SELECT status FROM system.backups WHERE id = '{backup_id}'", @@ -134,31 +168,17 @@ def wait_for_fail_backup(node, backup_id): sleep_time=2, retry_count=50, ) - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{backup_id}'" - ).rstrip("\n") - assert any([expected_error in error for expected_error in expected_errors]) - return + status, error = get_status_and_error(node, 
backup_id) + if status == "BACKUP_FAILED": + check_backup_error(error) else: assert False, "Concurrent backups both passed, when one is expected to fail" -def wait_for_fail_restore(node, restore_id): - expected_errors = [ - "Concurrent restores not supported", - "Cannot restore the table default.tbl because it already contains some data", - ] - status = node.query( - f"SELECT status FROM system.backups WHERE id == '{restore_id}'" - ).rstrip("\n") +def wait_for_restore_failure(node, restore_id): + status, error = get_status_and_error(node, restore_id) # It is possible that the second backup was picked up first, and then the async backup - if status == "RESTORE_FAILED": - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{restore_id}'" - ).rstrip("\n") - assert any([expected_error in error for expected_error in expected_errors]) - return - elif status == "RESTORING": + if status == "RESTORING": assert_eq_with_retry( node, f"SELECT status FROM system.backups WHERE id = '{restore_id}'", @@ -166,14 +186,9 @@ def wait_for_fail_restore(node, restore_id): sleep_time=2, retry_count=50, ) - error = node.query( - f"SELECT error FROM system.backups WHERE id == '{restore_id}'" - ).rstrip("\n") - assert re.search( - "Cannot restore the table default.tbl because it already contains some data", - error, - ) - return + status, error = get_status_and_error(node, restore_id) + if status == "RESTORE_FAILED": + check_restore_error(error) else: assert False, "Concurrent restores both passed, when one is expected to fail" @@ -188,39 +203,28 @@ def test_concurrent_backups_on_same_node(): backup_name = new_backup_name() - id = ( + # Backup #1. 
+ id, status = ( nodes[0] .query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") - .rstrip("\n") - ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - result, error = nodes[0].query_and_get_answer_with_error( + # Backup #2. + _, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) - expected_errors = [ - "Concurrent backups not supported", - "BACKUP_ALREADY_EXISTS", - ] - if not error: - wait_for_fail_backup(nodes[0], id) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id = '{id}'", - "BACKUP_CREATED", - sleep_time=2, - retry_count=50, - ) + if error: + # Backup #2 failed, backup #1 should be successful. + check_backup_error(error) + wait_for_backup(nodes[0], id) + else: + # Backup #2 was successful, backup #1 should fail. + wait_for_backup_failure(nodes[0], id) # This restore part is added to confirm creating an internal backup & restore work # even when a concurrent backup is stopped @@ -238,40 +242,38 @@ def test_concurrent_backups_on_different_nodes(): backup_name = new_backup_name() - id = ( - nodes[1] + # Backup #1. + id, status = ( + nodes[0] .query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[1] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") - .rstrip("\n") - ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - result, error = nodes[0].query_and_get_answer_with_error( + # Backup #2. 
+ _, error = nodes[1].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) - expected_errors = [ - "Concurrent backups not supported", - "BACKUP_ALREADY_EXISTS", - ] + if error: + # Backup #2 failed, backup #1 should be successful. + check_backup_error(error) + wait_for_backup(nodes[0], id) + else: + # Backup #2 was successful, backup #1 should fail. + wait_for_backup_failure(nodes[0], id) - if not error: - wait_for_fail_backup(nodes[1], id) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[1], - f"SELECT status FROM system.backups WHERE id = '{id}'", - "BACKUP_CREATED", - sleep_time=2, - retry_count=50, + # This restore part is added to confirm creating an internal backup & restore work + # even when a concurrent backup is stopped + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC", + settings={ + "distributed_ddl_task_timeout": 360, + }, ) + nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") def test_concurrent_restores_on_same_node(): @@ -288,40 +290,28 @@ def test_concurrent_restores_on_same_node(): }, ) - restore_id = ( + # Restore #1. + restore_id, status = ( nodes[0] .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") - .rstrip("\n") - ) assert status in ["RESTORING", "RESTORED"] - result, error = nodes[0].query_and_get_answer_with_error( + # Restore #2. 
+ _, error = nodes[0].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) - expected_errors = [ - "Concurrent restores not supported", - "Cannot restore the table default.tbl because it already contains some data", - ] - - if not error: - wait_for_fail_restore(nodes[0], restore_id) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id == '{restore_id}'", - "RESTORED", - sleep_time=2, - retry_count=50, - ) + if error: + # Restore #2 failed, restore #1 should be successful. + check_restore_error(error) + wait_for_restore(nodes[0], restore_id) + else: + # Restore #2 was successful, restore #1 should fail. + wait_for_restore_failure(nodes[0], restore_id) def test_concurrent_restores_on_different_node(): @@ -338,37 +328,25 @@ def test_concurrent_restores_on_different_node(): }, ) - restore_id = ( + # Restore #1. + restore_id, status = ( nodes[0] .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") - .split("\t")[0] + .rstrip("\n") + .split("\t") ) - status = ( - nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") - .rstrip("\n") - ) assert status in ["RESTORING", "RESTORED"] - result, error = nodes[1].query_and_get_answer_with_error( + # Restore #2. 
+ _, error = nodes[1].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) - expected_errors = [ - "Concurrent restores not supported", - "Cannot restore the table default.tbl because it already contains some data", - ] - - if not error: - wait_for_fail_restore(nodes[0], restore_id) - - assert any([expected_error in error for expected_error in expected_errors]) - - assert_eq_with_retry( - nodes[0], - f"SELECT status FROM system.backups WHERE id == '{restore_id}'", - "RESTORED", - sleep_time=2, - retry_count=50, - ) + if error: + # Restore #2 failed, restore #1 should be successful. + check_restore_error(error) + wait_for_restore(nodes[0], restore_id) + else: + # Restore #2 was successful, restore #1 should fail. + wait_for_restore_failure(nodes[0], restore_id) From 634c513ba710ce3b428641593d7730130a34a338 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 29 Jul 2024 13:19:24 +0200 Subject: [PATCH 24/34] Fix cleanup in test_backup_restore_on_cluster/test.py::test_system_functions --- tests/integration/test_backup_restore_on_cluster/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 700ed6f15f5..1b7f4aaa97d 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -776,7 +776,6 @@ def test_system_users(): def test_system_functions(): node1.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;") - node1.query("CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');") backup_name = new_backup_name() @@ -817,6 +816,9 @@ def test_system_functions(): [[0, "even"], [1, "odd"], [2, "even"]] ) + node1.query("DROP FUNCTION linear_equation") + node1.query("DROP FUNCTION parity_str") + def test_projection(): node1.query( From 9f8c90065e5beff8a6e8d763123bbda672371293 Mon Sep 17 00:00:00 2001 
From: Max K Date: Mon, 29 Jul 2024 14:16:20 +0200 Subject: [PATCH 25/34] Fix docker jobs dependencies --- .github/actions/release/action.yml | 10 ++++-- tests/ci/ci_config.py | 4 +-- tests/ci/docker_server.py | 53 ++++++++++++------------------ 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml index c3897682a33..94f178738e3 100644 --- a/.github/actions/release/action.yml +++ b/.github/actions/release/action.yml @@ -145,7 +145,9 @@ runs: cd "./tests/ci" python3 ./create_release.py --set-progress-started --progress "docker server release" export CHECK_NAME="Docker server image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + git checkout ${{ env.RELEASE_TAG }} + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" ${{ ! inputs.dry-run && '--push' || '' }} + git checkout - python3 ./create_release.py --set-progress-completed - name: Docker clickhouse/clickhouse-keeper building if: ${{ inputs.type == 'patch' }} @@ -154,15 +156,19 @@ runs: cd "./tests/ci" python3 ./create_release.py --set-progress-started --progress "docker keeper release" export CHECK_NAME="Docker keeper image" - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + git checkout ${{ env.RELEASE_TAG }} + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" ${{ ! 
inputs.dry-run && '--push' || '' }} + git checkout - python3 ./create_release.py --set-progress-completed - name: Set current Release progress to Completed with OK shell: bash run: | + git checkout "$GITHUB_REF_NAME" python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" python3 ./tests/ci/create_release.py --set-progress-completed - name: Post Slack Message if: ${{ !cancelled() }} shell: bash run: | + git checkout "$GITHUB_REF_NAME" python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }} diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a44b15f34c1..72c50886d86 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -508,10 +508,10 @@ class CI: runner_type=Runners.STYLE_CHECKER, ), JobNames.DOCKER_SERVER: CommonJobConfigs.DOCKER_SERVER.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE] + required_builds=[BuildNames.PACKAGE_RELEASE, BuildNames.PACKAGE_AARCH64] ), JobNames.DOCKER_KEEPER: CommonJobConfigs.DOCKER_SERVER.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE] + required_builds=[BuildNames.PACKAGE_RELEASE, BuildNames.PACKAGE_AARCH64] ), JobNames.DOCS_CHECK: JobConfig( digest=DigestConfig( diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 413c35cbebe..8ee0ed84120 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -62,12 +62,6 @@ def parse_args() -> argparse.Namespace: help="a version to build, automaticaly got from version_helper, accepts either " "tag ('refs/tags/' is removed automatically) or a normal 22.2.2.2 format", ) - parser.add_argument( - "--sha", - type=str, - default="", - help="sha of the commit to use packages from", - ) parser.add_argument( "--release-type", type=str, @@ -128,17 +122,9 @@ def parse_args() -> argparse.Namespace: def retry_popen(cmd: str, log_file: Path) -> int: max_retries = 2 + sleep_seconds = 10 + retcode = -1 for retry in range(max_retries): - # From time to time 
docker build may failed. Curl issues, or even push - # It will sleep progressively 5, 15, 30 and 50 seconds between retries - progressive_sleep = 5 * sum(i + 1 for i in range(retry)) - if progressive_sleep: - logging.warning( - "The following command failed, sleep %s before retry: %s", - progressive_sleep, - cmd, - ) - time.sleep(progressive_sleep) with TeePopen( cmd, log_file=log_file, @@ -146,7 +132,14 @@ def retry_popen(cmd: str, log_file: Path) -> int: retcode = process.wait() if retcode == 0: return 0 - + else: + # From time to time docker build may failed. Curl issues, or even push + logging.error( + "The following command failed, sleep %s before retry: %s", + sleep_seconds, + cmd, + ) + time.sleep(sleep_seconds) return retcode @@ -377,21 +370,6 @@ def main(): direct_urls: Dict[str, List[str]] = {} for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): - if args.bucket_prefix: - assert not args.allow_build_reuse - repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" - elif args.sha: - # CreateRelease workflow only. TODO - version = args.version - repo_urls[arch] = ( - f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{version.major}.{version.minor}/{args.sha}/{build_name}" - ) - else: - # In all other cases urls must be fetched from build reports. 
TODO: script needs refactoring - repo_urls[arch] = "" - assert args.allow_build_reuse - if args.allow_build_reuse: # read s3 urls from pre-downloaded build reports if "clickhouse-server" in image_repo: @@ -413,6 +391,17 @@ def main(): for url in urls if any(package in url for package in PACKAGES) and "-dbg" not in url ] + elif args.bucket_prefix: + assert not args.allow_build_reuse + repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" + print(f"Bucket prefix is set: Fetching packages from [{repo_urls}]") + else: + version = args.version + repo_urls[arch] = ( + f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" + f"{version.major}.{version.minor}/{git.sha}/{build_name}" + ) + print(f"Fetching packages from [{repo_urls}]") if push: docker_login() From 0ce81e7570acc49976a0e3242e7cda3a2f1fa510 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 29 Jul 2024 14:19:31 +0200 Subject: [PATCH 26/34] Create release workflow to use docker build script from master branch --- .github/actions/release/action.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml index 94f178738e3..c3897682a33 100644 --- a/.github/actions/release/action.yml +++ b/.github/actions/release/action.yml @@ -145,9 +145,7 @@ runs: cd "./tests/ci" python3 ./create_release.py --set-progress-started --progress "docker server release" export CHECK_NAME="Docker server image" - git checkout ${{ env.RELEASE_TAG }} - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" ${{ ! inputs.dry-run && '--push' || '' }} - git checkout - + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} python3 ./create_release.py --set-progress-completed - name: Docker clickhouse/clickhouse-keeper building if: ${{ inputs.type == 'patch' }} @@ -156,19 +154,15 @@ runs: cd "./tests/ci" python3 ./create_release.py --set-progress-started --progress "docker keeper release" export CHECK_NAME="Docker keeper image" - git checkout ${{ env.RELEASE_TAG }} - python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" ${{ ! inputs.dry-run && '--push' || '' }} - git checkout - + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} python3 ./create_release.py --set-progress-completed - name: Set current Release progress to Completed with OK shell: bash run: | - git checkout "$GITHUB_REF_NAME" python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" python3 ./tests/ci/create_release.py --set-progress-completed - name: Post Slack Message if: ${{ !cancelled() }} shell: bash run: | - git checkout "$GITHUB_REF_NAME" python3 ./tests/ci/create_release.py --post-status ${{ inputs.dry-run && '--dry-run' || '' }} From fd2dc474a1073d6d151d6f508d16a2f48296004d Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 29 Jul 2024 14:26:02 +0200 Subject: [PATCH 27/34] docker server fixes --- tests/ci/docker_server.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 8ee0ed84120..479d63f1a3b 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -62,6 +62,12 @@ def parse_args() -> argparse.Namespace: help="a version to build, automaticaly got from version_helper, accepts either " "tag ('refs/tags/' is removed automatically) or a normal 22.2.2.2 format", ) + parser.add_argument( + "--sha", + type=str, + default="", + help="sha of the commit to use packages from", + ) 
parser.add_argument( "--release-type", type=str, @@ -395,13 +401,17 @@ def main(): assert not args.allow_build_reuse repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" print(f"Bucket prefix is set: Fetching packages from [{repo_urls}]") - else: + elif args.sha: version = args.version repo_urls[arch] = ( f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{version.major}.{version.minor}/{git.sha}/{build_name}" + f"{version.major}.{version.minor}/{args.sha}/{build_name}" ) print(f"Fetching packages from [{repo_urls}]") + else: + assert ( + False + ), "--sha, --bucket_prefix or --allow-build-reuse (to fetch packages from build report) must be provided" if push: docker_login() From b271a9dc8ca85ca96ac5f15c583c142f7d63696e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 29 Jul 2024 15:44:25 +0200 Subject: [PATCH 28/34] Don't detach from thread group if running from main thread --- src/Interpreters/Aggregator.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index e073b7a49b6..90bbde8be35 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2997,7 +2997,11 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari std::unique_ptr thread_pool; if (max_threads > 1 && total_input_rows > 100000) /// TODO Make a custom threshold. 
- thread_pool = std::make_unique(CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, max_threads); + thread_pool = std::make_unique( + CurrentMetrics::AggregatorThreads, + CurrentMetrics::AggregatorThreadsActive, + CurrentMetrics::AggregatorThreadsScheduled, + max_threads); for (const auto & bucket_blocks : bucket_to_blocks) { @@ -3009,7 +3013,10 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari result.aggregates_pools.push_back(std::make_shared()); Arena * aggregates_pool = result.aggregates_pools.back().get(); - auto task = [group = CurrentThread::getGroup(), bucket, &merge_bucket, aggregates_pool]{ merge_bucket(bucket, aggregates_pool, group); }; + /// if we don't use thread pool we don't need to attach and definitely don't want to detach from the thread group + /// because this thread is already attached + auto task = [group = thread_pool != nullptr ? CurrentThread::getGroup() : nullptr, bucket, &merge_bucket, aggregates_pool] + { merge_bucket(bucket, aggregates_pool, group); }; if (thread_pool) thread_pool->scheduleOrThrowOnError(task); From 4417a9ae6f4d175761bccd01dc15f0648ef25e0b Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 29 Jul 2024 16:14:45 +0200 Subject: [PATCH 29/34] style fix --- tests/ci/docker_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 479d63f1a3b..3e782c079c6 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -130,7 +130,7 @@ def retry_popen(cmd: str, log_file: Path) -> int: max_retries = 2 sleep_seconds = 10 retcode = -1 - for retry in range(max_retries): + for _retry in range(max_retries): with TeePopen( cmd, log_file=log_file, From d2f89bc7f862073720f10a8fcec1cfb956c0eaae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jul 2024 16:57:06 +0200 Subject: [PATCH 30/34] Fix bad test 
`01036_no_superfluous_dict_reload_on_create_database` --- tests/queries/0_stateless/01018_ddl_dictionaries_select.sql | 2 +- .../01036_no_superfluous_dict_reload_on_create_database.sql | 3 +++ tests/queries/0_stateless/01257_dictionary_mismatch_types.sql | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 4b9b15bee8f..95a5f2a0708 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -139,6 +139,6 @@ SELECT {CLICKHOUSE_DATABASE:String} || '.dict3' as n, dictGet(n, 'some_column', DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; -SYSTEM RELOAD DICTIONARIES; -- {serverError UNKNOWN_TABLE} +SYSTEM RELOAD DICTIONARY dict1; -- {serverError UNKNOWN_TABLE} SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12)); diff --git a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql index d0841124706..1334780a5af 100644 --- a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql +++ b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql @@ -1,3 +1,6 @@ +-- Tags: no-parallel +-- Does not allow if other tests do SYSTEM RELOAD DICTIONARIES at the same time. 
+ CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); CREATE DICTIONARY dict ( diff --git a/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql b/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql index a4bb7bf2525..38349d8291b 100644 --- a/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql +++ b/tests/queries/0_stateless/01257_dictionary_mismatch_types.sql @@ -98,7 +98,7 @@ ORDER BY (col1, col2, col3, col4, col5); SET input_format_null_as_default = 1; INSERT INTO test_dict_db.table1 VALUES ('id1',1,'20200127-1',701,'20200127-1-01',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',701,'20200127-1-01',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',702,'20200127-1-02',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',703,'20200127-1-03',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',704,'20200127-1-04',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200127-1',705,'20200127-1-05',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:37:59',NULL,'2020-02-04 11:35:14','2020-02-08 05:32:04',NULL,NULL,'12345'),('id1',1,'20200202-1',711,'20200202-1-01',0,200,NULL,'C2','Hello','C40',NULL,1,'2020-02-03 11:07:57',NULL,NULL,NULL,'2020-02-03 11:09:23',NULL,NULL),('id1',1,'20200202-2',712,'20200202-2-01',0,0,NULL,'C3','bye','R40',NULL,1,'2020-02-03 14:13:10',NULL,'2020-02-03 16:11:31','2020-02-07 05:32:05','2020-02-07 11:18:15','2020-02-07 11:18:16','123455'),('id1',1,'20200202-2',713,'20200202-2-02',0,0,NULL,'C3','bye','R40',NULL,1,'2020-02-03 14:13:10',NULL,'2020-02-03 
16:11:31','2020-02-07 05:32:05','2020-02-07 11:18:15','2020-02-07 11:18:16','123455'),('id1',2,'20200128-1',701,'20200128-1-01',0,0,NULL,'N1','Hi','N40',NULL,2,'2020-02-03 17:07:27',NULL,'2020-02-05 13:33:55','2020-02-13 05:32:04',NULL,NULL,'A123755'),('id1',2,'20200131-1',701,'20200131-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 13:07:17',NULL,'2020-02-04 13:47:55','2020-02-12 05:32:04',NULL,NULL,'A123485'),('id1',2,'20200201-1',701,'20200201-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 21:07:37',NULL,'2020-02-05 13:40:51','2020-02-13 05:32:04',NULL,NULL,'A123455'),('id1',2,'20200202-1',711,'20200202-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 02:06:54',NULL,'2020-02-04 13:36:45','2020-02-12 05:32:04',NULL,NULL,'A123459'),('id1',2,'20200202-1',712,'20200202-1-02',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 02:06:54',NULL,'2020-02-04 13:36:45','2020-02-12 05:32:04',NULL,NULL,'A123429'),('id2',1,'20200131-1',401,'20200131-1-01',0,210,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',402,'20200131-1-02',0,210,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',403,'20200131-1-03',0,270,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',404,'20200131-1-04',0,270,'2020-02-16 05:22:04','N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:05',NULL,NULL,'454545'),('id2',1,'20200131-1',405,'20200131-1-05',0,380,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-11 16:52:58','2020-02-15 05:32:04',NULL,NULL,'6892144935823'),('id2',1,'20200131-1',406,'20200131-1-06',0,380,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-11 16:52:58','2020-02-15 
05:32:04',NULL,NULL,'6892144935823'),('id2',1,'20200131-1',407,'20200131-1-07',0,280,NULL,'C2','Hello','C40',NULL,1,'2020-02-03 10:11:00',NULL,NULL,NULL,'2020-02-04 11:01:21',NULL,NULL),('id2',1,'20200131-1',408,'20200131-1-08',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:11:00',NULL,'2020-02-05 17:30:05','2020-02-09 05:32:04',NULL,NULL,'454545'),('id2',1,'20200201-1',401,'20200201-1-01',0,190,'2020-02-16 05:22:05','N1','Hi','N40',NULL,1,'2020-02-03 12:06:17',NULL,'2020-02-05 17:30:30','2020-02-09 05:32:03',NULL,NULL,'90071'),('id2',1,'20200201-1',402,'20200201-1-01',0,160,'2020-02-14 05:22:13','N1','Hi','N40',NULL,1,'2020-02-03 06:21:05',NULL,'2020-02-03 17:42:35','2020-02-07 05:32:04',NULL,NULL,'96575'),('id2',1,'20200201-1',403,'20200201-1-02',0,230,'2020-02-14 05:22:13','N1','Hi','N40',NULL,1,'2020-02-03 06:21:05',NULL,'2020-02-03 17:42:35','2020-02-07 05:32:04',NULL,NULL,'96575'),('id2',1,'20200202-1',404,'20200202-1-01',0,130,'2020-02-14 05:22:14','N1','Hi','N40',NULL,1,'2020-02-03 14:00:39',NULL,'2020-02-03 17:42:45','2020-02-07 05:32:04',NULL,NULL,'96850'),('id3',1,'20200130-1',391,'20200130-1-01',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:26:46',NULL,'2020-02-05 15:33:01','2020-02-08 05:32:05',NULL,NULL,'27243'),('id3',1,'20200130-1',392,'20200130-1-02',0,300,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:26:46',NULL,'2020-02-10 16:16:11','2020-02-13 05:32:06',NULL,NULL,'92512'),('id3',1,'20200131-1',393,'20200131-1-01',0,0,NULL,'C2','Hello','C40',NULL,1,'2020-02-03 10:24:38',NULL,NULL,NULL,'2020-02-05 14:04:40',NULL,NULL),('id3',1,'20200131-2',391,'20200131-1-01',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:22:08',NULL,'2020-02-06 14:27:06','2020-02-09 05:32:04',NULL,NULL,'46433'),('id3',1,'20200131-2',392,'20200131-1-02',0,0,NULL,'N1','Hi','N40',NULL,1,'2020-02-03 10:22:08',NULL,'2020-02-06 14:27:06','2020-02-09 05:32:02',NULL,NULL,'46433'); -SYSTEM RELOAD DICTIONARIES; +SYSTEM RELOAD DICTIONARY test_dict_db.table1_dict; SELECT 
dictGet('test_dict_db.table1_dict', 'col6', (col1, col2, col3, col4, col5)), From 064fe8c3f551a634f50543f1cfadd164f08173d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jul 2024 17:13:38 +0200 Subject: [PATCH 31/34] Fix tests --- tests/queries/0_stateless/01018_ddl_dictionaries_select.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql index 95a5f2a0708..4c4bcc440ca 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_select.sql @@ -139,6 +139,6 @@ SELECT {CLICKHOUSE_DATABASE:String} || '.dict3' as n, dictGet(n, 'some_column', DROP TABLE {CLICKHOUSE_DATABASE:Identifier}.table_for_dict; -SYSTEM RELOAD DICTIONARY dict1; -- {serverError UNKNOWN_TABLE} +SYSTEM RELOAD DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict3; -- {serverError UNKNOWN_TABLE} SELECT dictGetString({CLICKHOUSE_DATABASE:String} || '.dict3', 'some_column', toUInt64(12)); From 75728ac56d83b85e476162a745686837cb194b73 Mon Sep 17 00:00:00 2001 From: Halersson Paris <142428374+halersson@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:42:58 -0300 Subject: [PATCH 32/34] Fix typo --- src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e837d4d5e20..bc5e8292192 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -420,7 +420,7 @@ void ParquetBlockInputFormat::initializeIfNeeded() int num_row_groups = metadata->num_row_groups(); row_group_batches.reserve(num_row_groups); - auto adative_chunk_size = [&](int row_group_idx) -> size_t + auto adaptive_chunk_size = [&](int row_group_idx) -> size_t { size_t total_size 
= 0; auto row_group_meta = metadata->RowGroup(row_group_idx); @@ -457,7 +457,7 @@ void ParquetBlockInputFormat::initializeIfNeeded() row_group_batches.back().row_groups_idxs.push_back(row_group); row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows(); row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size(); - auto rows = adative_chunk_size(row_group); + auto rows = adaptive_chunk_size(row_group); row_group_batches.back().adaptive_chunk_size = rows ? rows : format_settings.parquet.max_block_size; } } From fea03cf46ff29aa398b08d86ae77361fe85d7c40 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 29 Jul 2024 21:07:24 +0200 Subject: [PATCH 33/34] Build results fix --- tests/ci/ci.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index e30062c32ff..935fe472e50 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -766,7 +766,9 @@ def _upload_build_artifacts( int(job_report.duration), GITHUB_JOB_API_URL(), head_ref=pr_info.head_ref, - pr_number=pr_info.number, + # PRInfo fetches pr number for release branches as well - set pr_number to 0 for release + # so that build results are not mistakenly treated as feature branch builds + pr_number=pr_info.number if pr_info.is_pr else 0, ) report_url = ci_cache.upload_build_report(build_result) print(f"Report file has been uploaded to [{report_url}]") From 3df2d88cf13ad552058a6958630741d7cdab9d3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jul 2024 21:09:11 +0200 Subject: [PATCH 34/34] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07b37835dda..620b7c99bac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,7 +45,6 @@ * Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. 
[#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)). #### Performance Improvement -* Enable `optimize_functions_to_subcolumns` by default. [#58661](https://github.com/ClickHouse/ClickHouse/pull/58661) ([Anton Popov](https://github.com/CurtizJ)). * Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)). * Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. [#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)). * Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using of buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)).