Merge remote-tracking branch 'origin/master' into nomv

This commit is contained in:
Michael Kolupaev 2024-07-29 20:56:00 +00:00
commit 765cbf6092
498 changed files with 7198 additions and 3340 deletions

View File

@ -93,21 +93,21 @@ jobs:
with:
stage: Builds_2
data: ${{ needs.RunConfig.outputs.data }}
Tests_2:
Tests_2_ww:
needs: [RunConfig, Builds_2]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2_ww
data: ${{ needs.RunConfig.outputs.data }}
Tests_2:
# Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there.
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2
data: ${{ needs.RunConfig.outputs.data }}
Tests_3:
# Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there.
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_3
data: ${{ needs.RunConfig.outputs.data }}
################################# Reports #################################
# Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3
@ -123,7 +123,7 @@ jobs:
FinishCheck:
if: ${{ !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -133,6 +133,7 @@ jobs:
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'

View File

@ -123,20 +123,20 @@ jobs:
stage: Builds_2
data: ${{ needs.RunConfig.outputs.data }}
# Stage for running non-required checks without being blocked by required checks (Test_1) if the corresponding setting is selected
Tests_2:
Tests_2_ww:
needs: [RunConfig, Builds_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2_ww') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2_ww
data: ${{ needs.RunConfig.outputs.data }}
Tests_2:
needs: [RunConfig, Builds_1, Tests_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_2
data: ${{ needs.RunConfig.outputs.data }}
Tests_3:
needs: [RunConfig, Builds_1, Tests_1]
if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
uses: ./.github/workflows/reusable_test_stage.yml
with:
stage: Tests_3
data: ${{ needs.RunConfig.outputs.data }}
################################# Reports #################################
# Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3)
@ -154,7 +154,7 @@ jobs:
if: ${{ !cancelled() }}
# Test_2 or Test_3 do not have the jobs required for Mergeable check,
# however, set them as "needs" to get all checks results before the automatic merge occurs.
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -178,7 +178,7 @@ jobs:
#
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2_ww, Tests_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code

2
contrib/icu vendored

@ -1 +1 @@
Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668
Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625

View File

@ -4,7 +4,9 @@ else ()
option(ENABLE_ICU "Enable ICU" 0)
endif ()
if (NOT ENABLE_ICU)
# Temporarily disabled s390x because the ICU build links a blob (icudt71b_dat.S) and our friends from IBM did not explain how they generated
# the blob on s390x: https://github.com/ClickHouse/icudata/pull/2#issuecomment-2226957255
if (NOT ENABLE_ICU OR ARCH_S390X)
message(STATUS "Not using ICU")
return()
endif()
@ -12,8 +14,6 @@ endif()
set(ICU_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/source")
set(ICUDATA_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/icudata/")
set (CMAKE_CXX_STANDARD 17)
# These lists of sources were generated from build log of the original ICU build system (configure + make).
set(ICUUC_SOURCES
@ -462,9 +462,9 @@ file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ")
enable_language(ASM)
if (ARCH_S390X)
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70b_dat.S" )
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75b_dat.S" )
else()
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" )
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S" )
endif()
set(ICUDATA_SOURCES

2
contrib/icudata vendored

@ -1 +1 @@
Subproject commit c8e717892a557b4d2852317c7d628aacc0a0e5ab
Subproject commit d345d6ac22f381c882420de9053d30ae1ff38d75

View File

@ -261,9 +261,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -251,9 +251,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -247,12 +247,22 @@ function run_tests()
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
START_TIME=${SECONDS}
set +e
timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
set -e
# Compute how long the test run took. SECONDS keeps growing while the shell runs,
# so the elapsed time is "now minus start" (the reverse order yields a non-positive
# value and the timeout check below could never trigger).
DURATION=$((SECONDS - START_TIME))

echo "Elapsed ${DURATION} seconds."

# If the run lasted at least as long as the limit passed to `timeout`,
# it was most likely terminated by the timeout rather than finishing on its own.
if [[ $DURATION -ge $TIMEOUT ]]
then
    echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds."
fi
}
export -f run_tests
@ -264,7 +274,7 @@ if [ "$NUM_TRIES" -gt "1" ]; then
# We don't run tests with Ordinary database in PRs, only in master.
# So run new/changed tests with Ordinary at least once in flaky check.
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
| sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
| sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
fi
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:

View File

@ -45,9 +45,12 @@ function timeout_with_logging() {
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout"
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi
return $exit_code

View File

@ -54,7 +54,7 @@ CREATE TABLE keeper_map_table
`v2` String,
`v3` Float32
)
ENGINE = KeeperMap(/keeper_map_table, 4)
ENGINE = KeeperMap('/keeper_map_table', 4)
PRIMARY KEY key
```

File diff suppressed because it is too large Load Diff

View File

@ -36,7 +36,7 @@ These actions are described in detail below.
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST]
```
Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#codecs) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)).
Adds a new column to the table with the specified `name`, `type`, [`codec`](../create/table.md/#column_compression_codec) and `default_expr` (see the section [Default expressions](/docs/en/sql-reference/statements/create/table.md/#create-default-values)).
If the `IF NOT EXISTS` clause is included, the query won't return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table, use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
@ -155,7 +155,7 @@ This query changes the `name` column properties:
- Column-level Settings
For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#codecs).
For examples of columns compression CODECS modifying, see [Column Compression Codecs](../create/table.md/#column_compression_codec).
For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).

View File

@ -209,8 +209,8 @@ std::vector<String> Client::loadWarningMessages()
{} /* query_parameters */,
"" /* query_id */,
QueryProcessingStage::Complete,
&global_context->getSettingsRef(),
&global_context->getClientInfo(), false, {});
&client_context->getSettingsRef(),
&client_context->getClientInfo(), false, {});
while (true)
{
Packet packet = connection->receivePacket();
@ -306,9 +306,6 @@ void Client::initialize(Poco::Util::Application & self)
if (env_password && !config().has("password"))
config().setString("password", env_password);
// global_context->setApplicationType(Context::ApplicationType::CLIENT);
global_context->setQueryParameters(query_parameters);
/// settings and limits could be specified in config file, but passed settings has higher priority
for (const auto & setting : global_context->getSettingsRef().allUnchanged())
{
@ -382,7 +379,7 @@ try
showWarnings();
/// Set user password complexity rules
auto & access_control = global_context->getAccessControl();
auto & access_control = client_context->getAccessControl();
access_control.setPasswordComplexityRules(connection->getPasswordComplexityRules());
if (is_interactive && !delayed_interactive)
@ -459,7 +456,7 @@ void Client::connect()
<< connection_parameters.host << ":" << connection_parameters.port
<< (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl;
connection = Connection::createConnection(connection_parameters, global_context);
connection = Connection::createConnection(connection_parameters, client_context);
if (max_client_network_bandwidth)
{
@ -528,7 +525,7 @@ void Client::connect()
}
}
if (!global_context->getSettingsRef().use_client_time_zone)
if (!client_context->getSettingsRef().use_client_time_zone)
{
const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
if (!time_zone.empty())
@ -611,7 +608,7 @@ void Client::printChangedSettings() const
}
};
print_changes(global_context->getSettingsRef().changes(), "settings");
print_changes(client_context->getSettingsRef().changes(), "settings");
print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings");
}
@ -709,7 +706,7 @@ bool Client::processWithFuzzing(const String & full_query)
{
const char * begin = full_query.data();
orig_ast = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
@ -733,7 +730,7 @@ bool Client::processWithFuzzing(const String & full_query)
}
// Kusto is not a subject for fuzzing (yet)
if (global_context->getSettingsRef().dialect == DB::Dialect::kusto)
if (client_context->getSettingsRef().dialect == DB::Dialect::kusto)
{
return true;
}
@ -1138,8 +1135,6 @@ void Client::processOptions(const OptionsDescription & options_description,
if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>()))
{
// Fuzzer implies multiquery.
config().setBool("multiquery", true);
// Ignore errors in parsing queries.
config().setBool("ignore-error", true);
ignore_error = true;
@ -1147,8 +1142,6 @@ void Client::processOptions(const OptionsDescription & options_description,
if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as<int>()))
{
// Fuzzer implies multiquery.
config().setBool("multiquery", true);
// Ignore errors in parsing queries.
config().setBool("ignore-error", true);
@ -1166,6 +1159,11 @@ void Client::processOptions(const OptionsDescription & options_description,
if (options.count("opentelemetry-tracestate"))
global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as<std::string>();
/// In case of clickhouse-client the `client_context` can be just an alias for the `global_context`.
/// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.)
client_context = global_context;
initClientContext();
}
@ -1199,17 +1197,9 @@ void Client::processConfig()
}
print_stack_trace = config().getBool("stacktrace", false);
if (config().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
setDefaultFormatsAndCompressionFromConfiguration();
global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);
}
@ -1362,13 +1352,6 @@ void Client::readArguments(
allow_repeated_settings = true;
else if (arg == "--allow_merge_tree_settings")
allow_merge_tree_settings = true;
else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
{
/// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
++arg_num;
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-')))
{
common_arguments.emplace_back(arg);

View File

@ -16,7 +16,6 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
bool processWithFuzzing(const String & full_query) override;

View File

@ -295,6 +295,8 @@ void LocalServer::cleanup()
if (suggest)
suggest.reset();
client_context.reset();
if (global_context)
{
global_context->shutdown();
@ -436,7 +438,7 @@ void LocalServer::connect()
in = input.get();
}
connection = LocalConnection::createConnection(
connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name);
connection_parameters, client_context, in, need_render_progress, need_render_profile_events, server_display_name);
}
@ -497,8 +499,6 @@ try
initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default")));
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
applyCmdSettings(global_context);
/// try to load user defined executable functions, throw on error and die
try
{
@ -510,6 +510,11 @@ try
throw;
}
/// Must be called after we stopped initializing the global context and changing its settings.
/// After this point the global context must stay almost unchanged until shutdown,
/// and all necessary changes must be made to the client context instead.
createClientContext();
if (is_interactive)
{
clearTerminal();
@ -564,9 +569,6 @@ void LocalServer::processConfig()
if (!queries.empty() && getClientConfiguration().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
if (getClientConfiguration().has("multiquery"))
is_multiquery = true;
pager = getClientConfiguration().getString("pager", "");
delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file"));
@ -735,6 +737,9 @@ void LocalServer::processConfig()
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(getClientConfiguration());
/// Command-line parameters can override settings from the default profile.
applyCmdSettings(global_context);
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
@ -778,10 +783,6 @@ void LocalServer::processConfig()
server_display_name = getClientConfiguration().getString("display_name", "");
prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) ");
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
global_context->setQueryParameters(query_parameters);
}
@ -860,6 +861,16 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context)
}
/// Creates `client_context` as a copy of `global_context` and then initializes it
/// by calling initClientContext().
void LocalServer::createClientContext()
{
/// In case of clickhouse-local it's necessary to use a separate context for client-related purposes.
/// We can't just change the global context because it is used in background tasks (for example, in merges)
/// which don't expect that the global context can suddenly change.
client_context = Context::createCopy(global_context);
/// Apply the client-specific properties to the freshly created copy (not to the global context).
initClientContext();
}
void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
{
if (options.count("table"))
@ -922,13 +933,6 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1));
}
}
else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
{
/// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
++arg_num;
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else
{
common_arguments.emplace_back(arg);

View File

@ -31,7 +31,6 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
void connect() override;
@ -50,7 +49,6 @@ protected:
void processConfig() override;
void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &) override;
void updateLoggerLevel(const String & logs_level) override;
private:
@ -67,6 +65,8 @@ private:
void applyCmdOptions(ContextMutablePtr context);
void applyCmdSettings(ContextMutablePtr context);
void createClientContext();
ServerSettings server_settings;
std::optional<StatusFile> status;

View File

@ -0,0 +1,13 @@
<clickhouse>
<storage_configuration>
<disks>
<backups>
<type>local</type>
<path>/tmp/backups/</path>
</backups>
</disks>
</storage_configuration>
<backups>
<allowed_disk>backups</allowed_disk>
</backups>
</clickhouse>

View File

@ -0,0 +1 @@
../../../tests/config/config.d/enable_keeper_map.xml

View File

@ -0,0 +1 @@
../../../tests/config/config.d/session_log.xml

View File

@ -68,10 +68,13 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr;
}
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments:
/// [ (a <> b AND) ] (a IS NULL) AND (b IS NULL)
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
QueryTreeNodePtrWithHashSet is_null_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
@ -80,7 +83,11 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs,
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
{
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
is_null_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
}
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
@ -95,7 +102,7 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs,
return false;
}
if (all_arguments.size() != 2)
if (all_arguments.size() != 2 || is_null_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;

View File

@ -268,6 +268,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
}
}
const auto enable_order_by_all = updated_context->getSettingsRef().enable_order_by_all;
auto current_query_tree = std::make_shared<QueryNode>(std::move(updated_context), std::move(settings_changes));
current_query_tree->setIsSubquery(is_subquery);
@ -281,7 +283,10 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup);
current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets);
current_query_tree->setIsGroupByAll(select_query_typed.group_by_all);
current_query_tree->setIsOrderByAll(select_query_typed.order_by_all);
/// The order_by_all flag in the AST is set without considering the `enable_order_by_all` setting,
/// since the SETTINGS section has not been parsed yet at that point; so check the setting here.
if (enable_order_by_all)
current_query_tree->setIsOrderByAll(select_query_typed.order_by_all);
current_query_tree->setOriginalAST(select_query);
auto current_context = current_query_tree->getContext();

View File

@ -1740,7 +1740,7 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
if (!tuple_data_type)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Qualified matcher {} find non compound expression {} with type {}. Expected tuple or array of tuples. In scope {}",
"Qualified matcher {} found a non-compound expression {} with type {}. Expected a tuple or an array of tuples. In scope {}",
matcher_node->formatASTForErrorMessage(),
expression_query_tree_node->formatASTForErrorMessage(),
expression_query_tree_node->getResultType()->getName(),

View File

@ -226,6 +226,9 @@ add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue)
add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
add_object_library(clickhouse_client Client)
# Always compile this file with the highest possible level of optimizations, even in Debug builds.
# https://github.com/ClickHouse/ClickHouse/issues/65745
set_source_files_properties(Client/ClientBaseOptimizedParts.cpp PROPERTIES COMPILE_FLAGS "-O3")
add_object_library(clickhouse_bridge BridgeHelper)
add_object_library(clickhouse_server Server)
add_object_library(clickhouse_server_http Server/HTTP)

View File

@ -70,7 +70,6 @@
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/split.hpp>
#include <iostream>
#include <filesystem>
#include <limits>
@ -80,6 +79,8 @@
#include <Common/config_version.h>
#include "config.h"
#include <IO/ReadHelpers.h>
#include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
#if USE_GWP_ASAN
# include <Common/GWPAsan.h>
@ -108,7 +109,6 @@ namespace ErrorCodes
extern const int UNEXPECTED_PACKET_FROM_SERVER;
extern const int INVALID_USAGE_OF_INPUT;
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_OPEN_FILE;
extern const int FILE_ALREADY_EXISTS;
@ -478,7 +478,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query)
std::vector<ExternalTableDataPtr> data;
for (auto & table : external_tables)
data.emplace_back(table.getData(global_context));
data.emplace_back(table.getData(client_context));
connection->sendExternalTablesData(data);
}
@ -691,10 +691,10 @@ try
/// intermixed with data with parallel formatting.
/// It may increase code complexity significantly.
if (!extras_into_stdout || select_only_into_file)
output_format = global_context->getOutputFormatParallelIfPossible(
output_format = client_context->getOutputFormatParallelIfPossible(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
else
output_format = global_context->getOutputFormat(
output_format = client_context->getOutputFormat(
current_format, out_file_buf ? *out_file_buf : *out_buf, block);
output_format->setAutoFlush();
@ -746,14 +746,6 @@ void ClientBase::adjustSettings()
/// NOTE: Do not forget to set changed=false to avoid sending it to the server (to avoid breakage read only profiles)
/// In case of multi-query we allow data after semicolon since it will be
/// parsed by the client and interpreted as new query
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
settings.input_format_values_allow_data_after_semicolon = true;
settings.input_format_values_allow_data_after_semicolon.changed = false;
}
/// Do not limit pretty format output in case of --pager specified or in case of stdout is not a tty.
if (!pager.empty() || !stdout_is_a_tty)
{
@ -773,6 +765,15 @@ void ClientBase::adjustSettings()
global_context->setSettings(settings);
}
/// Applies client-related properties to `client_context`:
///  - the client name (DEFAULT_CLIENT_NAME);
///  - the quota client key, read from the `quota_key` entry of the client configuration
///    (an empty string is used when the entry is absent);
///  - the query kind (setQueryKindInitial() followed by setQueryKind(query_kind));
///  - the query parameters.
/// `client_context` is dereferenced unconditionally, so it must be assigned before this is called.
void ClientBase::initClientContext()
{
client_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
client_context->setQuotaClientKey(getClientConfiguration().getString("quota_key", ""));
client_context->setQueryKindInitial();
client_context->setQueryKind(query_kind);
client_context->setQueryParameters(query_parameters);
}
bool ClientBase::isRegularFile(int fd)
{
struct stat file_stat;
@ -963,7 +964,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
/// client-side. Thus we need to parse the query.
const char * begin = full_query.data();
auto parsed_query = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ false);
if (!parsed_query)
@ -986,7 +987,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
/// But for asynchronous inserts we don't extract data, because it's needed
/// to be done on server side in that case (for coalescing the data from multiple inserts on server side).
const auto * insert = parsed_query->as<ASTInsertQuery>();
if (insert && isSyncInsertWithData(*insert, global_context))
if (insert && isSyncInsertWithData(*insert, client_context))
query_to_execute = full_query.substr(0, insert->data - full_query.data());
else
query_to_execute = full_query;
@ -1104,7 +1105,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
}
}
const auto & settings = global_context->getSettingsRef();
const auto & settings = client_context->getSettingsRef();
const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1;
int retries_left = 10;
@ -1119,10 +1120,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
connection_parameters.timeouts,
query,
query_parameters,
global_context->getCurrentQueryId(),
client_context->getCurrentQueryId(),
query_processing_stage,
&global_context->getSettingsRef(),
&global_context->getClientInfo(),
&client_context->getSettingsRef(),
&client_context->getClientInfo(),
true,
[&](const Progress & progress) { onProgress(progress); });
@ -1309,7 +1310,7 @@ void ClientBase::onProgress(const Progress & value)
void ClientBase::onTimezoneUpdate(const String & tz)
{
global_context->setSetting("session_timezone", tz);
client_context->setSetting("session_timezone", tz);
}
@ -1505,25 +1506,18 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de
void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query)
{
if (!global_context->hasInsertionTable() && insert_query.table)
if (!client_context->hasInsertionTable() && insert_query.table)
{
String table = insert_query.table->as<ASTIdentifier &>().shortName();
if (!table.empty())
{
String database = insert_query.database ? insert_query.database->as<ASTIdentifier &>().shortName() : "";
global_context->setInsertionTable(StorageID(database, table));
client_context->setInsertionTable(StorageID(database, table));
}
}
}
void ClientBase::addMultiquery(std::string_view query, Arguments & common_arguments) const
{
common_arguments.emplace_back("--multiquery");
common_arguments.emplace_back("-q");
common_arguments.emplace_back(query);
}
namespace
{
bool isStdinNotEmptyAndValid(ReadBufferFromFileDescriptor & std_in)
@ -1562,7 +1556,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
const auto & parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in))))
{
const auto & settings = global_context->getSettingsRef();
const auto & settings = client_context->getSettingsRef();
if (settings.throw_if_no_data_to_insert)
throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert");
else
@ -1576,10 +1570,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
connection_parameters.timeouts,
query,
query_parameters,
global_context->getCurrentQueryId(),
client_context->getCurrentQueryId(),
query_processing_stage,
&global_context->getSettingsRef(),
&global_context->getClientInfo(),
&client_context->getSettingsRef(),
&client_context->getClientInfo(),
true,
[&](const Progress & progress) { onProgress(progress); });
@ -1627,7 +1621,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
/// Set callback to be called on file progress.
if (tty_buf)
progress_indication.setFileProgressCallback(global_context, *tty_buf);
progress_indication.setFileProgressCallback(client_context, *tty_buf);
}
/// If data fetched from file (maybe compressed file)
@ -1661,10 +1655,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
}
StorageFile::CommonArguments args{
WithContext(global_context),
WithContext(client_context),
parsed_insert_query->table_id,
current_format,
getFormatSettings(global_context),
getFormatSettings(client_context),
compression_method,
columns_for_storage_file,
ConstraintsDescription{},
@ -1672,7 +1666,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
{},
String{},
};
StoragePtr storage = std::make_shared<StorageFile>(in_file, global_context->getUserFilesPath(), args);
StoragePtr storage = std::make_shared<StorageFile>(in_file, client_context->getUserFilesPath(), args);
storage->startup();
SelectQueryInfo query_info;
@ -1683,16 +1677,16 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
storage->read(
plan,
sample.getNames(),
storage->getStorageSnapshot(metadata, global_context),
storage->getStorageSnapshot(metadata, client_context),
query_info,
global_context,
client_context,
{},
global_context->getSettingsRef().max_block_size,
client_context->getSettingsRef().max_block_size,
getNumberOfPhysicalCPUCores());
auto builder = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(global_context),
BuildQueryPipelineSettings::fromContext(global_context));
QueryPlanOptimizationSettings::fromContext(client_context),
BuildQueryPipelineSettings::fromContext(client_context));
QueryPlanResourceHolder resources;
auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources);
@ -1753,14 +1747,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes
current_format = insert->format;
}
auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
auto source = client_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size);
Pipe pipe(source);
if (columns_description.hasDefaults())
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, global_context);
return std::make_shared<AddingDefaultsTransform>(header, columns_description, *source, client_context);
});
}
@ -1922,12 +1916,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (is_interactive)
{
global_context->setCurrentQueryId("");
client_context->setCurrentQueryId("");
// Generate a new query_id
for (const auto & query_id_format : query_id_formats)
{
writeString(query_id_format.first, std_out);
writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out);
writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", client_context->getCurrentQueryId())), std_out);
writeChar('\n', std_out);
std_out.next();
}
@ -1954,7 +1948,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
auto password = auth_data->getPassword();
if (password)
global_context->getAccessControl().checkPasswordComplexityRules(*password);
client_context->getAccessControl().checkPasswordComplexityRules(*password);
}
}
}
@ -1969,15 +1963,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
std::optional<Settings> old_settings;
SCOPE_EXIT_SAFE({
if (old_settings)
global_context->setSettings(*old_settings);
client_context->setSettings(*old_settings);
});
auto apply_query_settings = [&](const IAST & settings_ast)
{
if (!old_settings)
old_settings.emplace(global_context->getSettingsRef());
global_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
global_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
old_settings.emplace(client_context->getSettingsRef());
client_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
client_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
};
const auto * insert = parsed_query->as<ASTInsertQuery>();
@ -2010,7 +2004,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (insert && insert->select)
insert->tryFindInputFunction(input_function);
bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
bool is_async_insert_with_inlined_data = client_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();
if (is_async_insert_with_inlined_data)
{
@ -2045,9 +2039,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (change.name == "profile")
current_profile = change.value.safeGet<String>();
else
global_context->applySettingChange(change);
client_context->applySettingChange(change);
}
global_context->resetSettingsToDefaultValue(set_query->default_settings);
client_context->resetSettingsToDefaultValue(set_query->default_settings);
/// Query parameters inside SET queries should be also saved on the client side
/// to override their previous definitions set with --param_* arguments
@ -2055,7 +2049,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
for (const auto & [name, value] : set_query->query_parameters)
query_parameters.insert_or_assign(name, value);
global_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()});
client_context->addQueryParameters(NameToNameMap{set_query->query_parameters.begin(), set_query->query_parameters.end()});
}
if (const auto * use_query = parsed_query->as<ASTUseQuery>())
{
@ -2132,8 +2126,8 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
if (this_query_begin >= all_queries_end)
return MultiQueryProcessingStage::QUERIES_END;
unsigned max_parser_depth = static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth);
unsigned max_parser_backtracks = static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks);
unsigned max_parser_depth = static_cast<unsigned>(client_context->getSettingsRef().max_parser_depth);
unsigned max_parser_backtracks = static_cast<unsigned>(client_context->getSettingsRef().max_parser_backtracks);
// If there are only comments left until the end of file, we just
// stop. The parser can't handle this situation because it always
@ -2153,7 +2147,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
try
{
parsed_query = parseQuery(this_query_end, all_queries_end,
global_context->getSettingsRef(),
client_context->getSettingsRef(),
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
@ -2178,25 +2172,50 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
return MultiQueryProcessingStage::PARSING_FAILED;
}
// INSERT queries may have the inserted data in the query text
// that follow the query itself, e.g. "insert into t format CSV 1;2".
// They need special handling. First of all, here we find where the
// inserted data ends. In multi-query mode, it is delimited by a
// newline.
// The VALUES format needs even more handling - we also allow the
// data to be delimited by semicolon. This case is handled later by
// the format parser itself.
// We can't do multiline INSERTs with inline data, because most
// row input formats (e.g. TSV) can't tell when the input stops,
// unlike VALUES.
// INSERT queries may have the inserted data in the query text that follow the query itself, e.g. "insert into t format CSV 1,2". They
// need special handling.
// - If the INSERT statement FORMAT is VALUES, we use the VALUES format parser to skip the inserted data until we reach the trailing single semicolon.
// - Other formats (e.g. FORMAT CSV) are arbitrarily more complex and tricky to parse. For example, we may be unable to distinguish if the semicolon
// is part of the data or ends the statement. In this case, we simply assume that the end of the INSERT statement is determined by \n\n (two newlines).
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
const char * query_to_execute_end = this_query_end;
if (insert_ast && insert_ast->data)
{
this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end);
if (insert_ast->format == "Values")
{
// Invoke the VALUES format parser to skip the inserted data
ReadBufferFromMemory data_in(insert_ast->data, all_queries_end - insert_ast->data);
skipBOMIfExists(data_in);
do
{
skipWhitespaceIfAny(data_in);
if (data_in.eof() || *data_in.position() == ';')
break;
}
while (ValuesBlockInputFormat::skipToNextRow(&data_in, 1, 0));
// Handle the case of a comment followed by a semicolon
// Example: INSERT INTO tab VALUES xx; -- {serverError xx}
// If we use this error hint, the next query should not be placed on the same line
this_query_end = insert_ast->data + data_in.count();
const auto * pos_newline = find_first_symbols<'\n'>(this_query_end, all_queries_end);
if (pos_newline != this_query_end)
{
TestHint hint(String(this_query_end, pos_newline - this_query_end));
if (hint.hasClientErrors() || hint.hasServerErrors())
this_query_end = pos_newline;
}
}
else
{
// Handling of generic formats
auto pos_newline = String(insert_ast->data, all_queries_end).find("\n\n");
if (pos_newline != std::string::npos)
this_query_end = insert_ast->data + pos_newline;
else
this_query_end = all_queries_end;
}
insert_ast->end = this_query_end;
query_to_execute_end = isSyncInsertWithData(*insert_ast, global_context) ? insert_ast->data : this_query_end;
query_to_execute_end = isSyncInsertWithData(*insert_ast, client_context) ? insert_ast->data : this_query_end;
}
query_to_execute = all_queries_text.substr(this_query_begin - all_queries_text.data(), query_to_execute_end - this_query_begin);
@ -2229,7 +2248,10 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
size_t test_tags_length = getTestTagsLength(all_queries_text);
/// Several queries separated by ';'.
/// INSERT data is ended by the end of line, not ';'.
/// INSERT data is ended by the empty line (\n\n), not ';'.
/// Unnecessary semicolons may cause data to be parsed containing ';'
/// e.g. 'insert into xx format csv val;' will insert "val;" instead of "val"
/// 'insert into xx format csv val\n;' will insert "val" and ";"
/// An exception is VALUES format where we also support semicolon in
/// addition to end of line.
const char * this_query_begin = all_queries_text.data() + test_tags_length;
@ -2240,8 +2262,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
String query_to_execute;
ASTPtr parsed_query;
std::unique_ptr<Exception> current_exception;
size_t retries_count = 0;
bool is_first = true;
while (true)
{
@ -2250,16 +2272,24 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
switch (stage)
{
case MultiQueryProcessingStage::QUERIES_END:
{
/// Compatible with old version when run interactive, e.g. "", "\ld"
if (is_first && is_interactive)
processTextAsSingleQuery(all_queries_text);
return true;
}
case MultiQueryProcessingStage::PARSING_FAILED:
{
return true;
}
case MultiQueryProcessingStage::CONTINUE_PARSING:
{
is_first = false;
continue;
}
case MultiQueryProcessingStage::PARSING_EXCEPTION:
{
is_first = false;
this_query_end = find_first_symbols<'\n'>(this_query_end, all_queries_end);
// Try to find test hint for syntax error. We don't know where
@ -2289,6 +2319,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
}
case MultiQueryProcessingStage::EXECUTE_QUERY:
{
is_first = false;
full_query = all_queries_text.substr(this_query_begin - all_queries_text.data(), this_query_end - this_query_begin);
if (query_fuzzer_runs)
{
@ -2298,6 +2329,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
this_query_begin = this_query_end;
continue;
}
if (suggest)
updateSuggest(parsed_query);
// Now we know for sure where the query ends.
// Look for the hint in the text of query + insert data + trailing
@ -2405,13 +2438,13 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// , where the inline data is delimited by semicolon and not by a
// newline.
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
if (insert_ast && isSyncInsertWithData(*insert_ast, global_context))
if (insert_ast && isSyncInsertWithData(*insert_ast, client_context))
{
this_query_end = insert_ast->end;
adjustQueryEnd(
this_query_end, all_queries_end,
static_cast<unsigned>(global_context->getSettingsRef().max_parser_depth),
static_cast<unsigned>(global_context->getSettingsRef().max_parser_backtracks));
static_cast<unsigned>(client_context->getSettingsRef().max_parser_depth),
static_cast<unsigned>(client_context->getSettingsRef().max_parser_backtracks));
}
// Report error.
@ -2448,14 +2481,6 @@ bool ClientBase::processQueryText(const String & text)
return processMultiQueryFromFile(file_name);
}
if (!is_multiquery)
{
assert(!query_fuzzer_runs);
processTextAsSingleQuery(text);
return true;
}
if (query_fuzzer_runs)
{
processWithFuzzing(text);
@ -2542,10 +2567,10 @@ void ClientBase::runInteractive()
if (load_suggestions)
{
/// Load suggestion data from the server.
if (global_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
if (client_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (client_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(client_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
}
if (home_path.empty())
@ -2683,7 +2708,7 @@ void ClientBase::runInteractive()
{
// If a separate connection loading suggestions failed to open a new session,
// use the main session to receive them.
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo());
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), client_context->getClientInfo());
}
try
@ -2732,10 +2757,10 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
if (!getClientConfiguration().has("log_comment"))
{
Settings settings = global_context->getSettings();
Settings settings = client_context->getSettings();
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
settings.log_comment = fs::absolute(fs::path(file_name));
global_context->setSettings(settings);
client_context->setSettings(settings);
}
return executeMultiQuery(queries_from_file);
@ -2848,168 +2873,6 @@ void ClientBase::showClientVersion()
output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
}
namespace
{
/// Transparent hash functor: the `is_transparent` marker allows hashed containers
/// keyed by strings to be queried with a std::string_view.
struct TransparentStringHash
{
    using is_transparent = void;

    size_t operator()(std::string_view txt) const
    {
        const std::hash<std::string_view> hasher{};
        return hasher(txt);
    }
};
/// Extra parser for boost::program_options that lets long options be spelled with
/// dashes instead of underscores (e.g. `--max-threads` for `max_threads`).
/// It only rewrites a token when the dash-to-underscore form names a known option.
class OptionsAliasParser
{
public:
    /// Remembers the long names of all options registered in `options`.
    explicit OptionsAliasParser(const boost::program_options::options_description & options)
    {
        const auto & known_options = options.options();
        options_names.reserve(known_options.size());
        for (const auto & known_option : known_options)
            options_names.insert(known_option->long_name());
    }

    /// Implements the boost::program_options::ext_parser contract.
    /// Returns {key, value} with dashes in the key replaced by underscores when that
    /// yields a known option name; returns an empty pair in every other case
    /// (token is not a long option, the key is already known as written, or it is
    /// still unknown after the replacement).
    std::pair<std::string, std::string> operator()(const std::string & token) const
    {
        if (!token.starts_with("--"))
            return {};

        const std::string arg = token.substr(2);

        /// With the long_allow_adjacent style the token may look like `key=value`.
        const auto pos_eq = arg.find('=');
        std::string key = arg.substr(0, pos_eq);

        /// The option is already spelled correctly, nothing to rewrite.
        if (options_names.contains(key))
            return {};

        std::replace(key.begin(), key.end(), '-', '_');

        /// Still unknown after the replacement: not ours to handle.
        if (!options_names.contains(key))
            return {};

        std::string value = (pos_eq == std::string::npos) ? std::string{} : arg.substr(pos_eq + 1);
        return {key, value};
    }

private:
    std::unordered_set<std::string> options_names;
};
}
/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests
#if defined(__clang__)
#pragma clang optimize on
#endif
/// Registers the settings as command line options, parses `arguments` and stores the result in `options`.
///
/// Steps:
///  1. Adds query settings (and, if allowed, non-clashing MergeTree settings and their aliases)
///     to the main options description.
///  2. Parses the command line, accepting dashes in place of underscores in option names.
///  3. Throws UNRECOGNIZED_ARGUMENTS for the first unrecognized non-positional option.
///  4. Maps a positional argument that contains a space to the `query` option;
///     throws BAD_ARGUMENTS for any other non-empty positional argument.
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
    auto & main_options = options_description.main_description.value();

    if (allow_repeated_settings)
        addProgramOptionsAsMultitokens(cmd_settings, main_options);
    else
        addProgramOptions(cmd_settings, main_options);

    if (allow_merge_tree_settings)
    {
        /// MergeTree settings are added by hand because some of their names clash with
        /// query settings. Query settings win: a clashing MergeTree setting is skipped.
        std::unordered_set<std::string, TransparentStringHash, std::equal_to<>> main_option_names;
        for (const auto & registered_option : main_options.options())
            main_option_names.insert(registered_option->long_name());

        for (const auto & setting : cmd_merge_tree_settings.all())
        {
            const auto add_setting = [&](const std::string_view name)
            {
                if (main_option_names.contains(name))
                    return;

                if (allow_repeated_settings)
                    addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting);
                else
                    addProgramOption(cmd_merge_tree_settings, main_options, name, setting);
            };

            const auto & setting_name = setting.getName();
            add_setting(setting_name);

            /// Every alias of the setting is registered as an option too.
            const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases();
            const auto aliases_it = settings_to_aliases.find(setting_name);
            if (aliases_it != settings_to_aliases.end())
            {
                for (const auto & alias : aliases_it->second)
                    add_setting(alias);
            }
        }
    }

    /// Parse main commandline options.
    auto parser = po::command_line_parser(arguments)
        .options(main_options)
        .extra_parser(OptionsAliasParser(main_options))
        .allow_unregistered();
    po::parsed_options parsed = parser.run();

    /// Unrecognized options (positional ones excluded) are an error; only the first one is reported.
    auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
    if (!unrecognized_options.empty())
    {
        const auto & first_unrecognized = unrecognized_options[0];
        auto hints = this->getHints(first_unrecognized);
        if (hints.empty())
            throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", first_unrecognized);

        throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}",
            first_unrecognized, toString(hints));
    }

    /// Check positional options.
    for (const auto & op : parsed.options)
    {
        /// Only registered entries without a key are positional arguments.
        if (op.unregistered || !op.string_key.empty())
            continue;

        const auto & token = op.original_tokens[0];
        if (token.starts_with("--") || token.empty() || op.value.empty())
            continue;

        /// A special case for better usability: a positional argument that contains
        /// a whitespace is treated as a query, e.g. clickhouse "SELECT 1".
        /// This is user-friendly for interactive usage, but questionable in general.
        /// In case of ambiguity or for scripts, prefer using proper options.
        po::variable_value value(boost::any(op.value), false);

        if (!token.contains(' '))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);

        if (!options.emplace("query", value).second)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
    }

    po::store(parsed, options);
}
void ClientBase::init(int argc, char ** argv)
{
namespace po = boost::program_options;
@ -3055,9 +2918,9 @@ void ClientBase::init(int argc, char ** argv)
("config-file,C", po::value<std::string>(), "config-file path")
("query,q", po::value<std::vector<std::string>>()->multitoken(), R"(query; can be specified multiple times (--query "SELECT 1" --query "SELECT 2"...))")
("query,q", po::value<std::vector<std::string>>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)")
("queries-file", po::value<std::vector<std::string>>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.")
("multiquery,n", "Obsolete, does nothing")
("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)")
("database,d", po::value<std::string>(), "database")
("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
@ -3086,7 +2949,7 @@ void ClientBase::init(int argc, char ** argv)
("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
("highlight", po::value<bool>()->default_value(true), "enable or disable basic syntax highlight in interactive command line")
("ignore-error", "do not stop processing in multiquery mode")
("ignore-error", "do not stop processing when an error occurs")
("stacktrace", "print stack traces of exceptions")
("hardware-utilization", "print hardware utilization information in progress bar")
("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets")
@ -3179,8 +3042,6 @@ void ClientBase::init(int argc, char ** argv)
queries_files = options["queries-file"].as<std::vector<std::string>>();
if (options.count("multiline"))
getClientConfiguration().setBool("multiline", true);
if (options.count("multiquery"))
getClientConfiguration().setBool("multiquery", true);
if (options.count("ignore-error"))
getClientConfiguration().setBool("ignore-error", true);
if (options.count("format"))

View File

@ -156,8 +156,6 @@ protected:
void setInsertionTable(const ASTInsertQuery & insert_query);
void addMultiquery(std::string_view query, Arguments & common_arguments) const;
private:
void receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool partial_result_on_first_cancel);
bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_);
@ -206,6 +204,9 @@ protected:
/// Adjust some settings after command line options and config had been processed.
void adjustSettings();
/// Initializes the client context.
void initClientContext();
void setDefaultFormatsAndCompressionFromConfiguration();
void initTTYBuffer(ProgressOption progress);
@ -215,6 +216,9 @@ protected:
SharedContextHolder shared_context;
ContextMutablePtr global_context;
/// Client context is a context used only by the client to parse queries, process query parameters and to connect to clickhouse-server.
ContextMutablePtr client_context;
LoggerPtr fatal_log;
Poco::AutoPtr<Poco::SplitterChannel> fatal_channel_ptr;
Poco::AutoPtr<Poco::Channel> fatal_console_channel_ptr;
@ -223,7 +227,6 @@ protected:
std::unique_ptr<Poco::Runnable> signal_listener;
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
bool is_multiquery = false;
bool delayed_interactive = false;
bool echo_queries = false; /// Print queries before execution in batch mode.

View File

@ -0,0 +1,176 @@
#include <Client/ClientBase.h>
#include <Core/BaseSettingsProgramOptions.h>
namespace DB
{
/**
 * Program options parsing is very slow in debug builds and it affects .sh tests
 * causing them to time out sporadically.
 * It seems impossible to enable optimizations for a single function (only to disable them), so
 * instead we extract the code to a separate source file and compile it with different options.
 */
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int UNRECOGNIZED_ARGUMENTS;
}
namespace
{
/// Transparent hash functor: the `is_transparent` marker allows hashed containers
/// keyed by strings to be queried with a std::string_view.
struct TransparentStringHash
{
    using is_transparent = void;

    size_t operator()(std::string_view txt) const
    {
        const std::hash<std::string_view> hasher{};
        return hasher(txt);
    }
};
/// Extra parser for boost::program_options that lets long options be spelled with
/// dashes instead of underscores (e.g. `--max-threads` for `max_threads`).
/// It only rewrites a token when the dash-to-underscore form names a known option.
class OptionsAliasParser
{
public:
    /// Remembers the long names of all options registered in `options`.
    explicit OptionsAliasParser(const boost::program_options::options_description & options)
    {
        const auto & known_options = options.options();
        options_names.reserve(known_options.size());
        for (const auto & known_option : known_options)
            options_names.insert(known_option->long_name());
    }

    /// Implements the boost::program_options::ext_parser contract.
    /// Returns {key, value} with dashes in the key replaced by underscores when that
    /// yields a known option name; returns an empty pair in every other case
    /// (token is not a long option, the key is already known as written, or it is
    /// still unknown after the replacement).
    std::pair<std::string, std::string> operator()(const std::string & token) const
    {
        if (!token.starts_with("--"))
            return {};

        const std::string arg = token.substr(2);

        /// With the long_allow_adjacent style the token may look like `key=value`.
        const auto pos_eq = arg.find('=');
        std::string key = arg.substr(0, pos_eq);

        /// The option is already spelled correctly, nothing to rewrite.
        if (options_names.contains(key))
            return {};

        std::replace(key.begin(), key.end(), '-', '_');

        /// Still unknown after the replacement: not ours to handle.
        if (!options_names.contains(key))
            return {};

        std::string value = (pos_eq == std::string::npos) ? std::string{} : arg.substr(pos_eq + 1);
        return {key, value};
    }

private:
    std::unordered_set<std::string> options_names;
};
}
/// Registers the settings as command line options, parses `arguments` and stores the result in `options`.
///
/// Steps:
///  1. Adds query settings (and, if allowed, non-clashing MergeTree settings and their aliases)
///     to the main options description.
///  2. Parses the command line, accepting dashes in place of underscores in option names.
///  3. Throws UNRECOGNIZED_ARGUMENTS for the first unrecognized non-positional option.
///  4. Maps a positional argument that contains a space to the `query` option;
///     throws BAD_ARGUMENTS for any other non-empty positional argument.
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
    auto & main_options = options_description.main_description.value();

    if (allow_repeated_settings)
        addProgramOptionsAsMultitokens(cmd_settings, main_options);
    else
        addProgramOptions(cmd_settings, main_options);

    if (allow_merge_tree_settings)
    {
        /// MergeTree settings are added by hand because some of their names clash with
        /// query settings. Query settings win: a clashing MergeTree setting is skipped.
        std::unordered_set<std::string, TransparentStringHash, std::equal_to<>> main_option_names;
        for (const auto & registered_option : main_options.options())
            main_option_names.insert(registered_option->long_name());

        for (const auto & setting : cmd_merge_tree_settings.all())
        {
            const auto add_setting = [&](const std::string_view name)
            {
                if (main_option_names.contains(name))
                    return;

                if (allow_repeated_settings)
                    addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting);
                else
                    addProgramOption(cmd_merge_tree_settings, main_options, name, setting);
            };

            const auto & setting_name = setting.getName();
            add_setting(setting_name);

            /// Every alias of the setting is registered as an option too.
            const auto & settings_to_aliases = MergeTreeSettings::Traits::settingsToAliases();
            const auto aliases_it = settings_to_aliases.find(setting_name);
            if (aliases_it != settings_to_aliases.end())
            {
                for (const auto & alias : aliases_it->second)
                    add_setting(alias);
            }
        }
    }

    /// Parse main commandline options.
    auto parser = po::command_line_parser(arguments)
        .options(main_options)
        .extra_parser(OptionsAliasParser(main_options))
        .allow_unregistered();
    po::parsed_options parsed = parser.run();

    /// Unrecognized options (positional ones excluded) are an error; only the first one is reported.
    auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
    if (!unrecognized_options.empty())
    {
        const auto & first_unrecognized = unrecognized_options[0];
        auto hints = this->getHints(first_unrecognized);
        if (hints.empty())
            throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", first_unrecognized);

        throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}",
            first_unrecognized, toString(hints));
    }

    /// Check positional options.
    for (const auto & op : parsed.options)
    {
        /// Only registered entries without a key are positional arguments.
        if (op.unregistered || !op.string_key.empty())
            continue;

        const auto & token = op.original_tokens[0];
        if (token.starts_with("--") || token.empty() || op.value.empty())
            continue;

        /// A special case for better usability: a positional argument that contains
        /// a whitespace is treated as a query, e.g. clickhouse "SELECT 1".
        /// This is user-friendly for interactive usage, but questionable in general.
        /// In case of ambiguity or for scripts, prefer using proper options.
        po::variable_value value(boost::any(op.value), false);

        if (!token.contains(' '))
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);

        if (!options.emplace("query", value).second)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional option `{}` is not supported.", token);
    }

    po::store(parsed, options);
}
}

View File

@ -8,6 +8,7 @@
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <Common/LockMemoryExceptionInThread.h>
#include <Common/Logger.h>
#include <Common/MemorySanitizer.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/config_version.h>
@ -100,7 +101,7 @@ Exception::Exception(const MessageMasked & msg_masked, int code, bool remote_)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
handle_error_code(msg_masked.msg, code, remote, getStackFramePointers());
}
@ -110,7 +111,7 @@ Exception::Exception(MessageMasked && msg_masked, int code, bool remote_)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
handle_error_code(message(), code, remote, getStackFramePointers());
}
@ -119,7 +120,7 @@ Exception::Exception(CreateFromPocoTag, const Poco::Exception & exc)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
auto * stack_trace_frames = exc.get_stack_trace_frames();
auto stack_trace_size = exc.get_stack_trace_size();
@ -133,7 +134,7 @@ Exception::Exception(CreateFromSTDTag, const std::exception & exc)
{
if (terminate_on_any_exception)
std::_Exit(terminate_status_code);
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
#ifdef STD_EXCEPTION_HAS_STACK_TRACE
auto * stack_trace_frames = exc.get_stack_trace_frames();
auto stack_trace_size = exc.get_stack_trace_size();
@ -223,10 +224,38 @@ Exception::FramePointers Exception::getStackFramePointers() const
}
thread_local bool Exception::enable_job_stack_trace = false;
thread_local std::vector<StackTrace::FramePointers> Exception::thread_frame_pointers = {};
thread_local bool Exception::can_use_thread_frame_pointers = false;
thread_local Exception::ThreadFramePointers Exception::thread_frame_pointers;
/// Sets the thread-local flag `can_use_thread_frame_pointers`, which
/// getThreadFramePointers() / setThreadFramePointers() check before touching the storage.
Exception::ThreadFramePointers::ThreadFramePointers()
{
can_use_thread_frame_pointers = true;
}
/// Clears the thread-local flag `can_use_thread_frame_pointers`, so that
/// getThreadFramePointers() / setThreadFramePointers() no longer access the storage
/// after this wrapper has been destroyed.
Exception::ThreadFramePointers::~ThreadFramePointers()
{
can_use_thread_frame_pointers = false;
}
/// Returns the frame pointers kept in the thread-local `thread_frame_pointers`,
/// or an empty value when `can_use_thread_frame_pointers` is not set for this thread.
Exception::ThreadFramePointersBase Exception::getThreadFramePointers()
{
    if (!can_use_thread_frame_pointers)
        return {};

    return thread_frame_pointers.frame_pointers;
}
/// Moves `frame_pointers` into the thread-local `thread_frame_pointers`.
/// Does nothing when `can_use_thread_frame_pointers` is not set for this thread.
void Exception::setThreadFramePointers(ThreadFramePointersBase frame_pointers)
{
    if (!can_use_thread_frame_pointers)
        return;

    thread_frame_pointers.frame_pointers = std::move(frame_pointers);
}
static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string & start_of_message)
{
if (!isLoggingEnabled())
return;
try
{
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
@ -242,6 +271,9 @@ static void tryLogCurrentExceptionImpl(Poco::Logger * logger, const std::string
void tryLogCurrentException(const char * log_name, const std::string & start_of_message)
{
if (!isLoggingEnabled())
return;
/// Under high memory pressure, new allocations throw a
/// MEMORY_LIMIT_EXCEEDED exception.
///

View File

@ -10,7 +10,6 @@
#include <cerrno>
#include <exception>
#include <memory>
#include <vector>
#include <fmt/core.h>
@ -49,14 +48,14 @@ public:
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
}
Exception(const PreformattedMessage & msg, int code): Exception(msg.text, code)
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
message_format_string = msg.format_string;
message_format_string_args = msg.format_string_args;
}
@ -65,18 +64,36 @@ public:
{
if (terminate_on_any_exception)
std::terminate();
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
message_format_string = msg.format_string;
message_format_string_args = msg.format_string_args;
}
/// Collect call stacks of all previous jobs' schedulings leading to this thread job's execution
static thread_local bool enable_job_stack_trace;
static thread_local std::vector<StackTrace::FramePointers> thread_frame_pointers;
static thread_local bool can_use_thread_frame_pointers;
/// Because the order of static (and thread-local) destructor calls is unspecified,
/// thread_frame_pointers may already be destroyed when another destructor throws an exception.
/// To guard against that, the vector is wrapped in a class whose destructor clears a flag,
/// and getThreadFramePointers() returns an empty vector once that flag has been cleared.
using ThreadFramePointersBase = std::vector<StackTrace::FramePointers>;
struct ThreadFramePointers
{
ThreadFramePointers();
~ThreadFramePointers();
ThreadFramePointersBase frame_pointers;
};
static ThreadFramePointersBase getThreadFramePointers();
static void setThreadFramePointers(ThreadFramePointersBase frame_pointers);
/// Callback for any exception
static std::function<void(const std::string & msg, int code, bool remote, const Exception::FramePointers & trace)> callback;
protected:
static thread_local ThreadFramePointers thread_frame_pointers;
// used to remove the sensitive information from exceptions if query_masking_rules is configured
struct MessageMasked
{
@ -178,7 +195,7 @@ class ErrnoException : public Exception
public:
ErrnoException(std::string && msg, int code, int with_errno) : Exception(msg, code), saved_errno(with_errno)
{
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
addMessage(", {}", errnoToString(saved_errno));
}
@ -187,7 +204,7 @@ public:
requires std::is_convertible_v<T, String>
ErrnoException(int code, T && message) : Exception(message, code), saved_errno(errno)
{
capture_thread_frame_pointers = thread_frame_pointers;
capture_thread_frame_pointers = getThreadFramePointers();
addMessage(", {}", errnoToString(saved_errno));
}

View File

@ -25,3 +25,15 @@ bool hasLogger(const std::string & name)
{
return Poco::Logger::has(name);
}
/// Global flag read by isLoggingEnabled(); starts out as `true`.
static constinit std::atomic<bool> allow_logging{true};

/// Tells whether logging is currently allowed.
bool isLoggingEnabled()
{
    return allow_logging.load();
}

/// Switches the flag off; nothing in this block switches it back on.
void disableLogging()
{
    allow_logging.store(false);
}

View File

@ -64,3 +64,7 @@ LoggerRawPtr createRawLogger(const std::string & name, Poco::Channel * channel,
* Otherwise, returns false.
*/
bool hasLogger(const std::string & name);
void disableLogging();
bool isLoggingEnabled();

View File

@ -89,7 +89,7 @@ void signalHandler(int sig, siginfo_t * info, void * context)
writePODBinary(*info, out);
writePODBinary(signal_context, out);
writePODBinary(stack_trace, out);
writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector<StackTrace::FramePointers>{}, out);
writeVectorBinary(Exception::enable_job_stack_trace ? Exception::getThreadFramePointers() : std::vector<StackTrace::FramePointers>{}, out);
writeBinary(static_cast<UInt32>(getThreadId()), out);
writePODBinary(current_thread, out);

View File

@ -489,24 +489,25 @@ struct CacheEntry
using CacheEntryPtr = std::shared_ptr<CacheEntry>;
static constinit std::atomic<bool> can_use_cache = false;
static constinit bool can_use_cache = false;
using StackTraceCacheBase = std::map<StackTraceTriple, CacheEntryPtr, std::less<>>;
/// Map-based cache (see StackTraceCacheBase) that tracks its own lifetime through `can_use_cache`:
/// the flag is set when the object is constructed and cleared when it is destroyed.
/// NOTE(review): presumably readers check `can_use_cache` before touching the cache so that
/// it is not used after destruction — confirm against the callers.
struct StackTraceCache : public StackTraceCacheBase
{
StackTraceCache()
: StackTraceCacheBase()
{
/// The cache object now exists.
can_use_cache = true;
}
~StackTraceCache()
{
/// The cache object is going away.
can_use_cache = false;
}
};
static StackTraceCache & cacheInstance()
{
static StackTraceCache cache;
can_use_cache = true;
return cache;
}
static StackTraceCache cache;
static DB::SharedMutex stacktrace_cache_mutex;
@ -524,7 +525,6 @@ String toStringCached(const StackTrace::FramePointers & pointers, size_t offset,
/// Calculation of stack trace text is extremely slow.
/// We use cache because otherwise the server could be overloaded by trash queries.
/// Note that this cache can grow unconditionally, but practically it should be small.
StackTraceCache & cache = cacheInstance();
CacheEntryPtr cache_entry;
// Optimistic try for cache hit to avoid any contention whatsoever, should be the main hot code route
@ -576,7 +576,7 @@ std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offse
void StackTrace::dropCache()
{
std::lock_guard lock{stacktrace_cache_mutex};
cacheInstance().clear();
cache.clear();
}

View File

@ -51,7 +51,7 @@ public:
if (!capture_frame_pointers)
return;
/// Save all previous jobs call stacks and append with current
frame_pointers = DB::Exception::thread_frame_pointers;
frame_pointers = DB::Exception::getThreadFramePointers();
frame_pointers.push_back(StackTrace().getFramePointers());
}
@ -455,7 +455,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
try
{
if (DB::Exception::enable_job_stack_trace)
DB::Exception::thread_frame_pointers = std::move(job_data->frame_pointers);
DB::Exception::setThreadFramePointers(std::move(job_data->frame_pointers));
CurrentMetrics::Increment metric_active_pool_threads(metric_active_threads);

View File

@ -1,11 +1,12 @@
#if defined(OS_LINUX)
#include <Common/TimerDescriptor.h>
#include <Common/Exception.h>
#include <sys/timerfd.h>
#include <fcntl.h>
#include <unistd.h>
namespace DB
{
@ -13,21 +14,18 @@ namespace ErrorCodes
{
extern const int CANNOT_CREATE_TIMER;
extern const int CANNOT_SET_TIMER_PERIOD;
extern const int CANNOT_FCNTL;
extern const int CANNOT_READ_FROM_SOCKET;
}
TimerDescriptor::TimerDescriptor(int clockid, int flags)
TimerDescriptor::TimerDescriptor()
{
timer_fd = timerfd_create(clockid, flags);
timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
if (timer_fd == -1)
throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor");
if (-1 == fcntl(timer_fd, F_SETFL, O_NONBLOCK))
throw ErrnoException(ErrorCodes::CANNOT_FCNTL, "Cannot set O_NONBLOCK for timer_fd");
throw ErrnoException(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor");
}
TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept : timer_fd(other.timer_fd)
TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept
: timer_fd(other.timer_fd)
{
other.timer_fd = -1;
}
@ -40,21 +38,19 @@ TimerDescriptor & TimerDescriptor::operator=(DB::TimerDescriptor && other) noexc
TimerDescriptor::~TimerDescriptor()
{
/// Do not check for result cause cannot throw exception.
if (timer_fd != -1)
{
int err = close(timer_fd);
chassert(!err || errno == EINTR);
if (0 != ::close(timer_fd))
std::terminate();
}
}
void TimerDescriptor::reset() const
{
itimerspec spec;
spec.it_interval.tv_nsec = 0;
spec.it_interval.tv_sec = 0;
spec.it_value.tv_sec = 0;
spec.it_value.tv_nsec = 0;
if (timer_fd == -1)
return;
itimerspec spec{};
if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr))
throw ErrnoException(ErrorCodes::CANNOT_SET_TIMER_PERIOD, "Cannot reset timer_fd");
@ -66,25 +62,46 @@ void TimerDescriptor::reset() const
void TimerDescriptor::drain() const
{
if (timer_fd == -1)
return;
/// It is expected that socket returns 8 bytes when readable.
/// Read in loop anyway cause signal may interrupt read call.
/// man timerfd_create:
/// If the timer has already expired one or more times since its settings were last modified using timerfd_settime(),
/// or since the last successful read(2), then the buffer given to read(2) returns an unsigned 8-byte integer (uint64_t)
/// containing the number of expirations that have occurred.
/// (The returned value is in host byte order—that is, the native byte order for integers on the host machine.)
uint64_t buf;
while (true)
{
ssize_t res = ::read(timer_fd, &buf, sizeof(buf));
if (res < 0)
{
/// man timerfd_create:
/// If no timer expirations have occurred at the time of the read(2),
/// then the call either blocks until the next timer expiration, or fails with the error EAGAIN
/// if the file descriptor has been made nonblocking
/// (via the use of the fcntl(2) F_SETFL operation to set the O_NONBLOCK flag).
if (errno == EAGAIN)
break;
if (errno != EINTR)
throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd");
/// A signal happened, need to retry.
if (errno == EINTR)
continue;
throw ErrnoException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot drain timer_fd");
}
chassert(res == sizeof(buf));
}
}
void TimerDescriptor::setRelative(uint64_t usec) const
{
chassert(timer_fd >= 0);
static constexpr uint32_t TIMER_PRECISION = 1e6;
itimerspec spec;
@ -103,4 +120,5 @@ void TimerDescriptor::setRelative(Poco::Timespan timespan) const
}
}
#endif

View File

@ -12,7 +12,7 @@ private:
int timer_fd;
public:
explicit TimerDescriptor(int clockid = CLOCK_MONOTONIC, int flags = 0);
TimerDescriptor();
~TimerDescriptor();
TimerDescriptor(const TimerDescriptor &) = delete;

View File

@ -548,7 +548,7 @@ public:
virtual bool isExpired() const = 0;
/// Get the current connected node idx.
virtual Int8 getConnectedNodeIdx() const = 0;
virtual std::optional<int8_t> getConnectedNodeIdx() const = 0;
/// Get the current connected host and port.
virtual String getConnectedHostPort() const = 0;

View File

@ -39,7 +39,7 @@ public:
~TestKeeper() override;
bool isExpired() const override { return expired; }
Int8 getConnectedNodeIdx() const override { return 0; }
std::optional<int8_t> getConnectedNodeIdx() const override { return 0; }
String getConnectedHostPort() const override { return "TestKeeper:0000"; }
int32_t getConnectionXid() const override { return 0; }
int64_t getSessionID() const override { return 0; }

View File

@ -128,16 +128,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper>
ShuffleHosts shuffled_hosts = shuffleHosts();
impl = std::make_unique<Coordination::ZooKeeper>(shuffled_hosts, args, zk_log);
Int8 node_idx = impl->getConnectedNodeIdx();
auto node_idx = impl->getConnectedNodeIdx();
if (args.chroot.empty())
LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ","));
else
LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot);
/// If the balancing strategy has an optimal node then it will be the first in the list
bool connected_to_suboptimal_node = node_idx != shuffled_hosts[0].original_index;
bool connected_to_suboptimal_node = node_idx && static_cast<UInt8>(*node_idx) != shuffled_hosts[0].original_index;
bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty();
bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode();
if (connected_to_suboptimal_node && may_benefit_from_reconnecting)
@ -145,7 +144,7 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper>
auto reconnect_timeout_sec = getSecondsUntilReconnect(args);
LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})."
" To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds",
impl->getConnectedHostPort(), node_idx, reconnect_timeout_sec);
impl->getConnectedHostPort(), *node_idx, reconnect_timeout_sec);
auto reconnect_task_holder = DB::Context::getGlobalContextInstance()->getSchedulePool().createTask("ZKReconnect", [this, optimal_host = shuffled_hosts[0]]()
{
@ -154,13 +153,15 @@ void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper>
LOG_DEBUG(log, "Trying to connect to a more optimal node {}", optimal_host.host);
ShuffleHosts node{optimal_host};
std::unique_ptr<Coordination::IKeeper> new_impl = std::make_unique<Coordination::ZooKeeper>(node, args, zk_log);
Int8 new_node_idx = new_impl->getConnectedNodeIdx();
auto new_node_idx = new_impl->getConnectedNodeIdx();
chassert(new_node_idx.has_value());
/// Maybe the node was unavailable when getting AZs first time, update just in case
if (args.availability_zone_autodetect && availability_zones[new_node_idx].empty())
if (args.availability_zone_autodetect && availability_zones[*new_node_idx].empty())
{
availability_zones[new_node_idx] = new_impl->tryGetAvailabilityZone();
LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[new_node_idx]);
availability_zones[*new_node_idx] = new_impl->tryGetAvailabilityZone();
LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[*new_node_idx]);
}
optimal_impl = std::move(new_impl);
@ -1525,7 +1526,7 @@ void ZooKeeper::setServerCompletelyStarted()
zk->setServerCompletelyStarted();
}
Int8 ZooKeeper::getConnectedHostIdx() const
std::optional<int8_t> ZooKeeper::getConnectedHostIdx() const
{
return impl->getConnectedNodeIdx();
}
@ -1544,10 +1545,10 @@ String ZooKeeper::getConnectedHostAvailabilityZone() const
{
if (args.implementation != "zookeeper" || !impl)
return "";
Int8 idx = impl->getConnectedNodeIdx();
if (idx < 0)
std::optional<int8_t> idx = impl->getConnectedNodeIdx();
if (!idx)
return ""; /// session expired
return availability_zones.at(idx);
return availability_zones.at(*idx);
}
size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)

View File

@ -620,7 +620,7 @@ public:
void setServerCompletelyStarted();
Int8 getConnectedHostIdx() const;
std::optional<int8_t> getConnectedHostIdx() const;
String getConnectedHostPort() const;
int32_t getConnectionXid() const;

View File

@ -536,7 +536,7 @@ void ZooKeeper::connect(
compressed_out.emplace(*out, CompressionCodecFactory::instance().get("LZ4", {}));
}
original_index = static_cast<Int8>(node.original_index);
original_index.store(node.original_index);
break;
}
catch (...)
@ -1531,6 +1531,30 @@ void ZooKeeper::close()
}
/// Returns the value stored in `original_index`, or std::nullopt
/// while it still holds the -1 sentinel.
std::optional<int8_t> ZooKeeper::getConnectedNodeIdx() const
{
    const int8_t connected_idx = original_index.load();
    if (connected_idx != -1)
        return connected_idx;

    return std::nullopt;
}
/// Returns the entry of `args.hosts` selected by getConnectedNodeIdx(),
/// or an empty string when no node index is available.
String ZooKeeper::getConnectedHostPort() const
{
    if (const auto connected_idx = getConnectedNodeIdx())
        return args.hosts[*connected_idx];

    return "";
}
/// Returns the current value of the atomic `next_xid` counter.
int32_t ZooKeeper::getConnectionXid() const
{
return next_xid.load();
}
void ZooKeeper::setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_)
{
/// logOperationIfNeeded(...) uses zk_log and can be called from different threads, so we have to use atomic shared_ptr

View File

@ -114,13 +114,12 @@ public:
~ZooKeeper() override;
/// If expired, you can only destroy the object. All other methods will throw exception.
bool isExpired() const override { return requests_queue.isFinished(); }
Int8 getConnectedNodeIdx() const override { return original_index; }
String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; }
int32_t getConnectionXid() const override { return next_xid.load(); }
std::optional<int8_t> getConnectedNodeIdx() const override;
String getConnectedHostPort() const override;
int32_t getConnectionXid() const override;
String tryGetAvailabilityZone() override;
@ -219,7 +218,7 @@ private:
ACLs default_acls;
zkutil::ZooKeeperArgs args;
Int8 original_index = -1;
std::atomic<int8_t> original_index{-1};
/// Fault injection
void maybeInjectSendFault();

View File

@ -1,2 +1,2 @@
clickhouse_add_executable (mysqlxx_pool_test mysqlxx_pool_test.cpp)
target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx clickhouse_common_config)
target_link_libraries (mysqlxx_pool_test PRIVATE mysqlxx clickhouse_common_config loggers_no_text_log)

View File

@ -13,10 +13,10 @@
namespace DB
{
InterpolateDescription::InterpolateDescription(ActionsDAGPtr actions_, const Aliases & aliases)
: actions(actions_)
InterpolateDescription::InterpolateDescription(ActionsDAG actions_, const Aliases & aliases)
: actions(std::move(actions_))
{
for (const auto & name_type : actions->getRequiredColumns())
for (const auto & name_type : actions.getRequiredColumns())
{
if (const auto & p = aliases.find(name_type.name); p != aliases.end())
required_columns_map[p->second->getColumnName()] = name_type;
@ -24,7 +24,7 @@ namespace DB
required_columns_map[name_type.name] = name_type;
}
for (const ColumnWithTypeAndName & column : actions->getResultColumns())
for (const ColumnWithTypeAndName & column : actions.getResultColumns())
{
std::string name = column.name;
if (const auto & p = aliases.find(name); p != aliases.end())

View File

@ -5,21 +5,20 @@
#include <string>
#include <Core/NamesAndTypes.h>
#include <Parsers/IAST_fwd.h>
#include <Interpreters/ActionsDAG.h>
namespace DB
{
class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
using Aliases = std::unordered_map<String, ASTPtr>;
/// Interpolate description
struct InterpolateDescription
{
explicit InterpolateDescription(ActionsDAGPtr actions, const Aliases & aliases);
explicit InterpolateDescription(ActionsDAG actions, const Aliases & aliases);
ActionsDAGPtr actions;
ActionsDAG actions;
std::unordered_map<std::string, NameAndTypePair> required_columns_map; /// input column name -> {alias, type}
std::unordered_set<std::string> result_columns_set; /// result block columns

View File

@ -186,7 +186,7 @@ class IColumn;
M(Bool, allow_suspicious_ttl_expressions, false, "Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.", 0) \
M(Bool, allow_suspicious_variant_types, false, "In CREATE TABLE statement allows specifying Variant type with similar variant types (for example, with different numeric or date types). Enabling this setting may introduce some ambiguity when working with values with similar types.", 0) \
M(Bool, allow_suspicious_primary_key, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code.", 0) \
M(Bool, compile_expressions, false, "Compile some scalar functions and operators to native code. Due to a bug in the LLVM compiler infrastructure, on AArch64 machines, it is known to lead to a nullptr dereference and, consequently, server crash. Do not enable this setting.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
@ -1156,7 +1156,6 @@ class IColumn;
M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
M(Bool, input_format_values_allow_data_after_semicolon, false, "For Values format: allow extra data after semicolon (used by client to interpret comments).", 0) \
M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \
/** This setting is obsolete and do nothing, left for compatibility reasons. */ \
M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \

View File

@ -158,7 +158,7 @@ BaseDaemon::~BaseDaemon()
tryLogCurrentException(&logger());
}
OwnSplitChannel::disableLogging();
disableLogging();
}

View File

@ -257,8 +257,8 @@ static DataTypePtr create(const ASTPtr & arguments)
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Unexpected AST element passed as aggregate function name for data type AggregateFunction. "
"Must be identifier or function.");
"Unexpected AST element {} passed as aggregate function name for data type AggregateFunction. "
"Must be identifier or function", data_type_ast->getID());
for (size_t i = argument_types_start_idx; i < arguments->children.size(); ++i)
argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));

View File

@ -2,7 +2,7 @@
#include <DataTypes/DataTypeCustom.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDataType.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Common/typeid_cast.h>
@ -22,7 +22,6 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_TYPE;
extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
}
@ -83,15 +82,9 @@ DataTypePtr DataTypeFactory::tryGet(const ASTPtr & ast) const
template <bool nullptr_on_error>
DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const
{
if (const auto * func = ast->as<ASTFunction>())
if (const auto * type = ast->as<ASTDataType>())
{
if (func->parameters)
{
if constexpr (nullptr_on_error)
return nullptr;
throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Data type cannot have multiple parenthesized parameters.");
}
return getImpl<nullptr_on_error>(func->name, func->arguments);
return getImpl<nullptr_on_error>(type->name, type->arguments);
}
if (const auto * ident = ast->as<ASTIdentifier>())
@ -107,7 +100,7 @@ DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const
if constexpr (nullptr_on_error)
return nullptr;
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type.");
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type: {}.", ast->getID());
}
DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr & parameters) const

View File

@ -4,9 +4,10 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDataType.h>
#include <IO/Operators.h>
namespace DB
{
@ -53,13 +54,13 @@ static DataTypePtr create(const ASTPtr & arguments)
ASTPtr schema_argument = arguments->children[0];
bool is_nullable = false;
if (const auto * func = schema_argument->as<ASTFunction>())
if (const auto * type = schema_argument->as<ASTDataType>())
{
if (func->name != "Nullable" || func->arguments->children.size() != 1)
if (type->name != "Nullable" || type->arguments->children.size() != 1)
throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
"Expected 'Nullable(<schema_name>)' as parameter for type Object (function: {})", func->name);
"Expected 'Nullable(<schema_name>)' as parameter for type Object (function: {})", type->name);
schema_argument = func->arguments->children[0];
schema_argument = type->arguments->children[0];
is_nullable = true;
}

View File

@ -647,12 +647,13 @@ LoadTaskPtr DatabaseReplicated::startupDatabaseAsync(AsyncLoader & async_loader,
{
std::lock_guard lock{ddl_worker_mutex};
ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext());
ddl_worker->startup();
ddl_worker_initialized = true;
}
ddl_worker->startup();
ddl_worker_initialized = true;
});
std::scoped_lock lock(mutex);
return startup_replicated_database_task = makeLoadTask(async_loader, {job});
startup_replicated_database_task = makeLoadTask(async_loader, {job});
return startup_replicated_database_task;
}
void DatabaseReplicated::waitDatabaseStarted() const
@ -1530,8 +1531,11 @@ void DatabaseReplicated::stopReplication()
void DatabaseReplicated::shutdown()
{
stopReplication();
ddl_worker_initialized = false;
ddl_worker = nullptr;
{
std::lock_guard lock{ddl_worker_mutex};
ddl_worker_initialized = false;
ddl_worker = nullptr;
}
DatabaseAtomic::shutdown();
}
@ -1679,6 +1683,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const
/// It may update the metadata digest (both locally and in ZooKeeper)
/// before DatabaseReplicatedDDLWorker::initializeReplication() has finished.
/// We should not update metadata until the database is initialized.
std::lock_guard lock{ddl_worker_mutex};
return ddl_worker_initialized && ddl_worker->isCurrentlyActive();
}

View File

@ -155,7 +155,7 @@ private:
std::atomic_bool is_recovering = false;
std::atomic_bool ddl_worker_initialized = false;
std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
std::mutex ddl_worker_mutex;
mutable std::mutex ddl_worker_mutex;
UInt32 max_log_ptr_at_creation = 0;
/// Usually operation with metadata are single-threaded because of the way replication works,

View File

@ -149,7 +149,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_
columns = metadata_ptr->columns.getAll();
for (const auto & column_name_and_type: columns)
{
const auto & ast_column_declaration = std::make_shared<ASTColumnDeclaration>();
const auto ast_column_declaration = std::make_shared<ASTColumnDeclaration>();
ast_column_declaration->name = column_name_and_type.name;
/// parser typename
{
@ -164,7 +164,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_
if (!parser.parse(pos, ast_type, expected))
{
if (throw_on_error)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parser metadata of {}.{}",
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse metadata of {}.{}",
backQuote(table_id.database_name), backQuote(table_id.table_name));
else
return nullptr;

View File

@ -12,9 +12,9 @@
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTDataType.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/escapeForFileName.h>
#include <Common/parseRemoteDescription.h>
#include <Databases/DatabaseFactory.h>
@ -25,6 +25,7 @@
#include <Core/Settings.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
@ -432,7 +433,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary())
{
const auto & column_declaration = std::make_shared<ASTColumnDeclaration>();
const auto column_declaration = std::make_shared<ASTColumnDeclaration>();
column_declaration->name = column_type_and_name.name;
column_declaration->type = getColumnDeclaration(column_type_and_name.type);
columns_expression_list->children.emplace_back(column_declaration);
@ -470,17 +471,15 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c
WhichDataType which(data_type);
if (which.isNullable())
return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast<const DataTypeNullable *>(data_type.get())->getNestedType()));
return makeASTDataType("Nullable", getColumnDeclaration(typeid_cast<const DataTypeNullable *>(data_type.get())->getNestedType()));
if (which.isArray())
return makeASTFunction("Array", getColumnDeclaration(typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType()));
return makeASTDataType("Array", getColumnDeclaration(typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType()));
if (which.isDateTime64())
{
return makeASTFunction("DateTime64", std::make_shared<ASTLiteral>(static_cast<UInt32>(6)));
}
return makeASTDataType("DateTime64", std::make_shared<ASTLiteral>(static_cast<UInt32>(6)));
return std::make_shared<ASTIdentifier>(data_type->getName());
return makeASTDataType(data_type->getName());
}
void registerDatabasePostgreSQL(DatabaseFactory & factory)

View File

@ -34,7 +34,7 @@ public:
String getFileName() const override { return impl->getFileName(); }
size_t getFileSize() override { return impl->getFileSize(); }
std::optional<size_t> tryGetFileSize() override { return impl->tryGetFileSize(); }
String getInfoForLog() override { return impl->getInfoForLog(); }

View File

@ -810,6 +810,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
{
last_caller_id = FileSegment::getCallerId();
chassert(file_offset_of_buffer_end <= read_until_position);
if (file_offset_of_buffer_end == read_until_position)
return false;
@ -1051,7 +1052,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
if (download_current_segment && download_current_segment_succeeded)
chassert(file_segment.getCurrentWriteOffset() >= file_offset_of_buffer_end);
chassert(file_offset_of_buffer_end <= read_until_position);
chassert(
file_offset_of_buffer_end <= read_until_position,
fmt::format("Expected {} <= {} (size: {}, read range: {})",
file_offset_of_buffer_end, read_until_position, size, current_read_range.toString()));
}
swap(*implementation_buffer);

View File

@ -253,16 +253,15 @@ void ReadBufferFromAzureBlobStorage::initialize()
initialized = true;
}
size_t ReadBufferFromAzureBlobStorage::getFileSize()
std::optional<size_t> ReadBufferFromAzureBlobStorage::tryGetFileSize()
{
if (!blob_client)
blob_client = std::make_unique<Azure::Storage::Blobs::BlobClient>(blob_container_client->GetBlobClient(path));
if (file_size.has_value())
return *file_size;
if (!file_size)
file_size = blob_client->GetProperties().Value.BlobSize;
file_size = blob_client->GetProperties().Value.BlobSize;
return *file_size;
return file_size;
}
size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & /*progress_callback*/) const

View File

@ -42,7 +42,7 @@ public:
bool supportsRightBoundedReads() const override { return true; }
size_t getFileSize() override;
std::optional<size_t> tryGetFileSize() override;
size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function<bool(size_t)> & progress_callback) const override;

View File

@ -41,7 +41,7 @@ public:
void setReadUntilEnd() override { setReadUntilPosition(getFileSize()); }
size_t getFileSize() override { return getTotalSize(blobs_to_read); }
std::optional<size_t> tryGetFileSize() override { return getTotalSize(blobs_to_read); }
size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; }

View File

@ -215,7 +215,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
format_settings.tsv.crlf_end_of_line_input = settings.input_format_tsv_crlf_end_of_line;
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon;
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.values.escape_quote_with_quote = settings.output_format_values_escape_quote_with_quote;

View File

@ -81,46 +81,43 @@ struct CRCFunctionWrapper
static constexpr auto is_fixed_to_constant = true;
using ReturnType = typename Impl::ReturnType;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res)
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = doCRC(data, prev_offset, offsets[i] - prev_offset - 1);
prev_offset = offsets[i];
}
}
static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = doCRC(data, 0, n); }
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res)
static void vectorFixedToConstant(const ColumnString::Chars & data, size_t n, ReturnType & res, size_t)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
{
res[i] = doCRC(data, i * n, n);
}
res = doCRC(data, 0, n);
}
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res, size_t input_rows_count)
{
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = doCRC(data, i * n, n);
}
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", std::string(Impl::name));
}
[[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void uuid(const ColumnUUID::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", std::string(Impl::name));
}
[[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", std::string(Impl::name));
}
[[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
[[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t, PaddedPODArray<ReturnType> &, size_t)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", std::string(Impl::name));
}

View File

@ -32,13 +32,12 @@ struct WeekTransformer
{}
template <typename FromVectorType, typename ToVectorType>
void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone) const
void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone, size_t input_rows_count) const
{
using ValueType = typename ToVectorType::value_type;
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (is_extended_result)
vec_to[i] = static_cast<ValueType>(transform.executeExtendedResult(vec_from[i], week_mode, time_zone));
@ -56,7 +55,7 @@ template <typename FromDataType, typename ToDataType, bool is_extended_result =
struct CustomWeekTransformImpl
{
template <typename Transform>
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, Transform transform = {})
static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count, Transform transform = {})
{
const auto op = WeekTransformer<typename FromDataType::FieldType, typename ToDataType::FieldType, Transform, is_extended_result>{transform};
@ -77,9 +76,9 @@ struct CustomWeekTransformImpl
const auto * sources = checkAndGetColumn<DataTypeString::ColumnType>(source_col.get());
auto col_to = ToDataType::ColumnType::create();
col_to->getData().resize(sources->size());
col_to->getData().resize(input_rows_count);
for (size_t i = 0; i < sources->size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
DateTime64 dt64;
ReadBufferFromString buf(sources->getDataAt(i).toView());
@ -92,7 +91,7 @@ struct CustomWeekTransformImpl
else if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))
{
auto col_to = ToDataType::ColumnType::create();
op.vector(sources->getData(), col_to->getData(), week_mode, time_zone);
op.vector(sources->getData(), col_to->getData(), week_mode, time_zone, input_rows_count);
return col_to;
}
else

View File

@ -24,7 +24,7 @@ namespace DB
static constexpr auto millisecond_multiplier = 1'000;
static constexpr auto microsecond_multiplier = 1'000'000;
static constexpr auto nanosecond_multiplier = 1'000'000'000;
static constexpr auto nanosecond_multiplier = 1'000'000'000;
static constexpr FormatSettings::DateTimeOverflowBehavior default_date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore;
@ -2134,13 +2134,12 @@ struct Transformer
{
template <typename FromTypeVector, typename ToTypeVector>
static void vector(const FromTypeVector & vec_from, ToTypeVector & vec_to, const DateLUTImpl & time_zone, const Transform & transform,
[[maybe_unused]] ColumnUInt8::Container * vec_null_map_to)
[[maybe_unused]] ColumnUInt8::Container * vec_null_map_to, size_t input_rows_count)
{
using ValueType = typename ToTypeVector::value_type;
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (std::is_same_v<ToType, DataTypeDate> || std::is_same_v<ToType, DataTypeDateTime>)
{
@ -2178,7 +2177,7 @@ struct DateTimeTransformImpl
{
template <typename Additions = void *>
static ColumnPtr execute(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const Transform & transform = {})
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const Transform & transform = {})
{
using Op = Transformer<FromDataType, ToDataType, Transform, is_extended_result, Additions>;
@ -2200,7 +2199,7 @@ struct DateTimeTransformImpl
if (result_data_type.isDateTime() || result_data_type.isDateTime64())
{
const auto & time_zone = dynamic_cast<const TimezoneMixin &>(*result_type).getTimeZone();
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to);
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count);
}
else
{
@ -2209,15 +2208,13 @@ struct DateTimeTransformImpl
time_zone_argument_position = 2;
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_argument_position, 0);
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to);
Op::vector(sources->getData(), col_to->getData(), time_zone, transform, vec_null_map_to, input_rows_count);
}
if constexpr (std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
{
if (vec_null_map_to)
{
return ColumnNullable::create(std::move(mutable_result_col), std::move(col_null_map_to));
}
}
return mutable_result_col;

View File

@ -21,11 +21,10 @@ struct EmptyImpl
/// If the function will return constant value for FixedString data type.
static constexpr auto is_fixed_to_constant = false;
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
static void vector(const ColumnString::Chars & /*data*/, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 1;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = negative ^ (offsets[i] == prev_offset);
prev_offset = offsets[i] + 1;
@ -33,42 +32,40 @@ struct EmptyImpl
}
/// Only make sense if is_fixed_to_constant.
static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/)
static void vectorFixedToConstant(const ColumnString::Chars &, size_t, UInt8 &, size_t)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "'vectorFixedToConstant method' is called");
}
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
res[i] = negative ^ memoryIsZeroSmallAllowOverflow15(data.data() + i * n, n);
}
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
static void array(const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res, size_t input_rows_count)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = negative ^ (offsets[i] == prev_offset);
prev_offset = offsets[i];
}
}
static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray<UInt8> & res)
static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray<UInt8> & res, size_t)
{
for (size_t i = 0; i < n; ++i)
res[i] = negative ^ (container[i].toUnderType() == 0);
}
static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray<UInt8> & res)
static void ipv6(const ColumnIPv6::Container & container, size_t n, PaddedPODArray<UInt8> & res, size_t)
{
for (size_t i = 0; i < n; ++i)
res[i] = negative ^ (container[i].toUnderType() == 0);
}
static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray<UInt8> & res)
static void ipv4(const ColumnIPv4::Container & container, size_t n, PaddedPODArray<UInt8> & res, size_t)
{
for (size_t i = 0; i < n; ++i)
res[i] = negative ^ (container[i].toUnderType() == 0);

View File

@ -20,7 +20,7 @@ namespace DB
// includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word
struct ExtractStringImpl
{
static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end)
static const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end)
{
// jump separators
while (pos < end && isUTF8Sep(*pos))

View File

@ -46,7 +46,7 @@ public:
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", first_arg->getName(), getName());
for (const auto i : collections::range(1, arguments.size()))
for (size_t i = 1; i < arguments.size(); ++i)
{
const auto & pos_arg = arguments[i];
@ -57,19 +57,19 @@ public:
return std::make_shared<DataTypeUInt8>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const auto * value_col = arguments.front().column.get();
ColumnPtr res;
if (!((res = execute<UInt8>(arguments, result_type, value_col))
|| (res = execute<UInt16>(arguments, result_type, value_col))
|| (res = execute<UInt32>(arguments, result_type, value_col))
|| (res = execute<UInt64>(arguments, result_type, value_col))
|| (res = execute<Int8>(arguments, result_type, value_col))
|| (res = execute<Int16>(arguments, result_type, value_col))
|| (res = execute<Int32>(arguments, result_type, value_col))
|| (res = execute<Int64>(arguments, result_type, value_col))))
if (!((res = execute<UInt8>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<UInt16>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<UInt32>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<UInt64>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int8>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int16>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int32>(arguments, result_type, value_col, input_rows_count))
|| (res = execute<Int64>(arguments, result_type, value_col, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", value_col->getName(), getName());
return res;
@ -79,28 +79,28 @@ private:
template <typename T>
ColumnPtr execute(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type,
const IColumn * const value_col_untyped) const
const IColumn * const value_col_untyped,
size_t input_rows_count) const
{
if (const auto value_col = checkAndGetColumn<ColumnVector<T>>(value_col_untyped))
{
const auto size = value_col->size();
bool is_const;
const auto const_mask = createConstMaskIfConst<T>(arguments, is_const);
const auto & val = value_col->getData();
auto out_col = ColumnVector<UInt8>::create(size);
auto out_col = ColumnVector<UInt8>::create(input_rows_count);
auto & out = out_col->getData();
if (is_const)
{
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out[i] = Impl::apply(val[i], const_mask);
}
else
{
const auto mask = createMask<T>(size, arguments);
const auto mask = createMask<T>(input_rows_count, arguments);
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out[i] = Impl::apply(val[i], mask[i]);
}
@ -108,23 +108,22 @@ private:
}
else if (const auto value_col_const = checkAndGetColumnConst<ColumnVector<T>>(value_col_untyped))
{
const auto size = value_col_const->size();
bool is_const;
const auto const_mask = createConstMaskIfConst<T>(arguments, is_const);
const auto val = value_col_const->template getValue<T>();
if (is_const)
{
return result_type->createColumnConst(size, toField(Impl::apply(val, const_mask)));
return result_type->createColumnConst(input_rows_count, toField(Impl::apply(val, const_mask)));
}
else
{
const auto mask = createMask<T>(size, arguments);
auto out_col = ColumnVector<UInt8>::create(size);
const auto mask = createMask<T>(input_rows_count, arguments);
auto out_col = ColumnVector<UInt8>::create(input_rows_count);
auto & out = out_col->getData();
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out[i] = Impl::apply(val, mask[i]);
return out_col;
@ -140,7 +139,7 @@ private:
out_is_const = true;
ValueType mask = 0;
for (const auto i : collections::range(1, arguments.size()))
for (size_t i = 1; i < arguments.size(); ++i)
{
if (auto pos_col_const = checkAndGetColumnConst<ColumnVector<ValueType>>(arguments[i].column.get()))
{
@ -166,7 +165,7 @@ private:
{
PaddedPODArray<ValueType> mask(size, ValueType{});
for (const auto i : collections::range(1, arguments.size()))
for (size_t i = 1; i < arguments.size(); ++i)
{
const auto * pos_col = arguments[i].column.get();
@ -187,7 +186,7 @@ private:
{
const auto & pos = pos_col->getData();
for (const auto i : collections::range(0, mask.size()))
for (size_t i = 0; i < mask.size(); ++i)
if (pos[i] < 8 * sizeof(ValueType))
mask[i] = mask[i] | (ValueType(1) << pos[i]);
else
@ -205,7 +204,7 @@ private:
const auto new_mask = ValueType(1) << pos;
for (const auto i : collections::range(0, mask.size()))
for (size_t i = 0; i < mask.size(); ++i)
mask[i] = mask[i] | new_mask;
return true;

View File

@ -103,14 +103,11 @@ private:
const ColumnVector<T> * src_data_concrete = checkAndGetColumn<ColumnVector<T>>(&src_data);
if (!src_data_concrete)
{
return false;
}
for (size_t row = 0; row < rows; ++row)
{
out_vec[row * size_per_row + column_idx] = static_cast<char>(src_data_concrete->getInt(row));
}
return true;
}
};

View File

@ -428,19 +428,17 @@ struct Processor
{}
template <typename FromColumnType, typename ToColumnType>
void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
if constexpr (std::is_same_v<FromColumnType, ColumnString>)
{
const auto & offsets_from = col_from.getOffsets();
auto & vec_to = col_to.getData();
size_t size = offsets_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0 ; i < size; ++i)
for (size_t i = 0 ; i < input_rows_count; ++i)
{
std::string_view from = col_from.getDataAt(i).toView();
vec_to[i] = transform.execute(from, checkOverflow(delta), time_zone, utc_time_zone, scale);
@ -451,32 +449,31 @@ struct Processor
const auto & vec_from = col_from.getData();
auto & vec_to = col_to.getData();
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, utc_time_zone, scale);
}
}
template <typename FromColumnType, typename ToColumnType>
void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
castTypeToEither<
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale); return true; });
&delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale, input_rows_count); return true; });
}
template <typename FromType, typename ToColumnType>
void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
castTypeToEither<
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale); return true; });
&delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale, input_rows_count); return true; });
}
private:
@ -491,19 +488,17 @@ private:
template <typename FromColumnType, typename ToColumnType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const
const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
if constexpr (std::is_same_v<FromColumnType, ColumnString>)
{
const auto & offsets_from = col_from.getOffsets();
auto & vec_to = col_to.getData();
size_t size = offsets_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0 ; i < size; ++i)
for (size_t i = 0 ; i < input_rows_count; ++i)
{
std::string_view from = col_from.getDataAt(i).toView();
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale);
@ -514,26 +509,24 @@ private:
const auto & vec_from = col_from.getData();
auto & vec_to = col_to.getData();
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale);
}
}
template <typename FromType, typename ToColumnType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const
const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t input_rows_count) const
{
static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC");
auto & vec_to = col_to.getData();
size_t size = delta.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale);
}
};
@ -542,7 +535,7 @@ private:
template <typename FromDataType, typename ToDataType, typename Transform>
struct DateTimeAddIntervalImpl
{
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale)
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale, size_t input_rows_count)
{
using FromValueType = typename FromDataType::FieldType;
using FromColumnType = typename FromDataType::ColumnType;
@ -561,15 +554,15 @@ struct DateTimeAddIntervalImpl
if (const auto * sources = checkAndGetColumn<FromColumnType>(&source_column))
{
if (const auto * delta_const_column = typeid_cast<const ColumnConst *>(&delta_column))
processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale);
processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale, input_rows_count);
else
processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale);
processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale, input_rows_count);
}
else if (const auto * sources_const = checkAndGetColumnConst<FromColumnType>(&source_column))
{
processor.constantVector(
sources_const->template getValue<FromValueType>(),
*col_to, delta_column, time_zone, scale);
*col_to, delta_column, time_zone, scale, input_rows_count);
}
else
{
@ -708,25 +701,25 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const IDataType * from_type = arguments[0].type.get();
WhichDataType which(from_type);
if (which.isDate())
return DateTimeAddIntervalImpl<DataTypeDate, TransformResultDataType<DataTypeDate>, Transform>::execute(Transform{}, arguments, result_type, 0);
return DateTimeAddIntervalImpl<DataTypeDate, TransformResultDataType<DataTypeDate>, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count);
else if (which.isDate32())
return DateTimeAddIntervalImpl<DataTypeDate32, TransformResultDataType<DataTypeDate32>, Transform>::execute(Transform{}, arguments, result_type, 0);
return DateTimeAddIntervalImpl<DataTypeDate32, TransformResultDataType<DataTypeDate32>, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count);
else if (which.isDateTime())
return DateTimeAddIntervalImpl<DataTypeDateTime, TransformResultDataType<DataTypeDateTime>, Transform>::execute(Transform{}, arguments, result_type, 0);
return DateTimeAddIntervalImpl<DataTypeDateTime, TransformResultDataType<DataTypeDateTime>, Transform>::execute(Transform{}, arguments, result_type, 0, input_rows_count);
else if (which.isDateTime64())
{
const auto * datetime64_type = assert_cast<const DataTypeDateTime64 *>(from_type);
auto from_scale = datetime64_type->getScale();
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(Transform{}, arguments, result_type, from_scale);
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(Transform{}, arguments, result_type, from_scale, input_rows_count);
}
else if (which.isString())
return DateTimeAddIntervalImpl<DataTypeString, DataTypeDateTime64, Transform>::execute(Transform{}, arguments, result_type, 3);
return DateTimeAddIntervalImpl<DataTypeString, DataTypeDateTime64, Transform>::execute(Transform{}, arguments, result_type, 3, input_rows_count);
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", arguments[0].type->getName(), getName());
}

View File

@ -54,7 +54,7 @@ private:
}
template <typename LeftType, typename RightType>
static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg)
static ColumnPtr executeTyped(const ColumnConst * left_arg, const IColumn * right_arg, size_t input_rows_count)
{
if (const auto right_arg_typed = checkAndGetColumn<ColumnVector<RightType>>(right_arg))
{
@ -63,12 +63,11 @@ private:
LeftType left_src_data[Impl::rows_per_iteration];
std::fill(std::begin(left_src_data), std::end(left_src_data), left_arg->template getValue<LeftType>());
const auto & right_src_data = right_arg_typed->getData();
const auto src_size = right_src_data.size();
auto & dst_data = dst->getData();
dst_data.resize(src_size);
dst_data.resize(input_rows_count);
const auto rows_remaining = src_size % Impl::rows_per_iteration;
const auto rows_size = src_size - rows_remaining;
const auto rows_remaining = input_rows_count % Impl::rows_per_iteration;
const auto rows_size = input_rows_count - rows_remaining;
for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
Impl::execute(left_src_data, &right_src_data[i], &dst_data[i]);
@ -92,7 +91,7 @@ private:
}
template <typename LeftType, typename RightType>
static ColumnPtr executeTyped(const ColumnVector<LeftType> * left_arg, const IColumn * right_arg)
static ColumnPtr executeTyped(const ColumnVector<LeftType> * left_arg, const IColumn * right_arg, size_t input_rows_count)
{
if (const auto right_arg_typed = checkAndGetColumn<ColumnVector<RightType>>(right_arg))
{
@ -100,12 +99,11 @@ private:
const auto & left_src_data = left_arg->getData();
const auto & right_src_data = right_arg_typed->getData();
const auto src_size = left_src_data.size();
auto & dst_data = dst->getData();
dst_data.resize(src_size);
dst_data.resize(input_rows_count);
const auto rows_remaining = src_size % Impl::rows_per_iteration;
const auto rows_size = src_size - rows_remaining;
const auto rows_remaining = input_rows_count % Impl::rows_per_iteration;
const auto rows_size = input_rows_count - rows_remaining;
for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
Impl::execute(&left_src_data[i], &right_src_data[i], &dst_data[i]);
@ -136,12 +134,11 @@ private:
const auto & left_src_data = left_arg->getData();
RightType right_src_data[Impl::rows_per_iteration];
std::fill(std::begin(right_src_data), std::end(right_src_data), right_arg_typed->template getValue<RightType>());
const auto src_size = left_src_data.size();
auto & dst_data = dst->getData();
dst_data.resize(src_size);
dst_data.resize(input_rows_count);
const auto rows_remaining = src_size % Impl::rows_per_iteration;
const auto rows_size = src_size - rows_remaining;
const auto rows_remaining = input_rows_count % Impl::rows_per_iteration;
const auto rows_size = input_rows_count - rows_remaining;
for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
Impl::execute(&left_src_data[i], right_src_data, &dst_data[i]);
@ -165,7 +162,7 @@ private:
return nullptr;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_left = arguments[0];
const ColumnWithTypeAndName & col_right = arguments[1];
@ -202,7 +199,7 @@ private:
if (const auto left_arg_typed = checkAndGetColumn<ColVecLeft>(left_arg))
{
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg)))
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg, input_rows_count)))
return true;
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}",
@ -210,7 +207,7 @@ private:
}
if (const auto left_arg_typed = checkAndGetColumnConst<ColVecLeft>(left_arg))
{
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg)))
if ((res = executeTyped<LeftType, RightType>(left_arg_typed, right_arg, input_rows_count)))
return true;
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function {}",

View File

@ -106,42 +106,40 @@ private:
}
template <typename T, typename ReturnType>
static ColumnPtr execute(const ColumnVector<T> * col)
static ColumnPtr execute(const ColumnVector<T> * col, size_t input_rows_count)
{
const auto & src_data = col->getData();
const size_t size = src_data.size();
auto dst = ColumnVector<ReturnType>::create();
auto & dst_data = dst->getData();
dst_data.resize(size);
dst_data.resize(input_rows_count);
executeInIterations(src_data.data(), dst_data.data(), size);
executeInIterations(src_data.data(), dst_data.data(), input_rows_count);
return dst;
}
template <typename T, typename ReturnType>
static ColumnPtr execute(const ColumnDecimal<T> * col)
static ColumnPtr execute(const ColumnDecimal<T> * col, size_t input_rows_count)
{
const auto & src_data = col->getData();
const size_t size = src_data.size();
UInt32 scale = col->getScale();
auto dst = ColumnVector<ReturnType>::create();
auto & dst_data = dst->getData();
dst_data.resize(size);
dst_data.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
dst_data[i] = DecimalUtils::convertTo<ReturnType>(src_data[i], scale);
executeInIterations(dst_data.data(), dst_data.data(), size);
executeInIterations(dst_data.data(), dst_data.data(), input_rows_count);
return dst;
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col = arguments[0];
ColumnPtr res;
@ -156,7 +154,7 @@ private:
const auto col_vec = checkAndGetColumn<ColVecType>(col.column.get());
if (col_vec == nullptr)
return false;
return (res = execute<Type, ReturnType>(col_vec)) != nullptr;
return (res = execute<Type, ReturnType>(col_vec, input_rows_count)) != nullptr;
};
if (!callOnBasicType<void, true, true, true, false>(col.type->getTypeId(), call))

View File

@ -53,39 +53,37 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto * in = arguments.front().column.get();
ColumnPtr res;
if (!((res = execute<UInt8>(in))
|| (res = execute<UInt16>(in))
|| (res = execute<UInt32>(in))
|| (res = execute<UInt64>(in))
|| (res = execute<Int8>(in))
|| (res = execute<Int16>(in))
|| (res = execute<Int32>(in))
|| (res = execute<Int64>(in))
|| (res = execute<Float32>(in))
|| (res = execute<Float64>(in))))
if (!((res = execute<UInt8>(in, input_rows_count))
|| (res = execute<UInt16>(in, input_rows_count))
|| (res = execute<UInt32>(in, input_rows_count))
|| (res = execute<UInt64>(in, input_rows_count))
|| (res = execute<Int8>(in, input_rows_count))
|| (res = execute<Int16>(in, input_rows_count))
|| (res = execute<Int32>(in, input_rows_count))
|| (res = execute<Int64>(in, input_rows_count))
|| (res = execute<Float32>(in, input_rows_count))
|| (res = execute<Float64>(in, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
return res;
}
template <typename T>
ColumnPtr execute(const IColumn * in_untyped) const
ColumnPtr execute(const IColumn * in_untyped, size_t input_rows_count) const
{
if (const auto in = checkAndGetColumn<ColumnVector<T>>(in_untyped))
{
const auto size = in->size();
auto out = ColumnUInt8::create(size);
auto out = ColumnUInt8::create(input_rows_count);
const auto & in_data = in->getData();
auto & out_data = out->getData();
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
out_data[i] = Impl::execute(in_data[i]);
return out;

View File

@ -132,9 +132,7 @@ public:
}
DataTypes types(tuple_size);
for (size_t i = 0; i < tuple_size; i++)
{
types[i] = std::make_shared<DataTypeUInt64>();
}
return std::make_shared<DataTypeTuple>(types);
}
};

View File

@ -71,7 +71,7 @@ public:
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
Impl::vector(col->getChars(), col->getOffsets(), vec_res);
Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count);
return col_res;
}
@ -80,7 +80,7 @@ public:
if (Impl::is_fixed_to_constant)
{
ResultType res = 0;
Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res);
Impl::vectorFixedToConstant(col_fixed->getChars(), col_fixed->getN(), res, input_rows_count);
return result_type->createColumnConst(col_fixed->size(), toField(res));
}
@ -90,7 +90,7 @@ public:
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_fixed->size());
Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res);
Impl::vectorFixedToVector(col_fixed->getChars(), col_fixed->getN(), vec_res, input_rows_count);
return col_res;
}
@ -101,7 +101,7 @@ public:
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_arr->size());
Impl::array(col_arr->getOffsets(), vec_res);
Impl::array(col_arr->getOffsets(), vec_res, input_rows_count);
return col_res;
}
@ -112,7 +112,7 @@ public:
vec_res.resize(col_map->size());
const auto & col_nested = col_map->getNestedColumn();
Impl::array(col_nested.getOffsets(), vec_res);
Impl::array(col_nested.getOffsets(), vec_res, input_rows_count);
return col_res;
}
else if (const ColumnUUID * col_uuid = checkAndGetColumn<ColumnUUID>(column.get()))
@ -120,7 +120,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_uuid->size());
Impl::uuid(col_uuid->getData(), input_rows_count, vec_res);
Impl::uuid(col_uuid->getData(), input_rows_count, vec_res, input_rows_count);
return col_res;
}
else if (const ColumnIPv6 * col_ipv6 = checkAndGetColumn<ColumnIPv6>(column.get()))
@ -128,7 +128,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_ipv6->size());
Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res);
Impl::ipv6(col_ipv6->getData(), input_rows_count, vec_res, input_rows_count);
return col_res;
}
else if (const ColumnIPv4 * col_ipv4 = checkAndGetColumn<ColumnIPv4>(column.get()))
@ -136,7 +136,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_ipv4->size());
Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res);
Impl::ipv4(col_ipv4->getData(), input_rows_count, vec_res, input_rows_count);
return col_res;
}
else

View File

@ -45,7 +45,7 @@ public:
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
ColumnPtr column_haystack = arguments[0].column;
column_haystack = column_haystack->convertToFullColumnIfConst();
@ -70,7 +70,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_const->getValue<String>(),
col_replacement_const->getValue<String>(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack && col_needle_vector && col_replacement_const)
@ -79,7 +80,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
col_replacement_const->getValue<String>(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack && col_needle_const && col_replacement_vector)
@ -88,7 +90,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_const->getValue<String>(),
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack && col_needle_vector && col_replacement_vector)
@ -97,7 +100,8 @@ public:
col_haystack->getChars(), col_haystack->getOffsets(),
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else if (col_haystack_fixed && col_needle_const && col_replacement_const)
@ -106,7 +110,8 @@ public:
col_haystack_fixed->getChars(), col_haystack_fixed->getN(),
col_needle_const->getValue<String>(),
col_replacement_const->getValue<String>(),
col_res->getChars(), col_res->getOffsets());
col_res->getChars(), col_res->getOffsets(),
input_rows_count);
return col_res;
}
else

View File

@ -632,7 +632,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
@ -646,11 +646,10 @@ public:
const ColumnString::Chars & in_vec = col->getChars();
const ColumnString::Offsets & in_offsets = col->getOffsets();
size_t size = in_offsets.size();
out_offsets.resize(size);
out_offsets.resize(input_rows_count);
size_t max_out_len = 0;
for (size_t i = 0; i < in_offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1])
- /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1;
@ -662,7 +661,7 @@ public:
char * pos = begin;
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t new_offset = in_offsets[i];
@ -691,15 +690,14 @@ public:
const ColumnString::Chars & in_vec = col_fix_string->getChars();
const size_t n = col_fix_string->getN();
size_t size = col_fix_string->size();
out_offsets.resize(size);
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size);
out_offsets.resize(input_rows_count);
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * input_rows_count);
char * begin = reinterpret_cast<char *>(out_vec.data());
char * pos = begin;
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t new_offset = prev_offset + n;

View File

@ -60,17 +60,17 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
ColumnPtr res;
if (!((res = executeType<UInt8>(arguments))
|| (res = executeType<UInt16>(arguments))
|| (res = executeType<UInt32>(arguments))
|| (res = executeType<UInt64>(arguments))
|| (res = executeType<Int8>(arguments))
|| (res = executeType<Int16>(arguments))
|| (res = executeType<Int32>(arguments))
|| (res = executeType<Int64>(arguments))))
if (!((res = executeType<UInt8>(arguments, input_rows_count))
|| (res = executeType<UInt16>(arguments, input_rows_count))
|| (res = executeType<UInt32>(arguments, input_rows_count))
|| (res = executeType<UInt64>(arguments, input_rows_count))
|| (res = executeType<Int8>(arguments, input_rows_count))
|| (res = executeType<Int16>(arguments, input_rows_count))
|| (res = executeType<Int32>(arguments, input_rows_count))
|| (res = executeType<Int64>(arguments, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
arguments[0].column->getName(), getName());
@ -98,7 +98,7 @@ private:
}
template <typename T>
ColumnPtr executeType(const ColumnsWithTypeAndName & columns) const
ColumnPtr executeType(const ColumnsWithTypeAndName & columns, size_t input_rows_count) const
{
if (const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(columns[0].column.get()))
{
@ -107,13 +107,12 @@ private:
const typename ColumnVector<T>::Container & vec_from = col_from->getData();
ColumnString::Chars & data_to = col_to->getChars();
ColumnString::Offsets & offsets_to = col_to->getOffsets();
size_t size = vec_from.size();
data_to.resize(size * 2);
offsets_to.resize(size);
data_to.resize(input_rows_count * 2);
offsets_to.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(data_to);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
writeBitmask<T>(vec_from[i], buf_to);
writeChar(0, buf_to);
@ -244,7 +243,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
template <typename T>
ColumnPtr executeType(const IColumn * column) const
ColumnPtr executeType(const IColumn * column, size_t input_rows_count) const
{
const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(column);
if (!col_from)
@ -257,13 +256,12 @@ public:
auto & result_array_offsets_data = result_array_offsets->getData();
auto & vec_from = col_from->getData();
size_t size = vec_from.size();
result_array_offsets_data.resize(size);
result_array_values_data.reserve(size * 2);
result_array_offsets_data.resize(input_rows_count);
result_array_values_data.reserve(input_rows_count * 2);
using UnsignedType = make_unsigned_t<T>;
for (size_t row = 0; row < size; ++row)
for (size_t row = 0; row < input_rows_count; ++row)
{
UnsignedType x = static_cast<UnsignedType>(vec_from[row]);
@ -302,24 +300,24 @@ public:
return result_column;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const IColumn * in_column = arguments[0].column.get();
ColumnPtr result_column;
if (!((result_column = executeType<UInt8>(in_column))
|| (result_column = executeType<UInt16>(in_column))
|| (result_column = executeType<UInt32>(in_column))
|| (result_column = executeType<UInt32>(in_column))
|| (result_column = executeType<UInt64>(in_column))
|| (result_column = executeType<UInt128>(in_column))
|| (result_column = executeType<UInt256>(in_column))
|| (result_column = executeType<Int8>(in_column))
|| (result_column = executeType<Int16>(in_column))
|| (result_column = executeType<Int32>(in_column))
|| (result_column = executeType<Int64>(in_column))
|| (result_column = executeType<Int128>(in_column))
|| (result_column = executeType<Int256>(in_column))))
if (!((result_column = executeType<UInt8>(in_column, input_rows_count))
|| (result_column = executeType<UInt16>(in_column, input_rows_count))
|| (result_column = executeType<UInt32>(in_column, input_rows_count))
|| (result_column = executeType<UInt32>(in_column, input_rows_count))
|| (result_column = executeType<UInt64>(in_column, input_rows_count))
|| (result_column = executeType<UInt128>(in_column, input_rows_count))
|| (result_column = executeType<UInt256>(in_column, input_rows_count))
|| (result_column = executeType<Int8>(in_column, input_rows_count))
|| (result_column = executeType<Int16>(in_column, input_rows_count))
|| (result_column = executeType<Int32>(in_column, input_rows_count))
|| (result_column = executeType<Int64>(in_column, input_rows_count))
|| (result_column = executeType<Int128>(in_column, input_rows_count))
|| (result_column = executeType<Int256>(in_column, input_rows_count))))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of first argument of function {}",

View File

@ -155,7 +155,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const IDataType * from_type = arguments[0].type.get();
const auto * array_type = typeid_cast<const DataTypeArray *>(from_type);
@ -165,21 +165,21 @@ public:
WhichDataType which(nested_type);
if (which.isUInt8())
return executeBitmapData<UInt8>(argument_types, arguments);
return executeBitmapData<UInt8>(argument_types, arguments, input_rows_count);
else if (which.isUInt16())
return executeBitmapData<UInt16>(argument_types, arguments);
return executeBitmapData<UInt16>(argument_types, arguments, input_rows_count);
else if (which.isUInt32())
return executeBitmapData<UInt32>(argument_types, arguments);
return executeBitmapData<UInt32>(argument_types, arguments, input_rows_count);
else if (which.isUInt64())
return executeBitmapData<UInt64>(argument_types, arguments);
return executeBitmapData<UInt64>(argument_types, arguments, input_rows_count);
else if (which.isInt8())
return executeBitmapData<Int8>(argument_types, arguments);
return executeBitmapData<Int8>(argument_types, arguments, input_rows_count);
else if (which.isInt16())
return executeBitmapData<Int16>(argument_types, arguments);
return executeBitmapData<Int16>(argument_types, arguments, input_rows_count);
else if (which.isInt32())
return executeBitmapData<Int32>(argument_types, arguments);
return executeBitmapData<Int32>(argument_types, arguments, input_rows_count);
else if (which.isInt64())
return executeBitmapData<Int64>(argument_types, arguments);
return executeBitmapData<Int64>(argument_types, arguments, input_rows_count);
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unexpected type {} of argument of function {}",
from_type->getName(), getName());
@ -187,7 +187,7 @@ public:
private:
template <typename T>
ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments) const
ColumnPtr executeBitmapData(DataTypes & argument_types, const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
{
// input data
const ColumnArray * array = typeid_cast<const ColumnArray *>(arguments[0].column.get());
@ -203,10 +203,10 @@ private:
AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get(
AggregateFunctionGroupBitmapData<T>::name(), action, argument_types, params_row, properties);
auto col_to = ColumnAggregateFunction::create(bitmap_function);
col_to->reserve(offsets.size());
col_to->reserve(input_rows_count);
size_t pos = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bitmap_data

View File

@ -23,7 +23,7 @@ namespace
constexpr size_t max_string_size = 1UL << 15;
template <typename ModelMap>
ALWAYS_INLINE inline Float64 naiveBayes(
Float64 naiveBayes(
const FrequencyHolder::EncodingMap & standard,
const ModelMap & model,
Float64 max_result)
@ -51,7 +51,7 @@ namespace
/// Count how many times each bigram occurs in the text.
template <typename ModelMap>
ALWAYS_INLINE inline void calculateStats(
void calculateStats(
const UInt8 * data,
const size_t size,
ModelMap & model)
@ -77,24 +77,25 @@ struct CharsetClassificationImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const auto & encodings_freq = FrequencyHolder::getInstance().getEncodingsFrequency();
if constexpr (detect_language)
/// 2 chars for ISO code + 1 zero byte
res_data.reserve(offsets.size() * 3);
res_data.reserve(input_rows_count * 3);
else
/// Mean charset length is 8
res_data.reserve(offsets.size() * 8);
res_data.reserve(input_rows_count * 8);
res_offsets.resize(offsets.size());
res_offsets.resize(input_rows_count);
size_t current_result_offset = 0;
double zero_frequency_log = log(zero_frequency);
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * str = data.data() + offsets[i - 1];
const size_t str_len = offsets[i] - offsets[i - 1] - 1;

View File

@ -341,7 +341,7 @@ class FunctionIPv4NumToString : public IFunction
{
private:
template <typename ArgType>
ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const
ColumnPtr executeTyped(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
{
using ColumnType = ColumnVector<ArgType>;
@ -356,12 +356,12 @@ private:
ColumnString::Chars & vec_res = col_res->getChars();
ColumnString::Offsets & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(vec_in.size());
vec_res.resize(input_rows_count * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(input_rows_count);
char * begin = reinterpret_cast<char *>(vec_res.data());
char * pos = begin;
for (size_t i = 0; i < vec_in.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
DB::formatIPv4(reinterpret_cast<const unsigned char*>(&vec_in[i]), sizeof(ArgType), pos, mask_tail_octets, "xxx");
offsets_res[i] = pos - begin;
@ -532,7 +532,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -542,11 +542,11 @@ public:
auto col_res = ColumnIPv6::create();
auto & vec_res = col_res->getData();
vec_res.resize(col_in->size());
vec_res.resize(input_rows_count);
const auto & vec_in = col_in->getData();
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
mapIPv4ToIPv6(vec_in[i], reinterpret_cast<UInt8 *>(&vec_res[i].toUnderType()));
return col_res;
@ -557,7 +557,7 @@ public:
auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH);
auto & vec_res = col_res->getChars();
vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH);
vec_res.resize(input_rows_count * IPV6_BINARY_LENGTH);
const auto & vec_in = col_in->getData();
@ -742,7 +742,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
@ -751,13 +751,13 @@ public:
auto col_res = ColumnUInt64::create();
ColumnUInt64::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
vec_res.resize(input_rows_count);
const ColumnString::Chars & vec_src = col->getChars();
const ColumnString::Offsets & offsets_src = col->getOffsets();
size_t prev_offset = 0;
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t current_offset = offsets_src[i];
size_t string_size = current_offset - prev_offset - 1; /// mind the terminating zero byte
@ -1054,7 +1054,7 @@ public:
return std::make_shared<DataTypeUInt8>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnString * input_column = checkAndGetColumn<ColumnString>(arguments[0].column.get());
@ -1067,14 +1067,14 @@ public:
auto col_res = ColumnUInt8::create();
ColumnUInt8::Container & vec_res = col_res->getData();
vec_res.resize(input_column->size());
vec_res.resize(input_rows_count);
const ColumnString::Chars & vec_src = input_column->getChars();
const ColumnString::Offsets & offsets_src = input_column->getOffsets();
size_t prev_offset = 0;
UInt32 result = 0;
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
vec_res[i] = DB::parseIPv4whole(reinterpret_cast<const char *>(&vec_src[prev_offset]), reinterpret_cast<unsigned char *>(&result));
prev_offset = offsets_src[i];
@ -1110,7 +1110,7 @@ public:
return std::make_shared<DataTypeUInt8>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnString * input_column = checkAndGetColumn<ColumnString>(arguments[0].column.get());
if (!input_column)
@ -1122,14 +1122,14 @@ public:
auto col_res = ColumnUInt8::create();
ColumnUInt8::Container & vec_res = col_res->getData();
vec_res.resize(input_column->size());
vec_res.resize(input_rows_count);
const ColumnString::Chars & vec_src = input_column->getChars();
const ColumnString::Offsets & offsets_src = input_column->getOffsets();
size_t prev_offset = 0;
char buffer[IPV6_BINARY_LENGTH];
for (size_t i = 0; i < vec_res.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
vec_res[i] = DB::parseIPv6whole(reinterpret_cast<const char *>(&vec_src[prev_offset]),
reinterpret_cast<const char *>(&vec_src[offsets_src[i] - 1]),

View File

@ -177,7 +177,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -189,21 +189,20 @@ public:
"Illegal type {} of column {} argument of function {}, expected FixedString({})",
col_type_name.type->getName(), col_in->getName(), getName(), uuid_bytes_length);
const auto size = col_in->size();
const auto & vec_in = col_in->getChars();
auto col_res = ColumnString::create();
ColumnString::Chars & vec_res = col_res->getChars();
ColumnString::Offsets & offsets_res = col_res->getOffsets();
vec_res.resize(size * (uuid_text_length + 1));
offsets_res.resize(size);
vec_res.resize(input_rows_count * (uuid_text_length + 1));
offsets_res.resize(input_rows_count);
size_t src_offset = 0;
size_t dst_offset = 0;
const UUIDSerializer uuid_serializer(variant);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
uuid_serializer.deserialize(&vec_in[src_offset], &vec_res[dst_offset]);
src_offset += uuid_bytes_length;
@ -256,7 +255,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -266,17 +265,16 @@ public:
{
const auto & vec_in = col_in->getChars();
const auto & offsets_in = col_in->getOffsets();
const size_t size = offsets_in.size();
auto col_res = ColumnFixedString::create(uuid_bytes_length);
ColumnString::Chars & vec_res = col_res->getChars();
vec_res.resize(size * uuid_bytes_length);
vec_res.resize(input_rows_count * uuid_bytes_length);
size_t src_offset = 0;
size_t dst_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
/// If string has incorrect length - then return zero UUID.
/// If string has correct length but contains something not like UUID - implementation specific behaviour.
@ -300,18 +298,17 @@ public:
"Illegal type {} of column {} argument of function {}, expected FixedString({})",
col_type_name.type->getName(), col_in_fixed->getName(), getName(), uuid_text_length);
const auto size = col_in_fixed->size();
const auto & vec_in = col_in_fixed->getChars();
auto col_res = ColumnFixedString::create(uuid_bytes_length);
ColumnString::Chars & vec_res = col_res->getChars();
vec_res.resize(size * uuid_bytes_length);
vec_res.resize(input_rows_count * uuid_bytes_length);
size_t src_offset = 0;
size_t dst_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
uuid_serializer.serialize(&vec_in[src_offset], &vec_res[dst_offset]);
src_offset += uuid_text_length;
@ -359,7 +356,7 @@ public:
return std::make_shared<DataTypeFixedString>(uuid_bytes_length);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -370,16 +367,15 @@ public:
{
const auto & vec_in = col_in->getData();
const UUID * uuids = vec_in.data();
const size_t size = vec_in.size();
auto col_res = ColumnFixedString::create(uuid_bytes_length);
ColumnString::Chars & vec_res = col_res->getChars();
vec_res.resize(size * uuid_bytes_length);
vec_res.resize(input_rows_count * uuid_bytes_length);
size_t dst_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]);
uint64_t loBytes = DB::UUIDHelpers::getLowBytes(uuids[i]);
@ -448,7 +444,7 @@ public:
return std::make_shared<DataTypeDateTime64>(datetime_scale, timezone);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & col_type_name = arguments[0];
const ColumnPtr & column = col_type_name.column;
@ -457,12 +453,11 @@ public:
{
const auto & vec_in = col_in->getData();
const UUID * uuids = vec_in.data();
const size_t size = vec_in.size();
auto col_res = ColumnDateTime64::create(size, datetime_scale);
auto col_res = ColumnDateTime64::create(input_rows_count, datetime_scale);
auto & vec_res = col_res->getData();
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const uint64_t hiBytes = DB::UUIDHelpers::getHighBytes(uuids[i]);
const uint64_t ms = ((hiBytes & 0xf000) == 0x7000) ? (hiBytes >> 16) : 0;

View File

@ -151,36 +151,36 @@ struct Processor
template <typename FirstArgVectorType, typename SecondArgType>
void NO_INLINE
vectorConstant(const FirstArgVectorType & vec_first, const SecondArgType second_value,
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale,
size_t input_rows_count) const
{
size_t size = vec_first.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_first[i], second_value, scale_a, scale_b, result_scale);
}
template <typename FirstArgVectorType, typename SecondArgVectorType>
void NO_INLINE
vectorVector(const FirstArgVectorType & vec_first, const SecondArgVectorType & vec_second,
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale,
size_t input_rows_count) const
{
size_t size = vec_first.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(vec_first[i], vec_second[i], scale_a, scale_b, result_scale);
}
template <typename FirstArgType, typename SecondArgVectorType>
void NO_INLINE
constantVector(const FirstArgType & first_value, const SecondArgVectorType & vec_second,
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) const
PaddedPODArray<typename ResultType::FieldType> & vec_to, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale,
size_t input_rows_count) const
{
size_t size = vec_second.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = transform.execute(first_value, vec_second[i], scale_a, scale_b, result_scale);
}
};
@ -189,7 +189,7 @@ struct Processor
template <typename FirstArgType, typename SecondArgType, typename ResultType, typename Transform>
struct DecimalArithmeticsImpl
{
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type)
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count)
{
using FirstArgValueType = typename FirstArgType::FieldType;
using FirstArgColumnType = typename FirstArgType::ColumnType;
@ -214,13 +214,13 @@ struct DecimalArithmeticsImpl
if (first_col)
{
if (second_col_const)
op.vectorConstant(first_col->getData(), second_col_const->template getValue<SecondArgValueType>(), col_to->getData(), scale_a, scale_b, result_scale);
op.vectorConstant(first_col->getData(), second_col_const->template getValue<SecondArgValueType>(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count);
else
op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale);
op.vectorVector(first_col->getData(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count);
}
else if (first_col_const)
{
op.constantVector(first_col_const->template getValue<FirstArgValueType>(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale);
op.constantVector(first_col_const->template getValue<FirstArgValueType>(), second_col->getData(), col_to->getData(), scale_a, scale_b, result_scale, input_rows_count);
}
else
{
@ -293,14 +293,14 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
return resolveOverload(arguments, result_type);
return resolveOverload(arguments, result_type, input_rows_count);
}
private:
// long resolver to call proper templated func
ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
ColumnPtr resolveOverload(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{
WhichDataType which_dividend(arguments[0].type.get());
WhichDataType which_divisor(arguments[1].type.get());
@ -309,26 +309,26 @@ private:
{
using DividendType = DataTypeDecimal32;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}
else if (which_dividend.isDecimal64())
{
using DividendType = DataTypeDecimal64;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}
@ -336,13 +336,13 @@ private:
{
using DividendType = DataTypeDecimal128;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}
@ -350,13 +350,13 @@ private:
{
using DividendType = DataTypeDecimal256;
if (which_divisor.isDecimal32())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal32, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal64())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal64, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal128())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal128, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
else if (which_divisor.isDecimal256())
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type);
return DecimalArithmeticsImpl<DividendType, DataTypeDecimal256, DataTypeDecimal256, Transform>::execute(Transform{}, arguments, result_type, input_rows_count);
}

View File

@ -181,7 +181,7 @@ public:
bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// The dictionary key that defines the "point of view".
std::string dict_key;
@ -205,10 +205,9 @@ public:
const typename ColumnVector<T>::Container & vec_from = col_from->getData();
typename ColumnVector<T>::Container & vec_to = col_to->getData();
size_t size = vec_from.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(vec_from[i], dict);
return col_to;
@ -273,7 +272,7 @@ public:
bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// The dictionary key that defines the "point of view".
std::string dict_key;
@ -303,10 +302,9 @@ public:
const typename ColumnVector<T>::Container & vec_from1 = col_vec1->getData();
const typename ColumnVector<T>::Container & vec_from2 = col_vec2->getData();
typename ColumnUInt8::Container & vec_to = col_to->getData();
size_t size = vec_from1.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(vec_from1[i], vec_from2[i], dict);
return col_to;
@ -318,10 +316,9 @@ public:
const typename ColumnVector<T>::Container & vec_from1 = col_vec1->getData();
const T const_from2 = col_const2->template getValue<T>();
typename ColumnUInt8::Container & vec_to = col_to->getData();
size_t size = vec_from1.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(vec_from1[i], const_from2, dict);
return col_to;
@ -333,10 +330,9 @@ public:
const T const_from1 = col_const1->template getValue<T>();
const typename ColumnVector<T>::Container & vec_from2 = col_vec2->getData();
typename ColumnUInt8::Container & vec_to = col_to->getData();
size_t size = vec_from2.size();
vec_to.resize(size);
vec_to.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
vec_to[i] = Transform::apply(const_from1, vec_from2[i], dict);
return col_to;
@ -405,7 +401,7 @@ public:
bool isDeterministic() const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// The dictionary key that defines the "point of view".
std::string dict_key;
@ -432,11 +428,10 @@ public:
auto & res_values = col_values->getData();
const typename ColumnVector<T>::Container & vec_from = col_from->getData();
size_t size = vec_from.size();
res_offsets.resize(size);
res_values.reserve(size * 4);
res_offsets.resize(input_rows_count);
res_values.reserve(input_rows_count * 4);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
T cur = vec_from[i];
for (size_t depth = 0; cur && depth < DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH; ++depth)

View File

@ -125,7 +125,7 @@ public:
}
String error;
for (const auto i : collections::range(0, input_rows_count))
for (size_t i = 0; i < input_rows_count; ++i)
{
if (!col_json_const)
{
@ -314,7 +314,7 @@ private:
static size_t calculateMaxSize(const ColumnString::Offsets & offsets)
{
size_t max_size = 0;
for (const auto i : collections::range(0, offsets.size()))
for (size_t i = 0; i < offsets.size(); ++i)
{
size_t size = offsets[i] - offsets[i - 1];
max_size = std::max(max_size, size);

View File

@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED;
struct FunctionDetectLanguageImpl
{
static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string)
static std::string_view codeISO(std::string_view code_string)
{
if (code_string.ends_with("-Latn"))
code_string.remove_suffix(code_string.size() - 5);
@ -63,16 +63,17 @@ struct FunctionDetectLanguageImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
/// Constant 3 is based on the fact that in general we need 2 characters for ISO code + 1 zero byte
res_data.reserve(offsets.size() * 3);
res_offsets.resize(offsets.size());
res_data.reserve(input_rows_count * 3);
res_offsets.resize(input_rows_count);
bool is_reliable;
size_t res_offset = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * str = data.data() + offsets[i - 1];
const size_t str_len = offsets[i] - offsets[i - 1] - 1;

View File

@ -48,7 +48,7 @@ using UInt8Container = ColumnUInt8::Container;
using UInt8ColumnPtrs = std::vector<const ColumnUInt8 *>;
MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, const bool make_nullable)
MutableColumnPtr buildColumnFromTernaryData(const UInt8Container & ternary_data, bool make_nullable)
{
const size_t rows_count = ternary_data.size();

View File

@ -40,17 +40,18 @@ struct FunctionDetectProgrammingLanguageImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
const auto & programming_freq = FrequencyHolder::getInstance().getProgrammingFrequency();
/// Constant 5 is arbitrary
res_data.reserve(offsets.size() * 5);
res_offsets.resize(offsets.size());
res_data.reserve(input_rows_count * 5);
res_offsets.resize(input_rows_count);
size_t res_offset = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * str = data.data() + offsets[i - 1];
const size_t str_len = offsets[i] - offsets[i - 1] - 1;

View File

@ -80,8 +80,7 @@ public:
auto col_to = ColumnVector<ToType>::create();
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
size_t size = input_rows_count;
vec_to.resize(size);
vec_to.resize(input_rows_count);
RandImpl::execute(reinterpret_cast<char *>(vec_to.data()), vec_to.size() * sizeof(ToType));
return col_to;

View File

@ -37,12 +37,12 @@ struct FunctionStringDistanceImpl
const ColumnString::Offsets & haystack_offsets,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<ResultType> & res)
PaddedPODArray<ResultType> & res,
size_t input_rows_count)
{
size_t size = res.size();
const char * haystack = reinterpret_cast<const char *>(haystack_data.data());
const char * needle = reinterpret_cast<const char *>(needle_data.data());
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = Op::process(
haystack + haystack_offsets[i - 1],
@ -56,13 +56,13 @@ struct FunctionStringDistanceImpl
const String & haystack,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<ResultType> & res)
PaddedPODArray<ResultType> & res,
size_t input_rows_count)
{
const char * haystack_data = haystack.data();
size_t haystack_size = haystack.size();
const char * needle = reinterpret_cast<const char *>(needle_data.data());
size_t size = res.size();
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = Op::process(haystack_data, haystack_size,
needle + needle_offsets[i - 1], needle_offsets[i] - needle_offsets[i - 1] - 1);
@ -73,9 +73,10 @@ struct FunctionStringDistanceImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
const String & needle,
PaddedPODArray<ResultType> & res)
PaddedPODArray<ResultType> & res,
size_t input_rows_count)
{
constantVector(needle, data, offsets, res);
constantVector(needle, data, offsets, res, input_rows_count);
}
};

View File

@ -315,9 +315,9 @@ struct SimHashImpl
return getSimHash(finger_vec);
}
static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray<UInt64> & res)
static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, size_t shingle_size, PaddedPODArray<UInt64> & res, size_t input_rows_count)
{
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * one_data = &data[offsets[i - 1]];
const size_t data_size = offsets[i] - offsets[i - 1] - 1;
@ -543,12 +543,13 @@ struct MinHashImpl
PaddedPODArray<UInt64> * res1,
PaddedPODArray<UInt64> * res2,
ColumnTuple * res1_strings,
ColumnTuple * res2_strings)
ColumnTuple * res2_strings,
size_t input_rows_count)
{
MinHeap min_heap;
MaxHeap max_heap;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * one_data = &data[offsets[i - 1]];
const size_t data_size = offsets[i] - offsets[i - 1] - 1;

View File

@ -135,7 +135,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
@ -152,9 +152,9 @@ public:
{
auto col_res = ColumnVector<UInt64>::create();
auto & vec_res = col_res->getData();
vec_res.resize(column->size());
vec_res.resize(input_rows_count);
const ColumnString & col_str_vector = checkAndGetColumn<ColumnString>(*column);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, vec_res);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, vec_res, input_rows_count);
return col_res;
}
else if constexpr (is_arg) // Min hash arg
@ -171,7 +171,7 @@ public:
auto max_tuple = ColumnTuple::create(std::move(max_columns));
const ColumnString & col_str_vector = checkAndGetColumn<ColumnString>(*column);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get());
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, nullptr, nullptr, min_tuple.get(), max_tuple.get(), input_rows_count);
MutableColumns tuple_columns;
tuple_columns.emplace_back(std::move(min_tuple));
@ -184,10 +184,10 @@ public:
auto col_h2 = ColumnVector<UInt64>::create();
auto & vec_h1 = col_h1->getData();
auto & vec_h2 = col_h2->getData();
vec_h1.resize(column->size());
vec_h2.resize(column->size());
vec_h1.resize(input_rows_count);
vec_h2.resize(input_rows_count);
const ColumnString & col_str_vector = checkAndGetColumn<ColumnString>(*column);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr);
Impl::apply(col_str_vector.getChars(), col_str_vector.getOffsets(), shingle_size, num_hashes, &vec_h1, &vec_h2, nullptr, nullptr, input_rows_count);
MutableColumns tuple_columns;
tuple_columns.emplace_back(std::move(col_h1));
tuple_columns.emplace_back(std::move(col_h2));

View File

@ -224,7 +224,7 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arguments[0].column.get()))
{
@ -233,11 +233,10 @@ public:
const typename ColumnString::Chars & data = col_from->getChars();
const typename ColumnString::Offsets & offsets = col_from->getOffsets();
auto & chars_to = col_to->getChars();
const auto size = offsets.size();
chars_to.resize(size * Impl::length);
chars_to.resize(input_rows_count * Impl::length);
ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[current_offset]),
@ -253,11 +252,10 @@ public:
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnFixedString::Chars & data = col_from_fix->getChars();
const auto size = col_from_fix->size();
auto & chars_to = col_to->getChars();
const auto length = col_from_fix->getN();
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
chars_to.resize(input_rows_count * Impl::length);
for (size_t i = 0; i < input_rows_count; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i * length]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
@ -268,11 +266,10 @@ public:
{
auto col_to = ColumnFixedString::create(Impl::length);
const typename ColumnIPv6::Container & data = col_from_ip->getData();
const auto size = col_from_ip->size();
auto & chars_to = col_to->getChars();
const auto length = sizeof(IPv6::UnderlyingType);
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
chars_to.resize(input_rows_count * Impl::length);
for (size_t i = 0; i < input_rows_count; ++i)
{
Impl::apply(
reinterpret_cast<const char *>(&data[i]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));

View File

@ -90,7 +90,7 @@ struct NgramDistanceImpl
((cont[Offset + I] = std::tolower(cont[Offset + I])), ...);
}
static ALWAYS_INLINE size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end)
static size_t readASCIICodePoints(CodePoint * code_points, const char *& pos, const char * end)
{
/// Offset before which we copy some data.
constexpr size_t padding_offset = default_padding - N + 1;
@ -120,7 +120,7 @@ struct NgramDistanceImpl
return default_padding;
}
static ALWAYS_INLINE size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end)
static size_t readUTF8CodePoints(CodePoint * code_points, const char *& pos, const char * end)
{
/// The same copying as described in the function above.
memcpy(code_points, code_points + default_padding - N + 1, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(CodePoint));
@ -195,7 +195,7 @@ struct NgramDistanceImpl
}
template <bool save_ngrams>
static ALWAYS_INLINE inline size_t calculateNeedleStats(
static inline size_t calculateNeedleStats(
const char * data,
const size_t size,
NgramCount * ngram_stats,
@ -228,7 +228,7 @@ struct NgramDistanceImpl
}
template <bool reuse_stats>
static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
static inline UInt64 calculateHaystackStatsAndMetric(
const char * data,
const size_t size,
NgramCount * ngram_stats,
@ -318,9 +318,9 @@ struct NgramDistanceImpl
const ColumnString::Offsets & haystack_offsets,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
const size_t haystack_offsets_size = haystack_offsets.size();
size_t prev_haystack_offset = 0;
size_t prev_needle_offset = 0;
@ -331,7 +331,7 @@ struct NgramDistanceImpl
std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
for (size_t i = 0; i < haystack_offsets_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * haystack = reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]);
const size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
@ -391,12 +391,13 @@ struct NgramDistanceImpl
std::string haystack,
const ColumnString::Chars & needle_data,
const ColumnString::Offsets & needle_offsets,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
/// For symmetric version it is better to use vector_constant
if constexpr (symmetric)
{
vectorConstant(needle_data, needle_offsets, std::move(haystack), res);
vectorConstant(needle_data, needle_offsets, std::move(haystack), res, input_rows_count);
}
else
{
@ -404,7 +405,6 @@ struct NgramDistanceImpl
haystack.resize(haystack_size + default_padding);
/// For logic explanation see vector_vector function.
const size_t needle_offsets_size = needle_offsets.size();
size_t prev_offset = 0;
std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};
@ -412,7 +412,7 @@ struct NgramDistanceImpl
std::unique_ptr<UInt16[]> needle_ngram_storage(new UInt16[max_string_size]);
std::unique_ptr<UInt16[]> haystack_ngram_storage(new UInt16[max_string_size]);
for (size_t i = 0; i < needle_offsets_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * needle = reinterpret_cast<const char *>(&needle_data[prev_offset]);
const size_t needle_size = needle_offsets[i] - prev_offset - 1;
@ -456,7 +456,8 @@ struct NgramDistanceImpl
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
std::string needle,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
/// zeroing our map
std::unique_ptr<NgramCount[]> common_stats{new NgramCount[map_size]{}};
@ -472,7 +473,7 @@ struct NgramDistanceImpl
size_t distance = needle_stats_size;
size_t prev_offset = 0;
for (size_t i = 0; i < offsets.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const UInt8 * haystack = &data[prev_offset];
const size_t haystack_size = offsets[i] - prev_offset - 1;

View File

@ -57,7 +57,7 @@ public:
return std::make_shared<DataTypeNumber<typename Impl::ResultType>>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
using ResultType = typename Impl::ResultType;
@ -90,7 +90,7 @@ public:
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(column_haystack->size());
vec_res.resize(input_rows_count);
const ColumnString * col_haystack_vector = checkAndGetColumn<ColumnString>(&*column_haystack);
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(&*column_needle);
@ -110,7 +110,7 @@ public:
Impl::max_string_size);
}
}
Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res);
Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle, vec_res, input_rows_count);
}
else if (col_haystack_vector && col_needle_vector)
{
@ -119,7 +119,8 @@ public:
col_haystack_vector->getOffsets(),
col_needle_vector->getChars(),
col_needle_vector->getOffsets(),
vec_res);
vec_res,
input_rows_count);
}
else if (col_haystack_const && col_needle_vector)
{
@ -136,7 +137,7 @@ public:
Impl::max_string_size);
}
}
Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
Impl::constantVector(haystack, col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res, input_rows_count);
}
else
{

View File

@ -55,7 +55,7 @@ public:
return arguments[0];
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
const ColumnString * col = checkAndGetColumn<ColumnString>(column.get());
@ -65,7 +65,7 @@ public:
arguments[0].column->getName(), getName());
auto col_res = ColumnString::create();
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
Impl::vector(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count);
return col_res;
}
};
@ -104,7 +104,7 @@ public:
return std::make_shared<DataTypeFloat32>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
const ColumnPtr & column = arguments[0].column;
const ColumnString * col = checkAndGetColumn<ColumnString>(column.get());
@ -115,9 +115,9 @@ public:
auto col_res = ColumnVector<Float32>::create();
ColumnVector<Float32>::Container & vec_res = col_res->getData();
vec_res.resize(col->size());
vec_res.resize(input_rows_count);
Impl::vector(col->getChars(), col->getOffsets(), vec_res);
Impl::vector(col->getChars(), col->getOffsets(), vec_res, input_rows_count);
return col_res;
}
};

View File

@ -130,7 +130,7 @@ struct TimeWindowImpl
static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name);
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name);
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count);
};
template <TimeWindowFunctionName type>
@ -196,7 +196,7 @@ struct TimeWindowImpl<TUMBLE>
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto & interval_column = arguments[1];
@ -214,38 +214,37 @@ struct TimeWindowImpl<TUMBLE>
{
/// TODO: add proper support for fractional seconds
case IntervalKind::Kind::Second:
return executeTumble<UInt32, IntervalKind::Kind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Second>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Minute:
return executeTumble<UInt32, IntervalKind::Kind::Minute>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Minute>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Hour:
return executeTumble<UInt32, IntervalKind::Kind::Hour>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Hour>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Day:
return executeTumble<UInt32, IntervalKind::Kind::Day>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt32, IntervalKind::Kind::Day>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Week:
return executeTumble<UInt16, IntervalKind::Kind::Week>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Week>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Month:
return executeTumble<UInt16, IntervalKind::Kind::Month>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Month>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Quarter:
return executeTumble<UInt16, IntervalKind::Kind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
case IntervalKind::Kind::Year:
return executeTumble<UInt16, IntervalKind::Kind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
return executeTumble<UInt16, IntervalKind::Kind::Year>(*time_column_vec, std::get<1>(interval), time_zone, input_rows_count);
default:
throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet");
}
}
template <typename ToType, IntervalKind::Kind unit>
static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone)
static ColumnPtr executeTumble(const ColumnDateTime & time_column, UInt64 num_units, const DateLUTImpl & time_zone, size_t input_rows_count)
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
for (size_t i = 0; i != size; ++i)
start_data.resize(input_rows_count);
end_data.resize(input_rows_count);
for (size_t i = 0; i != input_rows_count; ++i)
{
start_data[i] = ToStartOfTransform<unit>::execute(time_data[i], num_units, time_zone);
end_data[i] = AddTime<unit>::execute(start_data[i], num_units, time_zone);
@ -283,7 +282,7 @@ struct TimeWindowImpl<TUMBLE_START>
}
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -296,7 +295,7 @@ struct TimeWindowImpl<TUMBLE_START>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 0, function_name);
}
};
@ -311,7 +310,7 @@ struct TimeWindowImpl<TUMBLE_END>
return TimeWindowImpl<TUMBLE_START>::getReturnType(arguments, function_name);
}
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name)
[[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -324,7 +323,7 @@ struct TimeWindowImpl<TUMBLE_END>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 1, function_name);
}
};
@ -372,7 +371,7 @@ struct TimeWindowImpl<HOP>
return std::make_shared<DataTypeTuple>(DataTypes{data_type, data_type});
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
@ -396,28 +395,28 @@ struct TimeWindowImpl<HOP>
/// TODO: add proper support for fractional seconds
case IntervalKind::Kind::Second:
return executeHop<UInt32, IntervalKind::Kind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Minute:
return executeHop<UInt32, IntervalKind::Kind::Minute>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Hour:
return executeHop<UInt32, IntervalKind::Kind::Hour>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Day:
return executeHop<UInt32, IntervalKind::Kind::Day>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Week:
return executeHop<UInt16, IntervalKind::Kind::Week>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Month:
return executeHop<UInt16, IntervalKind::Kind::Month>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Quarter:
return executeHop<UInt16, IntervalKind::Kind::Quarter>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Year:
return executeHop<UInt16, IntervalKind::Kind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
default:
throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet");
}
@ -425,18 +424,17 @@ struct TimeWindowImpl<HOP>
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
executeHop(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count)
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto start = ColumnVector<ToType>::create();
auto end = ColumnVector<ToType>::create();
auto & start_data = start->getData();
auto & end_data = end->getData();
start_data.resize(size);
end_data.resize(size);
start_data.resize(input_rows_count);
end_data.resize(input_rows_count);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
@ -509,7 +507,7 @@ struct TimeWindowImpl<WINDOW_ID>
return std::make_shared<DataTypeUInt32>();
}
static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForHopColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto & hop_interval_column = arguments[1];
@ -533,28 +531,28 @@ struct TimeWindowImpl<WINDOW_ID>
/// TODO: add proper support for fractional seconds
case IntervalKind::Kind::Second:
return executeHopSlice<UInt32, IntervalKind::Kind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Minute:
return executeHopSlice<UInt32, IntervalKind::Kind::Minute>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Hour:
return executeHopSlice<UInt32, IntervalKind::Kind::Hour>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Day:
return executeHopSlice<UInt32, IntervalKind::Kind::Day>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Week:
return executeHopSlice<UInt16, IntervalKind::Kind::Week>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Month:
return executeHopSlice<UInt16, IntervalKind::Kind::Month>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Quarter:
return executeHopSlice<UInt16, IntervalKind::Kind::Quarter>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
case IntervalKind::Kind::Year:
return executeHopSlice<UInt16, IntervalKind::Kind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone, input_rows_count);
default:
throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet");
}
@ -563,17 +561,16 @@ struct TimeWindowImpl<WINDOW_ID>
template <typename ToType, IntervalKind::Kind kind>
static ColumnPtr
executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone)
executeHopSlice(const ColumnDateTime & time_column, UInt64 hop_num_units, UInt64 window_num_units, const DateLUTImpl & time_zone, size_t input_rows_count)
{
Int64 gcd_num_units = std::gcd(hop_num_units, window_num_units);
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto end = ColumnVector<ToType>::create();
auto & end_data = end->getData();
end_data.resize(size);
for (size_t i = 0; i < size; ++i)
end_data.resize(input_rows_count);
for (size_t i = 0; i < input_rows_count; ++i)
{
ToType wstart = ToStartOfTransform<kind>::execute(time_data[i], hop_num_units, time_zone);
ToType wend = AddTime<kind>::execute(wstart, hop_num_units, time_zone);
@ -593,23 +590,23 @@ struct TimeWindowImpl<WINDOW_ID>
return end;
}
static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
ColumnPtr column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name);
ColumnPtr column = TimeWindowImpl<TUMBLE>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(column, 1, function_name);
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
if (arguments.size() == 2)
return dispatchForTumbleColumns(arguments, function_name);
return dispatchForTumbleColumns(arguments, function_name, input_rows_count);
else
{
const auto & third_column = arguments[2];
if (arguments.size() == 3 && WhichDataType(third_column.type).isString())
return dispatchForTumbleColumns(arguments, function_name);
return dispatchForTumbleColumns(arguments, function_name, input_rows_count);
else
return dispatchForHopColumns(arguments, function_name);
return dispatchForHopColumns(arguments, function_name, input_rows_count);
}
}
};
@ -639,7 +636,7 @@ struct TimeWindowImpl<HOP_START>
}
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -652,7 +649,7 @@ struct TimeWindowImpl<HOP_START>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 0, function_name);
}
};
@ -667,7 +664,7 @@ struct TimeWindowImpl<HOP_END>
return TimeWindowImpl<HOP_START>::getReturnType(arguments, function_name);
}
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name)
static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name, size_t input_rows_count)
{
const auto & time_column = arguments[0];
const auto which_type = WhichDataType(time_column.type);
@ -680,7 +677,7 @@ struct TimeWindowImpl<HOP_END>
result_column = time_column.column;
}
else
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name);
result_column = TimeWindowImpl<HOP>::dispatchForColumns(arguments, function_name, input_rows_count);
return executeWindowBound(result_column, 1, function_name);
}
@ -693,9 +690,9 @@ DataTypePtr FunctionTimeWindow<type>::getReturnTypeImpl(const ColumnsWithTypeAnd
}
template <TimeWindowFunctionName type>
ColumnPtr FunctionTimeWindow<type>::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const
ColumnPtr FunctionTimeWindow<type>::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const
{
return TimeWindowImpl<type>::dispatchForColumns(arguments, name);
return TimeWindowImpl<type>::dispatchForColumns(arguments, name, input_rows_count);
}
}

View File

@ -18,7 +18,7 @@ namespace DB
*/
struct FunctionDetectTonalityImpl
{
static ALWAYS_INLINE inline Float32 detectTonality(
static Float32 detectTonality(
const UInt8 * str,
const size_t str_len,
const FrequencyHolder::Map & emotional_dict)
@ -63,13 +63,13 @@ struct FunctionDetectTonalityImpl
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
PaddedPODArray<Float32> & res)
PaddedPODArray<Float32> & res,
size_t input_rows_count)
{
const auto & emotional_dict = FrequencyHolder::getInstance().getEmotionalDict();
size_t size = offsets.size();
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
res[i] = detectTonality(data.data() + prev_offset, offsets[i] - 1 - prev_offset, emotional_dict);
prev_offset = offsets[i];

View File

@ -73,13 +73,11 @@ public:
size_t array_count = arguments.size();
const auto & last_arg = arguments[array_count - 1];
size_t input_rows_count_local = input_rows_count;
bool null_last = true;
if (!isArray(last_arg.type))
{
--array_count;
null_last = check_condition(last_arg, context, input_rows_count_local);
null_last = check_condition(last_arg, context, input_rows_count);
}
ColumnsWithTypeAndName new_args;
@ -119,11 +117,11 @@ public:
}
auto zipped
= FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local);
= FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count);
ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared<DataTypeArray>(result_type), "zipped"}});
auto sorted_tuple
= FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local);
= FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count);
auto null_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
@ -139,10 +137,10 @@ public:
= std::make_shared<DataTypeArray>(makeNullable(nested_types[i]));
ColumnsWithTypeAndName null_array_arg({
{null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"},
{null_type->createColumnConstWithDefaultValue(input_rows_count), null_type, "NULL"},
});
tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_local);
tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count);
tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst();
}
else
@ -153,7 +151,7 @@ public:
auto tuple_coulmn = FunctionFactory::instance()
.get("tupleElement", context)
->build(untuple_args)
->execute(untuple_args, result_type, input_rows_count_local);
->execute(untuple_args, result_type, input_rows_count);
auto out_tmp = ColumnArray::create(nested_types[i]->createColumn());
@ -183,7 +181,7 @@ public:
auto inside_null_type = nested_types[0];
ColumnsWithTypeAndName indexof_args({
arg_of_index,
{inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"},
{inside_null_type->createColumnConstWithDefaultValue(input_rows_count), inside_null_type, "NULL"},
});
auto null_index_datetype = std::make_shared<DataTypeUInt64>();
@ -192,7 +190,7 @@ public:
slice_index.column = FunctionFactory::instance()
.get("indexOf", context)
->build(indexof_args)
->execute(indexof_args, result_type, input_rows_count_local);
->execute(indexof_args, result_type, input_rows_count);
auto null_index_in_array = slice_index.column->get64(0);
if (null_index_in_array > 0)
@ -220,15 +218,15 @@ public:
ColumnsWithTypeAndName slice_args_right(
{{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index});
ColumnWithTypeAndName arr_left{
fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""};
fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count), arg_type, ""};
ColumnWithTypeAndName arr_right{
fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""};
fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count), arg_type, ""};
ColumnsWithTypeAndName arr_cancat({arr_right, arr_left});
auto out_tmp = FunctionFactory::instance()
.get("arrayConcat", context)
->build(arr_cancat)
->execute(arr_cancat, arg_type, input_rows_count_local);
->execute(arr_cancat, arg_type, input_rows_count);
adjusted_columns[i] = std::move(out_tmp);
}
}

View File

@ -124,7 +124,7 @@ public:
bool hasEmptyBound() const { return has_empty_bound; }
inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const
inline bool contains(CoordinateType x, CoordinateType y) const
{
Point point(x, y);
@ -167,7 +167,7 @@ public:
UInt64 getAllocatedBytes() const;
inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const;
bool contains(CoordinateType x, CoordinateType y) const;
private:
enum class CellType : uint8_t
@ -199,7 +199,7 @@ private:
}
/// Inner part of the HalfPlane is the left side of initialized vector.
bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; }
bool contains(CoordinateType x, CoordinateType y) const { return a * x + b * y + c >= 0; }
};
struct Cell
@ -233,7 +233,7 @@ private:
void calcGridAttributes(Box & box);
template <typename T>
T ALWAYS_INLINE getCellIndex(T row, T col) const { return row * grid_size + col; }
T getCellIndex(T row, T col) const { return row * grid_size + col; }
/// Complex case. Will check intersection directly.
inline void addComplexPolygonCell(size_t index, const Box & box);

View File

@ -201,15 +201,15 @@ struct ReplaceRegexpImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
@ -232,13 +232,13 @@ struct ReplaceRegexpImpl
case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All;
}
};
ReplaceStringImpl<Name, convertTrait(replace)>::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets);
ReplaceStringImpl<Name, convertTrait(replace)>::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets, input_rows_count);
return;
}
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -257,19 +257,19 @@ struct ReplaceRegexpImpl
const ColumnString::Offsets & needle_offsets,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
assert(haystack_offsets.size() == needle_offsets.size());
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
@ -302,7 +302,8 @@ struct ReplaceRegexpImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
assert(haystack_offsets.size() == replacement_offsets.size());
@ -311,8 +312,7 @@ struct ReplaceRegexpImpl
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
@ -323,7 +323,7 @@ struct ReplaceRegexpImpl
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
@ -349,20 +349,20 @@ struct ReplaceRegexpImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
assert(haystack_offsets.size() == needle_offsets.size());
assert(needle_offsets.size() == replacement_offsets.size());
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
@ -399,15 +399,15 @@ struct ReplaceRegexpImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
ColumnString::Offset res_offset = 0;
size_t haystack_size = haystack_data.size() / n;
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
re2::RE2::Options regexp_options;
regexp_options.set_log_errors(false); /// don't write error messages to stderr
@ -419,7 +419,7 @@ struct ReplaceRegexpImpl
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t from = i * n;
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + from);

View File

@ -35,7 +35,8 @@ struct ReplaceStringImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
@ -46,8 +47,7 @@ struct ReplaceStringImpl
ColumnString::Offset res_offset = 0;
res_data.reserve(haystack_data.size());
const size_t haystack_size = haystack_offsets.size();
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
/// The current index in the array of strings.
size_t i = 0;
@ -124,21 +124,20 @@ struct ReplaceStringImpl
const ColumnString::Offsets & needle_offsets,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
chassert(haystack_offsets.size() == needle_offsets.size());
const size_t haystack_size = haystack_offsets.size();
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
ColumnString::Offset res_offset = 0;
size_t prev_haystack_offset = 0;
size_t prev_needle_offset = 0;
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
@ -195,24 +194,23 @@ struct ReplaceStringImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
chassert(haystack_offsets.size() == replacement_offsets.size());
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
const size_t haystack_size = haystack_offsets.size();
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
ColumnString::Offset res_offset = 0;
size_t prev_haystack_offset = 0;
size_t prev_replacement_offset = 0;
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
@ -267,15 +265,14 @@ struct ReplaceStringImpl
const ColumnString::Chars & replacement_data,
const ColumnString::Offsets & replacement_offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
chassert(haystack_offsets.size() == needle_offsets.size());
chassert(needle_offsets.size() == replacement_offsets.size());
const size_t haystack_size = haystack_offsets.size();
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
ColumnString::Offset res_offset = 0;
@ -283,7 +280,7 @@ struct ReplaceStringImpl
size_t prev_needle_offset = 0;
size_t prev_replacement_offset = 0;
for (size_t i = 0; i < haystack_size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
@ -345,7 +342,8 @@ struct ReplaceStringImpl
const String & needle,
const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
if (needle.empty())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
@ -355,9 +353,8 @@ struct ReplaceStringImpl
const UInt8 * pos = begin;
ColumnString::Offset res_offset = 0;
size_t haystack_size = haystack_data.size() / n;
res_data.reserve(haystack_data.size());
res_offsets.resize(haystack_size);
res_offsets.resize(input_rows_count);
/// The current index in the string array.
size_t i = 0;
@ -384,13 +381,13 @@ struct ReplaceStringImpl
/// Copy skipped strings without any changes but
/// add zero byte to the end of each string.
while (i < haystack_size && begin + n * (i + 1) <= match)
while (i < input_rows_count && begin + n * (i + 1) <= match)
{
COPY_REST_OF_CURRENT_STRING();
}
/// If you have reached the end, it's time to stop
if (i == haystack_size)
if (i == input_rows_count)
break;
/// Copy unchanged part of current string.

View File

@ -64,7 +64,7 @@ public:
return arguments[0].type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
const ColumnConst * column_tld_list_name = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
FirstSignificantSubdomainCustomLookup tld_lookup(column_tld_list_name->getValue<String>());
@ -72,7 +72,7 @@ public:
if (const ColumnString * col = checkAndGetColumn<ColumnString>(&*arguments[0].column))
{
auto col_res = ColumnString::create();
vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets());
vector(tld_lookup, col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), input_rows_count);
return col_res;
}
else
@ -82,11 +82,11 @@ public:
static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup,
const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets,
size_t input_rows_count)
{
size_t size = offsets.size();
res_offsets.resize(size);
res_data.reserve(size * Extractor::getReserveLengthForElement());
res_offsets.resize(input_rows_count);
res_data.reserve(input_rows_count * Extractor::getReserveLengthForElement());
size_t prev_offset = 0;
size_t res_offset = 0;
@ -95,7 +95,7 @@ public:
Pos start;
size_t length;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Extractor::execute(tld_lookup, reinterpret_cast<const char *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);

Some files were not shown because too many files have changed in this diff Show More