Merge branch 'master' into unified-sets

Nikolai Kochetov 2019-01-21 13:40:47 +03:00
commit e8aa41b6ac
84 changed files with 1091 additions and 772 deletions

View File

@ -90,8 +90,6 @@ if (GLIBC_COMPATIBILITY)
set (USE_INTERNAL_MEMCPY ON)
endif ()
set (COMPILER_FLAGS "${COMPILER_FLAGS}")
string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER})
find_program (LLD_PATH NAMES "lld${COMPILER_POSTFIX}" "lld")
@ -108,10 +106,15 @@ if (LINKER_NAME)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
endif ()
option (PIPE "-pipe compiler option [less /tmp usage, more ram usage]" ON)
if (PIPE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -pipe")
endif ()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000)
option(COMPILER_PIPE "-pipe compiler option [less /tmp usage, more ram usage]" ON)
endif()
if(COMPILER_PIPE)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -pipe")
else()
message(STATUS "Disabling compiler -pipe option (have only ${AVAILABLE_PHYSICAL_MEMORY} mb of memory)")
endif()
include (cmake/test_cpu.cmake)

View File

@ -6,7 +6,7 @@ set (OPENSSL_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES})
if (NOT USE_INTERNAL_SSL_LIBRARY)
if (APPLE)
set (OPENSSL_ROOT_DIR "/usr/local/opt/openssl")
set (OPENSSL_ROOT_DIR "/usr/local/opt/openssl" CACHE INTERNAL "")
# https://rt.openssl.org/Ticket/Display.html?user=guest&pass=guest&id=2232
if (USE_STATIC_LIBRARIES)
message(WARNING "Disable USE_STATIC_LIBRARIES if you have linking problems with OpenSSL on MacOS")

View File

@ -4,6 +4,7 @@
# include (cmake/limit_jobs.cmake)
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd
cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES)
option(PARALLEL_COMPILE_JOBS "Define the maximum number of concurrent compilation jobs" "")
if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
@ -12,7 +13,7 @@ if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_COMPILER_MEM
set (PARALLEL_COMPILE_JOBS 1)
endif ()
endif ()
if (PARALLEL_COMPILE_JOBS)
if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
@ -25,13 +26,12 @@ if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
set (PARALLEL_LINK_JOBS 1)
endif ()
endif ()
if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}")
endif ()
if (LLVM_PARALLEL_LINK_JOBS)
if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS})
endif ()
if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}")
endif ()

View File

@ -139,6 +139,7 @@ if (USE_INTERNAL_CAPNP_LIBRARY)
endif ()
if (USE_INTERNAL_POCO_LIBRARY)
set (POCO_VERBOSE_MESSAGES 0 CACHE INTERNAL "")
set (save_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
set (_save ${ENABLE_TESTS})

contrib/jemalloc vendored (2 changes)

@ -1 +1 @@
Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff
Subproject commit 41b7372eadee941b9164751b8d4963f915d3ceae

View File

@ -2,13 +2,17 @@ if (USE_INCLUDE_WHAT_YOU_USE)
set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH})
endif ()
set (MAX_COMPILER_MEMORY 2500 CACHE INTERNAL "")
if (MAKE_STATIC_LIBRARIES)
set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
if(COMPILER_PIPE)
set(MAX_COMPILER_MEMORY 2500)
else()
set (MAX_LINKER_MEMORY 2500 CACHE INTERNAL "")
endif ()
include (../cmake/limit_jobs.cmake)
set(MAX_COMPILER_MEMORY 1500)
endif()
if(MAKE_STATIC_LIBRARIES)
set(MAX_LINKER_MEMORY 3500)
else()
set(MAX_LINKER_MEMORY 2500)
endif()
include(../cmake/limit_jobs.cmake)
include(cmake/find_vectorclass.cmake)
@ -16,7 +20,7 @@ set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h)
set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h)
include (cmake/version.cmake)
message (STATUS "Will build ${VERSION_FULL}")
message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION}")
configure_file (src/Common/config.h.in ${CONFIG_COMMON})
configure_file (src/Common/config_version.h.in ${CONFIG_VERSION})

View File

@ -1,11 +1,11 @@
# These strings are autochanged from release_lib.sh:
set(VERSION_REVISION 54413 CACHE STRING "") # changed manually for tests
set(VERSION_MAJOR 19 CACHE STRING "")
set(VERSION_MINOR 1 CACHE STRING "")
set(VERSION_PATCH 0 CACHE STRING "")
set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067 CACHE STRING "")
set(VERSION_DESCRIBE v19.1.0-testing CACHE STRING "")
set(VERSION_STRING 19.1.0 CACHE STRING "")
set(VERSION_REVISION 54413)
set(VERSION_MAJOR 19)
set(VERSION_MINOR 1)
set(VERSION_PATCH 0)
set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067)
set(VERSION_DESCRIBE v19.1.0-testing)
set(VERSION_STRING 19.1.0)
# end of autochange
set(VERSION_EXTRA "" CACHE STRING "")
@ -19,8 +19,8 @@ if (VERSION_EXTRA)
string(CONCAT VERSION_STRING ${VERSION_STRING} "." ${VERSION_EXTRA})
endif ()
set (VERSION_NAME "${PROJECT_NAME}" CACHE STRING "")
set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}" CACHE STRING "")
set (VERSION_SO "${VERSION_STRING}" CACHE STRING "")
set (VERSION_NAME "${PROJECT_NAME}")
set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}")
set (VERSION_SO "${VERSION_STRING}")
math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")

View File

@ -1,5 +1,5 @@
add_library (clickhouse-client-lib ${LINK_MODE} Client.cpp)
target_link_libraries (clickhouse-client-lib PRIVATE clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries (clickhouse-client-lib PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY})
if (READLINE_INCLUDE_DIR)
target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR})
endif ()

View File

@ -243,7 +243,7 @@ struct ClusterPartition
UInt64 rows_copied = 0;
UInt64 blocks_copied = 0;
size_t total_tries = 0;
UInt64 total_tries = 0;
};
@ -340,7 +340,7 @@ struct TaskCluster
String default_local_database;
/// Limits number of simultaneous workers
size_t max_workers = 0;
UInt64 max_workers = 0;
/// Base settings for pull and push
Settings settings_common;
@ -773,11 +773,11 @@ public:
}
template <typename T>
decltype(auto) retry(T && func, size_t max_tries = 100)
decltype(auto) retry(T && func, UInt64 max_tries = 100)
{
std::exception_ptr exception;
for (size_t try_number = 1; try_number <= max_tries; ++try_number)
for (UInt64 try_number = 1; try_number <= max_tries; ++try_number)
{
try
{
@ -880,7 +880,7 @@ public:
}
/// Compute set of partitions, assume set of partitions aren't changed during the processing
void discoverTablePartitions(TaskTable & task_table, size_t num_threads = 0)
void discoverTablePartitions(TaskTable & task_table, UInt64 num_threads = 0)
{
/// Fetch partitions list from a shard
{
@ -985,7 +985,7 @@ public:
/// Retry table processing
bool table_is_done = false;
for (size_t num_table_tries = 0; num_table_tries < max_table_tries; ++num_table_tries)
for (UInt64 num_table_tries = 0; num_table_tries < max_table_tries; ++num_table_tries)
{
if (tryProcessTable(task_table))
{
@ -1044,7 +1044,7 @@ protected:
String workers_path = getWorkersPath();
String current_worker_path = getCurrentWorkerNodePath();
size_t num_bad_version_errors = 0;
UInt64 num_bad_version_errors = 0;
while (true)
{
@ -1055,7 +1055,7 @@ protected:
auto version = stat.version;
zookeeper->get(workers_path, &stat);
if (static_cast<size_t>(stat.numChildren) >= task_cluster->max_workers)
if (static_cast<UInt64>(stat.numChildren) >= task_cluster->max_workers)
{
LOG_DEBUG(log, "Too many workers (" << stat.numChildren << ", maximum " << task_cluster->max_workers << ")"
<< ". Postpone processing " << description);
@ -1163,7 +1163,7 @@ protected:
}
// If all task is finished and zxid is not changed then partition could not become dirty again
for (size_t shard_num = 0; shard_num < status_paths.size(); ++shard_num)
for (UInt64 shard_num = 0; shard_num < status_paths.size(); ++shard_num)
{
if (zxid1[shard_num] != zxid2[shard_num])
{
@ -1280,7 +1280,7 @@ protected:
LOG_DEBUG(log, "Execute distributed DROP PARTITION: " << query);
/// Limit number of max executing replicas to 1
size_t num_shards = executeQueryOnCluster(cluster_push, query, nullptr, &settings_push, PoolMode::GET_ONE, 1);
UInt64 num_shards = executeQueryOnCluster(cluster_push, query, nullptr, &settings_push, PoolMode::GET_ONE, 1);
if (num_shards < cluster_push->getShardCount())
{
@ -1299,8 +1299,8 @@ protected:
}
static constexpr size_t max_table_tries = 1000;
static constexpr size_t max_shard_partition_tries = 600;
static constexpr UInt64 max_table_tries = 1000;
static constexpr UInt64 max_shard_partition_tries = 600;
bool tryProcessTable(TaskTable & task_table)
{
@ -1317,7 +1317,7 @@ protected:
Stopwatch watch;
TasksShard expected_shards;
size_t num_failed_shards = 0;
UInt64 num_failed_shards = 0;
++cluster_partition.total_tries;
@ -1368,7 +1368,7 @@ protected:
bool is_unprioritized_task = !previous_shard_is_instantly_finished && shard->priority.is_remote;
PartitionTaskStatus task_status = PartitionTaskStatus::Error;
bool was_error = false;
for (size_t try_num = 0; try_num < max_shard_partition_tries; ++try_num)
for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num)
{
task_status = tryProcessPartitionTask(partition, is_unprioritized_task);
@ -1434,8 +1434,8 @@ protected:
}
}
size_t required_partitions = task_table.cluster_partitions.size();
size_t finished_partitions = task_table.finished_cluster_partitions.size();
UInt64 required_partitions = task_table.cluster_partitions.size();
UInt64 finished_partitions = task_table.finished_cluster_partitions.size();
bool table_is_done = finished_partitions >= required_partitions;
if (!table_is_done)
@ -1645,7 +1645,7 @@ protected:
String query = queryToString(create_query_push_ast);
LOG_DEBUG(log, "Create destination tables. Query: " << query);
size_t shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push,
UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push,
PoolMode::GET_MANY);
LOG_DEBUG(log, "Destination tables " << getDatabaseDotTable(task_table.table_push) << " have been created on " << shards
<< " shards of " << task_table.cluster_push->getShardCount());
@ -1699,7 +1699,7 @@ protected:
std::future<Coordination::ExistsResponse> future_is_dirty_checker;
Stopwatch watch(CLOCK_MONOTONIC_COARSE);
constexpr size_t check_period_milliseconds = 500;
constexpr UInt64 check_period_milliseconds = 500;
/// Will asynchronously check that ZooKeeper connection and is_dirty flag appearing while copy data
auto cancel_check = [&] ()
@ -1917,16 +1917,16 @@ protected:
/** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster
* Returns number of shards for which at least one replica executed query successfully
*/
size_t executeQueryOnCluster(
UInt64 executeQueryOnCluster(
const ClusterPtr & cluster,
const String & query,
const ASTPtr & query_ast_ = nullptr,
const Settings * settings = nullptr,
PoolMode pool_mode = PoolMode::GET_ALL,
size_t max_successful_executions_per_shard = 0) const
UInt64 max_successful_executions_per_shard = 0) const
{
auto num_shards = cluster->getShardsInfo().size();
std::vector<size_t> per_shard_num_successful_replicas(num_shards, 0);
std::vector<UInt64> per_shard_num_successful_replicas(num_shards, 0);
ASTPtr query_ast;
if (query_ast_ == nullptr)
@ -1939,10 +1939,10 @@ protected:
/// We need to execute query on one replica at least
auto do_for_shard = [&] (size_t shard_index)
auto do_for_shard = [&] (UInt64 shard_index)
{
const Cluster::ShardInfo & shard = cluster->getShardsInfo().at(shard_index);
size_t & num_successful_executions = per_shard_num_successful_replicas.at(shard_index);
UInt64 & num_successful_executions = per_shard_num_successful_replicas.at(shard_index);
num_successful_executions = 0;
auto increment_and_check_exit = [&] ()
@ -1951,12 +1951,12 @@ protected:
return max_successful_executions_per_shard && num_successful_executions >= max_successful_executions_per_shard;
};
size_t num_replicas = cluster->getShardsAddresses().at(shard_index).size();
size_t num_local_replicas = shard.getLocalNodeCount();
size_t num_remote_replicas = num_replicas - num_local_replicas;
UInt64 num_replicas = cluster->getShardsAddresses().at(shard_index).size();
UInt64 num_local_replicas = shard.getLocalNodeCount();
UInt64 num_remote_replicas = num_replicas - num_local_replicas;
/// In that case we don't have local replicas, but do it just in case
for (size_t i = 0; i < num_local_replicas; ++i)
for (UInt64 i = 0; i < num_local_replicas; ++i)
{
auto interpreter = InterpreterFactory::get(query_ast, context);
interpreter->execute();
@ -1997,16 +1997,16 @@ protected:
};
{
ThreadPool thread_pool(std::min(num_shards, getNumberOfPhysicalCPUCores()));
ThreadPool thread_pool(std::min<UInt64>(num_shards, getNumberOfPhysicalCPUCores()));
for (size_t shard_index = 0; shard_index < num_shards; ++shard_index)
for (UInt64 shard_index = 0; shard_index < num_shards; ++shard_index)
thread_pool.schedule([=] { do_for_shard(shard_index); });
thread_pool.wait();
}
size_t successful_shards = 0;
for (size_t num_replicas : per_shard_num_successful_replicas)
UInt64 successful_shards = 0;
for (UInt64 num_replicas : per_shard_num_successful_replicas)
successful_shards += (num_replicas > 0);
return successful_shards;
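The running change in this file is replacing size_t with UInt64 for counters that come from configs, are logged, or are compared with values read from ZooKeeper. A minimal standalone sketch of the motivation, using std::uint64_t as a stand-in for the project's UInt64 alias (an assumption, not taken from the codebase):

#include <cstddef>
#include <cstdint>
#include <iostream>

using UInt64 = std::uint64_t;   // stand-in for ClickHouse's UInt64

int main()
{
    // size_t follows the pointer width of the build, so it shrinks to 32 bits on 32-bit targets;
    // a fixed-width 64-bit counter behaves the same everywhere.
    std::cout << "size_t bits: " << sizeof(std::size_t) * 8 << '\n';   // platform dependent
    std::cout << "UInt64 bits: " << sizeof(UInt64) * 8 << '\n';        // always 64

    // Mixed signed/unsigned comparisons, as with stat.numChildren above, are made explicit
    // by casting the signed value to the wider unsigned type before comparing.
    int num_children = 5;
    UInt64 max_workers = 3;
    std::cout << std::boolalpha << (static_cast<UInt64>(num_children) >= max_workers) << '\n';   // true
    return 0;
}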

View File

@ -123,7 +123,7 @@ UInt64 hash(Ts... xs)
UInt64 maskBits(UInt64 x, size_t num_bits)
{
return x & ((1 << num_bits) - 1);
return x & ((1ULL << num_bits) - 1);
}
@ -149,7 +149,7 @@ UInt64 feistelNetwork(UInt64 x, size_t num_bits, UInt64 seed, size_t num_rounds
UInt64 bits = maskBits(x, num_bits);
for (size_t i = 0; i < num_rounds; ++i)
bits = feistelRound(bits, num_bits, seed, i);
return (x & ~((1 << num_bits) - 1)) ^ bits;
return (x & ~((1ULL << num_bits) - 1)) ^ bits;
}
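A standalone illustration of why the shift literal is widened to 1ULL here: a plain 1 is a 32-bit int, so for num_bits of 32 or more the shift overflows and the mask is wrong, while an unsigned 64-bit literal keeps it correct up to 63 bits (the function name below is illustrative):

#include <cassert>
#include <cstdint>

// Mask off the low num_bits of x, as in the hunk above, with the widened literal.
static uint64_t mask_low_bits(uint64_t x, unsigned num_bits)
{
    return x & ((1ULL << num_bits) - 1);   // `1 << num_bits` would be a 32-bit shift
}

int main()
{
    assert(mask_low_bits(~0ULL, 40) == (1ULL << 40) - 1);
    assert(mask_low_bits(0x0123456789ABCDEFULL, 16) == 0xCDEFULL);
    return 0;
}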

View File

@ -9,7 +9,7 @@ add_library (clickhouse-odbc-bridge-lib ${LINK_MODE}
validateODBCConnectionString.cpp
)
target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_common_io daemon dbms)
target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_dictionaries daemon dbms clickhouse_common_io)
target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include)
if (USE_POCO_SQLODBC)

View File

@ -411,7 +411,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setMarkCache(mark_cache_size);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", std::numeric_limits<UInt64>::max());
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500);
if (compiled_expression_cache_size)
global_context->setCompiledExpressionCache(compiled_expression_cache_size);
#endif

View File

@ -3,7 +3,6 @@
#include "CurrentThread.h"
#include <common/logger_useful.h>
#include <Common/ThreadStatus.h>
#include <Common/ObjectPool.h>
#include <Common/TaskStatsInfoGetter.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/Context.h>
@ -24,8 +23,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
SimpleObjectPool<TaskStatsInfoGetter> task_stats_info_getter_pool;
// Smoker's implementation to avoid thread_local usage: error: undefined symbol: __cxa_thread_atexit
#if defined(ARCADIA_ROOT)
struct ThreadStatusPtrHolder : ThreadStatusPtr

View File

@ -36,7 +36,7 @@ namespace
if (0 != pipe2(fds_rw, O_CLOEXEC))
DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE);
#else
if (0 != pipe(fds))
if (0 != pipe(fds_rw))
DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE);
if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC))
DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE);
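The fix above makes the non-pipe2 fallback fill the same array (fds_rw) that the rest of the code reads. A self-contained sketch of the pattern, with generic error handling standing in for the project's throwFromErrno:

#include <fcntl.h>
#include <unistd.h>
#include <stdexcept>

// Create a pipe with both ends marked close-on-exec. pipe2() does this atomically where
// available; the fallback uses pipe() plus fcntl() and must fill the same array the
// callers use, which is exactly what the bug above got wrong.
static void create_cloexec_pipe(int fds_rw[2])
{
#if defined(O_CLOEXEC) && defined(__linux__)
    if (0 != pipe2(fds_rw, O_CLOEXEC))
        throw std::runtime_error("Cannot create pipe");
#else
    if (0 != pipe(fds_rw))
        throw std::runtime_error("Cannot create pipe");
    if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC) || 0 != fcntl(fds_rw[1], F_SETFD, FD_CLOEXEC))
        throw std::runtime_error("Cannot set FD_CLOEXEC");
#endif
}

int main()
{
    int fds_rw[2];
    create_cloexec_pipe(fds_rw);
    close(fds_rw[0]);
    close(fds_rw[1]);
    return 0;
}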

View File

@ -21,9 +21,6 @@ namespace ErrorCodes
}
extern SimpleObjectPool<TaskStatsInfoGetter> task_stats_info_getter_pool;
TasksStatsCounters TasksStatsCounters::current()
{
TasksStatsCounters res;
@ -74,7 +71,7 @@ void ThreadStatus::initPerformanceCounters()
if (TaskStatsInfoGetter::checkPermissions())
{
if (!taskstats_getter)
taskstats_getter = task_stats_info_getter_pool.getDefault();
taskstats_getter = std::make_unique<TaskStatsInfoGetter>();
*last_taskstats = TasksStatsCounters::current();
}

View File

@ -2,7 +2,6 @@
#include <Common/ProfileEvents.h>
#include <Common/MemoryTracker.h>
#include <Common/ObjectPool.h>
#include <IO/Progress.h>
@ -175,8 +174,7 @@ protected:
std::unique_ptr<TasksStatsCounters> last_taskstats;
/// Set to non-nullptr only if we have enough capabilities.
/// We use pool because creation and destruction of TaskStatsInfoGetter objects are expensive.
SimpleObjectPool<TaskStatsInfoGetter>::Pointer taskstats_getter;
std::unique_ptr<TaskStatsInfoGetter> taskstats_getter;
};
}

View File

@ -1039,8 +1039,8 @@ void ZooKeeper::sendThread()
{
/// Wait for the next request in queue. No more than operation timeout. No more than until next heartbeat time.
UInt64 max_wait = std::min(
std::chrono::duration_cast<std::chrono::milliseconds>(next_heartbeat_time - now).count(),
operation_timeout.totalMilliseconds());
UInt64(std::chrono::duration_cast<std::chrono::milliseconds>(next_heartbeat_time - now).count()),
UInt64(operation_timeout.totalMilliseconds()));
RequestInfo info;
if (requests_queue.tryPop(info, max_wait))
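The casts above exist because std::min deduces one template type from both arguments: the chrono duration count and Poco's totalMilliseconds() have different integer types, so both are converted to UInt64 first. A minimal sketch of the same situation (the stand-in types are assumptions, not read from the Poco headers):

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <iostream>

int main()
{
    using UInt64 = std::uint64_t;

    // Stand-ins for the two operands above.
    auto until_next_heartbeat = std::chrono::milliseconds(1500).count();   // long long
    long operation_timeout_ms = 10000;

    // std::min(until_next_heartbeat, operation_timeout_ms) would fail to compile because the
    // two arguments must share one type; converting both keeps the call well formed.
    UInt64 max_wait = std::min(UInt64(until_next_heartbeat), UInt64(operation_timeout_ms));

    std::cout << max_wait << '\n';   // 1500
    return 0;
}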

View File

@ -16,6 +16,7 @@
#cmakedefine01 USE_BASE64
#cmakedefine01 USE_HDFS
#cmakedefine01 USE_XXHASH
#cmakedefine01 USE_INTERNAL_LLVM_LIBRARY
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 LLVM_HAS_RTTI

View File

@ -1,4 +1,4 @@
#include <TableFunctions/parseRemoteDescription.h>
#include "parseRemoteDescription.h"
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>

View File

@ -63,7 +63,7 @@ CompressionCodecZSTD::CompressionCodecZSTD(int level_)
void registerCodecZSTD(CompressionCodecFactory & factory)
{
UInt8 method_code = static_cast<char>(CompressionMethodByte::ZSTD);
UInt8 method_code = UInt8(CompressionMethodByte::ZSTD);
factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr
{
int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL;

View File

@ -78,7 +78,7 @@ Block MergeSortingBlockInputStream::readImpl()
if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort)
{
Poco::File(tmp_path).createDirectories();
temporary_files.emplace_back(new Poco::TemporaryFile(tmp_path));
temporary_files.emplace_back(std::make_unique<Poco::TemporaryFile>(tmp_path));
const std::string & path = temporary_files.back()->path();
WriteBufferFromFile file_buf(path);
CompressedWriteBuffer compressed_buf(file_buf);
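The switch to std::make_unique is the usual ownership hygiene: the smart pointer owns the allocation before the container is touched, so nothing leaks if the push itself throws. A generic sketch (the TemporaryFile type below is a stand-in, not Poco's):

#include <memory>
#include <string>
#include <vector>

struct TemporaryFile
{
    explicit TemporaryFile(std::string path_) : path(std::move(path_)) {}
    std::string path;
};

int main()
{
    std::vector<std::unique_ptr<TemporaryFile>> temporary_files;

    // Preferred: the unique_ptr is constructed first and then moved into the vector.
    temporary_files.emplace_back(std::make_unique<TemporaryFile>("/tmp/merge_sort_block"));

    // The replaced form, emplace_back(new TemporaryFile(...)), leaks the allocation
    // if growing the vector throws before the raw pointer is adopted.
    return 0;
}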

View File

@ -713,7 +713,7 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
readIntBinary(low_cardinality_state->num_pending_rows, *indexes_stream);
}
size_t num_rows_to_read = std::min(limit, low_cardinality_state->num_pending_rows);
size_t num_rows_to_read = std::min<UInt64>(limit, low_cardinality_state->num_pending_rows);
readIndexes(num_rows_to_read);
limit -= num_rows_to_read;
low_cardinality_state->num_pending_rows -= num_rows_to_read;

View File

@ -20,9 +20,7 @@ endif()
if(USE_POCO_SQLODBC)
target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY})
if (NOT USE_INTERNAL_POCO_LIBRARY)
target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIR} ${Poco_SQL_INCLUDE_DIR})
endif()
target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIR} ${Poco_SQL_INCLUDE_DIR})
endif()
if(Poco_Data_FOUND)
@ -31,9 +29,7 @@ endif()
if(USE_POCO_DATAODBC)
target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_DataODBC_LIBRARY} ${Poco_Data_LIBRARY})
if (NOT USE_INTERNAL_POCO_LIBRARY)
target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIR})
endif()
target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIR})
endif()
if(USE_POCO_MONGODB)

View File

@ -73,7 +73,7 @@ public:
{
size_t language_id = static_cast<size_t>(language);
if (region_id > names_refs[language_id].size())
if (region_id >= names_refs[language_id].size())
return StringRef("", 0);
StringRef ref = names_refs[language_id][region_id];
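A small standalone illustration of the off-by-one fixed above: for a container of size N the valid indices are 0..N-1, so the guard must also reject region_id == size(); the data and names below are invented for the example:

#include <cassert>
#include <string>
#include <vector>

// Return the name for region_id, or an empty string when the id is out of range.
static std::string region_name(const std::vector<std::string> & names, size_t region_id)
{
    if (region_id >= names.size())   // `>` alone lets region_id == names.size() through
        return {};
    return names[region_id];
}

int main()
{
    std::vector<std::string> names{"World", "Europe", "Asia"};
    assert(region_name(names, 2) == "Asia");
    assert(region_name(names, 3).empty());   // one past the end: rejected instead of read
    return 0;
}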

View File

@ -61,7 +61,7 @@ void PrettyBlockOutputStream::calculateWidths(
elem.type->serializeText(*elem.column, j, out, format_settings);
}
widths[i][j] = std::min(format_settings.pretty.max_column_pad_width,
widths[i][j] = std::min<UInt64>(format_settings.pretty.max_column_pad_width,
UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), prefix));
max_widths[i] = std::max(max_widths[i], widths[i][j]);
}
@ -69,7 +69,7 @@ void PrettyBlockOutputStream::calculateWidths(
/// And also calculate widths for names of columns.
{
// name string doesn't contain Tab, no need to pass `prefix`
name_widths[i] = std::min(format_settings.pretty.max_column_pad_width,
name_widths[i] = std::min<UInt64>(format_settings.pretty.max_column_pad_width,
UTF8::computeWidth(reinterpret_cast<const UInt8 *>(elem.name.data()), elem.name.size()));
max_widths[i] = std::max(max_widths[i], name_widths[i]);
}

View File

@ -47,9 +47,11 @@ if (ENABLE_TESTS)
endif ()
if (USE_EMBEDDED_COMPILER)
target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
llvm_libs_all(REQUIRED_LLVM_LIBRARIES)
target_link_libraries(clickhouse_functions PRIVATE ${REQUIRED_LLVM_LIBRARIES})
target_include_directories(clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
endif ()
if (USE_BASE64)
target_include_directories (clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR})
if(USE_BASE64)
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR})
endif()

View File

@ -512,8 +512,8 @@ static std::optional<DataTypes> removeNullables(const DataTypes & types)
if (!typeid_cast<const DataTypeNullable *>(type.get()))
continue;
DataTypes filtered;
for (const auto & type : types)
filtered.emplace_back(removeNullable(type));
for (const auto & sub_type : types)
filtered.emplace_back(removeNullable(sub_type));
return filtered;
}
return {};

View File

@ -132,7 +132,7 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers &
has_column = remote_columns.hasPhysical(column_name);
}
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, has_column);
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, Field(has_column));
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <atomic>
#include <cstddef>
#include <common/Types.h>
#include <Core/Defines.h>

View File

@ -48,7 +48,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions(
source_column_names.emplace_back(column.name_and_type);
ASTPtr query = expression_list;
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, source_column_names, required_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns);
ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns);
auto joined_block_actions = analyzer.getActions(false);

View File

@ -137,10 +137,7 @@ void AsynchronousMetrics::update()
#if USE_EMBEDDED_COMPILER
{
if (auto compiled_expression_cache = context.getCompiledExpressionCache())
{
set("CompiledExpressionCacheBytes", compiled_expression_cache->weight());
set("CompiledExpressionCacheCount", compiled_expression_cache->count());
}
}
#endif

View File

@ -16,18 +16,21 @@ struct ColumnNamesContext
{
struct JoinedTable
{
const ASTTableExpression * expr;
const ASTTableJoin * join;
const ASTTableExpression * expr = nullptr;
const ASTTableJoin * join = nullptr;
std::optional<String> alias() const
{
String alias;
if (expr->database_and_table_name)
alias = expr->database_and_table_name->tryGetAlias();
else if (expr->table_function)
alias = expr->table_function->tryGetAlias();
else if (expr->subquery)
alias = expr->subquery->tryGetAlias();
if (expr)
{
if (expr->database_and_table_name)
alias = expr->database_and_table_name->tryGetAlias();
else if (expr->table_function)
alias = expr->table_function->tryGetAlias();
else if (expr->subquery)
alias = expr->subquery->tryGetAlias();
}
if (!alias.empty())
return alias;
return {};
@ -35,9 +38,10 @@ struct ColumnNamesContext
std::optional<String> name() const
{
if (auto * node = expr->database_and_table_name.get())
if (auto * identifier = typeid_cast<const ASTIdentifier *>(node))
return identifier->name;
if (expr)
if (auto * node = expr->database_and_table_name.get())
if (auto * identifier = typeid_cast<const ASTIdentifier *>(node))
return identifier->name;
return {};
}

View File

@ -37,7 +37,7 @@ static ASTPtr addTypeConversion(std::unique_ptr<ASTLiteral> && ast, const String
return res;
}
bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
{
/// Processed
if (typeid_cast<ASTSubquery *>(node.get()) ||
@ -48,6 +48,14 @@ bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr
if (typeid_cast<ASTTableExpression *>(node.get()))
return false;
if (typeid_cast<ASTSelectQuery *>(node.get()))
{
/// Do not go to FROM, JOIN, UNION.
if (typeid_cast<ASTTableExpression *>(child.get()) ||
typeid_cast<ASTSelectQuery *>(child.get()))
return false;
}
return true;
}
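A toy sketch of the traversal rule added here (the node types below are invented, not the ClickHouse AST): when the parent is a SELECT, children that represent table expressions or nested SELECTs are not descended into, so scalar subqueries are only replaced in the expression part of the query:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Ast
{
    std::string kind;                          // "Select", "TableExpression", "Literal", ...
    std::vector<std::shared_ptr<Ast>> children;
};

static bool need_child_visit(const Ast & node, const Ast & child)
{
    if (node.kind == "Select" && (child.kind == "TableExpression" || child.kind == "Select"))
        return false;                          // do not go to FROM, JOIN, UNION
    return true;
}

static void visit(const Ast & node)
{
    std::cout << "visiting " << node.kind << '\n';
    for (const auto & child : node.children)
        if (need_child_visit(node, *child))
            visit(*child);
}

int main()
{
    auto from = std::make_shared<Ast>(Ast{"TableExpression", {}});
    auto expr = std::make_shared<Ast>(Ast{"Literal", {}});
    Ast select{"Select", {from, expr}};
    visit(select);                             // visits Select and Literal, skips TableExpression
    return 0;
}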

View File

@ -161,21 +161,21 @@ auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr)
// Actually this should work for 7.0.0 but now we have OLDER 7.0.0svn in contrib
auto flags = [&](const llvm::orc::SymbolNameSet & symbols)
{
llvm::orc::SymbolFlagsMap flags;
llvm::orc::SymbolFlagsMap flags_map;
for (const auto & symbol : symbols)
{
auto resolved = jsr.lookupFlags({*symbol});
if (resolved && resolved->size())
flags.emplace(symbol, resolved->begin()->second);
flags_map.emplace(symbol, resolved->begin()->second);
}
return flags;
return flags_map;
};
#endif
auto symbols = [&](std::shared_ptr<llvm::orc::AsynchronousSymbolQuery> query, llvm::orc::SymbolNameSet symbols)
auto symbols = [&](std::shared_ptr<llvm::orc::AsynchronousSymbolQuery> query, llvm::orc::SymbolNameSet symbols_set)
{
llvm::orc::SymbolNameSet missing;
for (const auto & symbol : symbols)
for (const auto & symbol : symbols_set)
{
auto resolved = jsr.lookup({*symbol});
if (resolved && resolved->size())
@ -189,70 +189,36 @@ auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr)
}
#endif
#if LLVM_VERSION_MAJOR >= 6
struct CountingMMapper final : public llvm::SectionMemoryManager::MemoryMapper
{
MemoryTracker memory_tracker{VariableContext::Global};
llvm::sys::MemoryBlock allocateMappedMemory(llvm::SectionMemoryManager::AllocationPurpose /*purpose*/,
size_t num_bytes,
const llvm::sys::MemoryBlock * const near_block,
unsigned flags,
std::error_code & error_code) override
{
memory_tracker.alloc(num_bytes);
return llvm::sys::Memory::allocateMappedMemory(num_bytes, near_block, flags, error_code);
}
std::error_code protectMappedMemory(const llvm::sys::MemoryBlock & block, unsigned flags) override
{
return llvm::sys::Memory::protectMappedMemory(block, flags);
}
std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & block) override
{
memory_tracker.free(block.size());
return llvm::sys::Memory::releaseMappedMemory(block);
}
};
#if LLVM_VERSION_MAJOR >= 7
using ModulePtr = std::unique_ptr<llvm::Module>;
#else
using ModulePtr = std::shared_ptr<llvm::Module>;
#endif
struct LLVMContext
{
static inline std::atomic<size_t> id_counter{0};
llvm::LLVMContext context;
std::shared_ptr<llvm::LLVMContext> context;
#if LLVM_VERSION_MAJOR >= 7
llvm::orc::ExecutionSession execution_session;
std::unique_ptr<llvm::Module> module;
#else
std::shared_ptr<llvm::Module> module;
#endif
ModulePtr module;
std::unique_ptr<llvm::TargetMachine> machine;
#if LLVM_VERSION_MAJOR >= 6
std::unique_ptr<CountingMMapper> memory_mapper;
#endif
std::shared_ptr<llvm::SectionMemoryManager> memory_manager;
llvm::orc::RTDyldObjectLinkingLayer object_layer;
llvm::orc::IRCompileLayer<decltype(object_layer), llvm::orc::SimpleCompiler> compile_layer;
llvm::DataLayout layout;
llvm::IRBuilder<> builder;
std::unordered_map<std::string, void *> symbols;
size_t id;
LLVMContext()
: context(std::make_shared<llvm::LLVMContext>())
#if LLVM_VERSION_MAJOR >= 7
: module(std::make_unique<llvm::Module>("jit", context))
, module(std::make_unique<llvm::Module>("jit", *context))
#else
: module(std::make_shared<llvm::Module>("jit", context))
, module(std::make_shared<llvm::Module>("jit", *context))
#endif
, machine(getNativeMachine())
#if LLVM_VERSION_MAJOR >= 6
, memory_mapper(std::make_unique<CountingMMapper>())
, memory_manager(std::make_shared<llvm::SectionMemoryManager>(memory_mapper.get()))
#else
, memory_manager(std::make_shared<llvm::SectionMemoryManager>())
#endif
#if LLVM_VERSION_MAJOR >= 7
, object_layer(execution_session, [this](llvm::orc::VModuleKey)
{
@ -263,32 +229,31 @@ struct LLVMContext
#endif
, compile_layer(object_layer, llvm::orc::SimpleCompiler(*machine))
, layout(machine->createDataLayout())
, builder(context)
, id(id_counter++)
, builder(*context)
{
module->setDataLayout(layout);
module->setTargetTriple(machine->getTargetTriple().getTriple());
}
/// returns used memory
size_t compileAllFunctionsToNativeCode()
void compileAllFunctionsToNativeCode()
{
if (!module->size())
return 0;
llvm::PassManagerBuilder builder;
return;
llvm::PassManagerBuilder pass_manager_builder;
llvm::legacy::PassManager mpm;
llvm::legacy::FunctionPassManager fpm(module.get());
builder.OptLevel = 3;
builder.SLPVectorize = true;
builder.LoopVectorize = true;
builder.RerollLoops = true;
builder.VerifyInput = true;
builder.VerifyOutput = true;
machine->adjustPassManager(builder);
pass_manager_builder.OptLevel = 3;
pass_manager_builder.SLPVectorize = true;
pass_manager_builder.LoopVectorize = true;
pass_manager_builder.RerollLoops = true;
pass_manager_builder.VerifyInput = true;
pass_manager_builder.VerifyOutput = true;
machine->adjustPassManager(pass_manager_builder);
fpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
mpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
builder.populateFunctionPassManager(fpm);
builder.populateModulePassManager(mpm);
pass_manager_builder.populateFunctionPassManager(fpm);
pass_manager_builder.populateModulePassManager(mpm);
fpm.doInitialization();
for (auto & function : *module)
fpm.run(function);
@ -323,26 +288,20 @@ struct LLVMContext
throw Exception("Function " + name + " failed to link", ErrorCodes::CANNOT_COMPILE_CODE);
symbols[name] = reinterpret_cast<void *>(*address);
}
#if LLVM_VERSION_MAJOR >= 6
return memory_mapper->memory_tracker.get();
#else
return 0;
#endif
}
};
class LLVMPreparedFunction : public PreparedFunctionImpl
{
std::string name;
std::shared_ptr<LLVMContext> context;
void * function;
public:
LLVMPreparedFunction(std::string name_, std::shared_ptr<LLVMContext> context)
: name(std::move(name_)), context(context)
LLVMPreparedFunction(const std::string & name_, const std::unordered_map<std::string, void *> & symbols)
: name(name_)
{
auto it = context->symbols.find(name);
if (context->symbols.end() == it)
auto it = symbols.find(name);
if (symbols.end() == it)
throw Exception("Cannot find symbol " + name + " in LLVMContext", ErrorCodes::LOGICAL_ERROR);
function = it->second;
}
@ -373,16 +332,16 @@ public:
}
};
static void compileFunctionToLLVMByteCode(std::shared_ptr<LLVMContext> & context, const IFunctionBase & f)
static void compileFunctionToLLVMByteCode(LLVMContext & context, const IFunctionBase & f)
{
ProfileEvents::increment(ProfileEvents::CompileFunction);
auto & arg_types = f.getArgumentTypes();
auto & b = context->builder;
auto & b = context.builder;
auto * size_type = b.getIntNTy(sizeof(size_t) * 8);
auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy(), size_type);
auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, data_type->getPointerTo() }, /*isVarArg=*/false);
auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, f.getName(), context->module.get());
auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, f.getName(), context.module.get());
auto args = func->args().begin();
llvm::Value * counter_arg = &*args++;
llvm::Value * columns_arg = &*args++;
@ -504,12 +463,21 @@ static CompilableExpression subexpression(const IFunctionBase & f, std::vector<C
};
}
LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, std::shared_ptr<LLVMContext> context, const Block & sample_block)
: name(actions.back().result_name), context(context)
struct LLVMModuleState
{
std::unordered_map<std::string, void *> symbols;
std::shared_ptr<llvm::LLVMContext> major_context;
std::shared_ptr<llvm::SectionMemoryManager> memory_manager;
};
LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, const Block & sample_block)
: name(actions.back().result_name)
, module_state(std::make_unique<LLVMModuleState>())
{
LLVMContext context;
for (const auto & c : sample_block)
/// TODO: implement `getNativeValue` for all types & replace the check with `c.column && toNativeType(...)`
if (c.column && getNativeValue(toNativeType(context->builder, c.type), *c.column, 0))
if (c.column && getNativeValue(toNativeType(context.builder, c.type), *c.column, 0))
subexpressions[c.name] = subexpression(c.column, c.type);
for (const auto & action : actions)
{
@ -530,6 +498,11 @@ LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, std::shar
originals.push_back(action.function_base);
}
compileFunctionToLLVMByteCode(context, *this);
context.compileAllFunctionsToNativeCode();
module_state->symbols = context.symbols;
module_state->major_context = context.context;
module_state->memory_manager = context.memory_manager;
}
llvm::Value * LLVMFunction::compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const
@ -540,8 +513,7 @@ llvm::Value * LLVMFunction::compile(llvm::IRBuilderBase & builder, ValuePlacehol
return it->second(builder, values);
}
PreparedFunctionPtr LLVMFunction::prepare(const Block &, const ColumnNumbers &, size_t) const { return std::make_shared<LLVMPreparedFunction>(name, context); }
PreparedFunctionPtr LLVMFunction::prepare(const Block &, const ColumnNumbers &, size_t) const { return std::make_shared<LLVMPreparedFunction>(name, module_state->symbols); }
bool LLVMFunction::isDeterministic() const
{
@ -622,28 +594,6 @@ static bool isCompilable(const IFunctionBase & function)
return function.isCompilable();
}
size_t CompiledExpressionCache::weight() const
{
#if LLVM_VERSION_MAJOR >= 6
std::lock_guard lock(mutex);
size_t result{0};
std::unordered_set<size_t> seen;
for (const auto & cell : cells)
{
auto function_context = cell.second.value->getContext();
if (!seen.count(function_context->id))
{
result += function_context->memory_mapper->memory_tracker.get();
seen.insert(function_context->id);
}
}
return result;
#else
return Base::weight();
#endif
}
std::vector<std::unordered_set<std::optional<size_t>>> getActionsDependents(const ExpressionActions::Actions & actions, const Names & output_columns)
{
/// an empty optional is a poisoned value prohibiting the column's producer from being removed
@ -748,21 +698,16 @@ void compileFunctions(ExpressionActions::Actions & actions, const Names & output
std::tie(fn, std::ignore) = compilation_cache->getOrSet(hash_key, [&inlined_func=std::as_const(fused[i]), &sample_block] ()
{
Stopwatch watch;
std::shared_ptr<LLVMContext> context = std::make_shared<LLVMContext>();
auto result_fn = std::make_shared<LLVMFunction>(inlined_func, context, sample_block);
size_t used_memory = context->compileAllFunctionsToNativeCode();
ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory);
std::shared_ptr<LLVMFunction> result_fn;
result_fn = std::make_shared<LLVMFunction>(inlined_func, sample_block);
ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds());
return result_fn;
});
}
else
{
std::shared_ptr<LLVMContext> context = std::make_shared<LLVMContext>();
Stopwatch watch;
fn = std::make_shared<LLVMFunction>(fused[i], context, sample_block);
size_t used_memory = context->compileAllFunctionsToNativeCode();
ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory);
fn = std::make_shared<LLVMFunction>(fused[i], sample_block);
ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds());
}

View File

@ -14,19 +14,23 @@
namespace DB
{
struct LLVMContext;
using CompilableExpression = std::function<llvm::Value * (llvm::IRBuilderBase &, const ValuePlaceholders &)>;
struct LLVMModuleState;
class LLVMFunction : public IFunctionBase
{
std::string name;
Names arg_names;
DataTypes arg_types;
std::shared_ptr<LLVMContext> context;
std::vector<FunctionBasePtr> originals;
std::unordered_map<StringRef, CompilableExpression> subexpressions;
std::unique_ptr<LLVMModuleState> module_state;
public:
LLVMFunction(const ExpressionActions::Actions & actions, std::shared_ptr<LLVMContext> context, const Block & sample_block);
LLVMFunction(const ExpressionActions::Actions & actions, const Block & sample_block);
bool isCompilable() const override { return true; }
@ -54,8 +58,7 @@ public:
Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override;
std::shared_ptr<LLVMContext> getContext() const { return context; }
const LLVMModuleState * getLLVMModuleState() const { return module_state.get(); }
};
/** This child of LRUCache breaks one of it's invariants: total weight may be changed after insertion.
@ -63,13 +66,9 @@ public:
*/
class CompiledExpressionCache : public LRUCache<UInt128, LLVMFunction, UInt128Hash>
{
private:
using Base = LRUCache<UInt128, LLVMFunction, UInt128Hash>;
public:
using Base = LRUCache<UInt128, LLVMFunction, UInt128Hash>;
using Base::Base;
size_t weight() const;
};
/// For each APPLY_FUNCTION action, try to compile the function to native code; if the only uses of a compilable

View File

@ -242,7 +242,7 @@ static ColumnsDeclarationAndModifiers parseColumns(const ASTExpressionList & col
/// set missing types and wrap default_expression's in a conversion-function if necessary
if (!defaulted_columns.empty())
{
auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, columns);
auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, columns);
const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true);
const auto block = actions->getSampleBlock();

View File

@ -99,7 +99,6 @@ BlockIO InterpreterInsertQuery::execute()
out = std::make_shared<PushingToViewsBlockOutputStream>(query.database, query.table, table, context, query_ptr, query.no_destination);
/// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side.
/// Client-side bufferization might cause excessive timeouts (especially in case of big blocks).
if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()))

View File

@ -184,8 +184,8 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (storage)
table_lock = storage->lockStructure(false);
syntax_analyzer_result = SyntaxAnalyzer(context, storage)
.analyze(query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, subquery_depth);
syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze(
query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
query_analyzer = std::make_unique<ExpressionAnalyzer>(
query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze);
@ -792,7 +792,7 @@ void InterpreterSelectQuery::executeFetchColumns(
}
auto additional_source_columns_set = ext::map<NameSet>(additional_source_columns, [] (const auto & it) { return it.name; });
auto syntax_result = SyntaxAnalyzer(context, storage).analyze(required_columns_expr_list, additional_source_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(required_columns_expr_list, additional_source_columns, {}, storage);
alias_actions = ExpressionAnalyzer(required_columns_expr_list, syntax_result, context).getActions(true);
/// The set of required columns could be added as a result of adding an action to calculate ALIAS.
@ -829,7 +829,7 @@ void InterpreterSelectQuery::executeFetchColumns(
}
prewhere_info->prewhere_actions = std::move(new_actions);
auto analyzed_result = SyntaxAnalyzer(context, {}).analyze(required_prewhere_columns_expr_list, storage->getColumns().getAllPhysical());
auto analyzed_result = SyntaxAnalyzer(context).analyze(required_prewhere_columns_expr_list, storage->getColumns().getAllPhysical());
prewhere_info->alias_actions =
ExpressionAnalyzer(required_prewhere_columns_expr_list, analyzed_result, context)
.getActions(true, false);

View File

@ -21,6 +21,7 @@
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <csignal>
#include <algorithm>
namespace DB
@ -289,7 +290,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context)
if (replica_names.empty())
return;
ThreadPool pool(std::min(getNumberOfPhysicalCPUCores(), replica_names.size()));
ThreadPool pool(std::min(size_t(getNumberOfPhysicalCPUCores()), replica_names.size()));
for (auto & table : replica_names)
pool.schedule([&] () { tryRestartReplica(table.first, table.second, system_context); });
pool.wait();

View File

@ -194,7 +194,7 @@ void MutationsInterpreter::prepare(bool dry_run)
if (col_default.kind == ColumnDefaultKind::Materialized)
{
auto query = col_default.expression->clone();
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns);
ExpressionAnalyzer analyzer(query, syntax_result, context);
for (const String & dependency : analyzer.getRequiredSourceColumns())
{
@ -203,10 +203,9 @@ void MutationsInterpreter::prepare(bool dry_run)
}
}
}
}
if (!updated_columns.empty())
validateUpdateColumns(storage, updated_columns, column_to_affected_materialized);
}
/// First, break a sequence of commands into stages.
stages.emplace_back(context);
@ -301,7 +300,7 @@ void MutationsInterpreter::prepare(bool dry_run)
for (const String & column : stage.output_columns)
all_asts->children.push_back(std::make_shared<ASTIdentifier>(column));
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(all_asts, all_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(all_asts, all_columns);
stage.analyzer = std::make_unique<ExpressionAnalyzer>(all_asts, syntax_result, context);
ExpressionActionsChain & actions_chain = stage.expressions_chain;

View File

@ -0,0 +1,108 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTExpressionList.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value)
{
/// numeric constant in condition
if (const ASTLiteral * literal = typeid_cast<ASTLiteral *>(condition.get()))
{
if (literal->value.getType() == Field::Types::Int64 ||
literal->value.getType() == Field::Types::UInt64)
{
value = literal->value.get<Int64>();
return true;
}
}
/// cast of numeric constant in condition to UInt8
if (const ASTFunction * function = typeid_cast<ASTFunction * >(condition.get()))
{
if (function->name == "CAST")
{
if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(function->arguments.get()))
{
const ASTPtr & type_ast = expr_list->children.at(1);
if (const ASTLiteral * type_literal = typeid_cast<ASTLiteral *>(type_ast.get()))
{
if (type_literal->value.getType() == Field::Types::String &&
type_literal->value.get<std::string>() == "UInt8")
return tryExtractConstValueFromCondition(expr_list->children.at(0), value);
}
}
}
}
return false;
}
void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast)
{
if (!current_ast)
return;
for (ASTPtr & child : current_ast->children)
{
auto * function_node = typeid_cast<ASTFunction *>(child.get());
if (!function_node || function_node->name != "if")
{
visit(child);
continue;
}
visit(function_node->arguments);
auto * args = typeid_cast<ASTExpressionList *>(function_node->arguments.get());
if (args->children.size() != 3)
throw Exception("Wrong number of arguments for function 'if' (" + toString(args->children.size()) + " instead of 3)",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTPtr condition_expr = args->children[0];
ASTPtr then_expr = args->children[1];
ASTPtr else_expr = args->children[2];
bool condition;
if (tryExtractConstValueFromCondition(condition_expr, condition))
{
ASTPtr replace_ast = condition ? then_expr : else_expr;
ASTPtr child_copy = child;
String replace_alias = replace_ast->tryGetAlias();
String if_alias = child->tryGetAlias();
if (replace_alias.empty())
{
replace_ast->setAlias(if_alias);
child = replace_ast;
}
else
{
/// Only copy of one node is required here.
/// But IAST has only method for deep copy of subtree.
/// This can be a reason of performance degradation in case of deep queries.
ASTPtr replace_ast_deep_copy = replace_ast->clone();
replace_ast_deep_copy->setAlias(if_alias);
child = replace_ast_deep_copy;
}
if (!if_alias.empty())
{
auto alias_it = aliases.find(if_alias);
if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get())
alias_it->second = child;
}
}
}
}
}
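To make the effect of this new visitor concrete, here is a toy version of the same folding on a deliberately simplified tree (this is not the ClickHouse AST; node layout and names are invented): when the first argument of if(cond, then, else) is a numeric constant, the whole call is replaced by the chosen branch before any execution plan is built:

#include <cassert>
#include <memory>
#include <string>
#include <variant>
#include <vector>

struct Node
{
    std::string name;                              // "if", "literal", a column name, ...
    std::variant<std::monostate, long> value;      // set only for literals
    std::vector<std::shared_ptr<Node>> children;
};

// If the node is if(<numeric literal>, then, else), return the branch picked by the literal;
// otherwise return the node unchanged.
static std::shared_ptr<Node> fold_constant_if(const std::shared_ptr<Node> & node)
{
    if (node->name == "if" && node->children.size() == 3
        && std::holds_alternative<long>(node->children[0]->value))
    {
        bool condition = std::get<long>(node->children[0]->value) != 0;
        return condition ? node->children[1] : node->children[2];
    }
    return node;
}

int main()
{
    auto cond = std::make_shared<Node>(Node{"literal", 1L, {}});
    auto then_branch = std::make_shared<Node>(Node{"x", {}, {}});
    auto else_branch = std::make_shared<Node>(Node{"y", {}, {}});
    auto if_node = std::make_shared<Node>(Node{"if", {}, {cond, then_branch, else_branch}});

    assert(fold_constant_if(if_node)->name == "x");   // if(1, x, y) collapses to x
    return 0;
}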

View File

@ -0,0 +1,27 @@
#pragma once
#include <unordered_map>
#include <Parsers/IAST.h>
namespace DB
{
/// It removes Function_if node from AST if condition is constant.
/// TODO: rewrite with InDepthNodeVisitor
class OptimizeIfWithConstantConditionVisitor
{
public:
using Aliases = std::unordered_map<String, ASTPtr>;
OptimizeIfWithConstantConditionVisitor(Aliases & aliases_)
: aliases(aliases_)
{}
void visit(ASTPtr & ast);
private:
Aliases & aliases;
};
}

View File

@ -14,6 +14,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_ELEMENT_IN_AST;
}
static constexpr auto and_function_name = "and";
PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
@ -400,6 +406,8 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que
DatabaseAndTableWithAlias database_and_table_name(*database_and_table_ast);
storage = context.getTable(database_and_table_name.database, database_and_table_name.table);
}
else
throw Exception("Logical error: unexpected table expression", ErrorCodes::LOGICAL_ERROR);
const auto block = storage->getSampleBlock();
for (size_t idx = 0; idx < block.columns(); idx++)

View File

@ -14,12 +14,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
}
using PredicateExpressions = std::vector<ASTPtr>;
using ProjectionWithAlias = std::pair<ASTPtr, String>;
using ProjectionsWithAliases = std::vector<ProjectionWithAlias>;

View File

@ -11,6 +11,7 @@
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTLiteral.h>
@ -34,7 +35,6 @@ namespace DB
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ALIAS_REQUIRED;
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
extern const int EMPTY_NESTED_TABLE;
@ -42,141 +42,6 @@ namespace ErrorCodes
extern const int INVALID_JOIN_ON_EXPRESSION;
}
namespace
{
using LogAST = DebugASTLog<false>; /// set to true to enable logs
using Aliases = SyntaxAnalyzerResult::Aliases;
/// Add columns from storage to source_columns list.
void collectSourceColumns(ASTSelectQuery * select_query, const Context & context,
StoragePtr & storage, NamesAndTypesList & source_columns);
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context);
/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases.
void normalizeTree(
ASTPtr & query,
SyntaxAnalyzerResult & result,
const Names & source_columns,
const NameSet & source_columns_set,
const StoragePtr & storage,
const Context & context,
const ASTSelectQuery * select_query,
bool asterisk_left_columns_only);
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns);
/// Replacing scalar subqueries with constant values.
void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query,
const Context & context, size_t subquery_depth);
/// Remove Function_if AST if condition is constant.
void optimizeIfWithConstantCondition(ASTPtr & current_ast, Aliases & aliases);
/// Eliminates injective function calls and constant expressions from group by statement.
void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context);
/// Remove duplicate items from ORDER BY.
void optimizeOrderBy(const ASTSelectQuery * select_query);
/// Remove duplicate items from LIMIT BY.
void optimizeLimitBy(const ASTSelectQuery * select_query);
/// Remove duplicated columns from USING(...).
void optimizeUsing(const ASTSelectQuery * select_query);
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const Names & source_columns, const NameSet & source_columns_set);
/// Parse JOIN ON expression and collect ASTs for joined columns.
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context);
/// Find the columns that are obtained by JOIN.
void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context);
}
SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
ASTPtr & query,
const NamesAndTypesList & source_columns_,
const Names & required_result_columns,
size_t subquery_depth) const
{
SyntaxAnalyzerResult result;
result.storage = storage;
result.source_columns = source_columns_;
auto * select_query = typeid_cast<ASTSelectQuery *>(query.get());
collectSourceColumns(select_query, context, result.storage, result.source_columns);
const auto & settings = context.getSettingsRef();
Names source_columns_list;
source_columns_list.reserve(result.source_columns.size());
for (const auto & type_name : result.source_columns)
source_columns_list.emplace_back(type_name.name);
NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end());
translateQualifiedNames(query, select_query, source_columns_set, context);
/// Depending on the user's profile, check for the execution rights
/// distributed subqueries inside the IN or JOIN sections and process these subqueries.
InJoinSubqueriesPreprocessor(context).process(select_query);
/// Optimizes logical expressions.
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
/// Creates a dictionary `aliases`: alias -> ASTPtr
{
LogAST log;
QueryAliasesVisitor::Data query_aliases_data{result.aliases};
QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query);
}
/// Common subexpression elimination. Rewrite rules.
normalizeTree(query, result, source_columns_list, source_columns_set, result.storage,
context, select_query, settings.asterisk_left_columns_only != 0);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, select_query, context, subquery_depth);
/// Optimize if with constant condition after constants were substituted for scalar subqueries.
optimizeIfWithConstantCondition(query, result.aliases);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, source_columns_set, context);
/// Remove duplicate items from ORDER BY.
optimizeOrderBy(select_query);
// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);
/// Remove duplicated columns from USING(...).
optimizeUsing(select_query);
/// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set);
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();
collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context);
return std::make_shared<const SyntaxAnalyzerResult>(result);
}
void removeDuplicateColumns(NamesAndTypesList & columns)
{
std::set<String> names;
@ -192,15 +57,12 @@ void removeDuplicateColumns(NamesAndTypesList & columns)
namespace
{
void collectSourceColumns(ASTSelectQuery * select_query, const Context & context,
StoragePtr & storage, NamesAndTypesList & source_columns)
{
if (!storage && select_query)
{
if (auto db_and_table = getDatabaseAndTable(*select_query, 0))
storage = context.tryGetTable(db_and_table->database, db_and_table->table);
}
using LogAST = DebugASTLog<false>; /// set to true to enable logs
/// Add columns from storage to source_columns list.
void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, NamesAndTypesList & source_columns)
{
if (storage)
{
auto physical_columns = storage->getColumns().getAllPhysical();
@ -219,10 +81,11 @@ void collectSourceColumns(ASTSelectQuery * select_query, const Context & context
removeDuplicateColumns(source_columns);
}
/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names.
void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
{
if (!select_query || !select_query->tables || select_query->tables->children.empty())
if (!select_query->tables || select_query->tables->children.empty())
return;
std::vector<DatabaseAndTableWithAlias> tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase());
@ -233,6 +96,7 @@ void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query,
visitor.visit(query);
}
/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases.
void normalizeTree(
ASTPtr & query,
SyntaxAnalyzerResult & result,
@ -297,11 +161,10 @@ bool hasArrayJoin(const ASTPtr & ast)
return false;
}
/// Sometimes we have to calculate more columns in the SELECT clause than will be returned from the query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need fewer columns in the result.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns)
{
if (!select_query)
return;
if (required_result_columns.empty())
return;
@ -335,121 +198,12 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query,
elements = std::move(new_elements);
}
void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query,
const Context & context, size_t subquery_depth)
/// Replacing scalar subqueries with constant values.
void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t subquery_depth)
{
LogAST log;
if (!select_query)
{
ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth};
ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query);
}
else
{
for (auto & child : query->children)
{
/// Do not go to FROM, JOIN, UNION.
if (!typeid_cast<const ASTTableExpression *>(child.get())
&& !typeid_cast<const ASTSelectQuery *>(child.get()))
{
ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth};
ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(child);
}
}
}
}
bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value)
{
/// numeric constant in condition
if (const ASTLiteral * literal = typeid_cast<ASTLiteral *>(condition.get()))
{
if (literal->value.getType() == Field::Types::Int64 ||
literal->value.getType() == Field::Types::UInt64)
{
value = literal->value.get<Int64>();
return true;
}
}
/// cast of numeric constant in condition to UInt8
if (const ASTFunction * function = typeid_cast<ASTFunction * >(condition.get()))
{
if (function->name == "CAST")
{
if (ASTExpressionList * expr_list = typeid_cast<ASTExpressionList *>(function->arguments.get()))
{
const ASTPtr & type_ast = expr_list->children.at(1);
if (const ASTLiteral * type_literal = typeid_cast<ASTLiteral *>(type_ast.get()))
{
if (type_literal->value.getType() == Field::Types::String &&
type_literal->value.get<std::string>() == "UInt8")
return tryExtractConstValueFromCondition(expr_list->children.at(0), value);
}
}
}
}
return false;
}
void optimizeIfWithConstantCondition(ASTPtr & current_ast, Aliases & aliases)
{
if (!current_ast)
return;
for (ASTPtr & child : current_ast->children)
{
auto * function_node = typeid_cast<ASTFunction *>(child.get());
if (!function_node || function_node->name != "if")
{
optimizeIfWithConstantCondition(child, aliases);
continue;
}
optimizeIfWithConstantCondition(function_node->arguments, aliases);
auto * args = typeid_cast<ASTExpressionList *>(function_node->arguments.get());
if (args->children.size() != 3)
throw Exception("Wrong number of arguments for function 'if' (" + toString(args->children.size()) + " instead of 3)",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTPtr condition_expr = args->children[0];
ASTPtr then_expr = args->children[1];
ASTPtr else_expr = args->children[2];
bool condition;
if (tryExtractConstValueFromCondition(condition_expr, condition))
{
ASTPtr replace_ast = condition ? then_expr : else_expr;
ASTPtr child_copy = child;
String replace_alias = replace_ast->tryGetAlias();
String if_alias = child->tryGetAlias();
if (replace_alias.empty())
{
replace_ast->setAlias(if_alias);
child = replace_ast;
}
else
{
/// Only a copy of one node is required here,
/// but IAST only provides a method for deep copying a subtree.
/// This can cause performance degradation for deep queries.
ASTPtr replace_ast_deep_copy = replace_ast->clone();
replace_ast_deep_copy->setAlias(if_alias);
child = replace_ast_deep_copy;
}
if (!if_alias.empty())
{
auto alias_it = aliases.find(if_alias);
if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get())
alias_it->second = child;
}
}
}
ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth};
ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query);
}
/** Calls to these functions in the GROUP BY statement would be
@ -491,9 +245,10 @@ const std::unordered_set<String> possibly_injective_function_names
"dictGetDateTime"
};
/// Eliminates injective function calls and constant expressions from group by statement.
void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context)
{
if (!(select_query && select_query->group_expression_list))
if (!select_query->group_expression_list)
return;
const auto is_literal = [] (const ASTPtr & ast)
@ -594,9 +349,10 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
}
}
/// Remove duplicate items from ORDER BY.
void optimizeOrderBy(const ASTSelectQuery * select_query)
{
if (!(select_query && select_query->order_expression_list))
if (!select_query->order_expression_list)
return;
/// Make unique sorting conditions.
@ -620,9 +376,10 @@ void optimizeOrderBy(const ASTSelectQuery * select_query)
elems = unique_elems;
}
/// Remove duplicate items from LIMIT BY.
void optimizeLimitBy(const ASTSelectQuery * select_query)
{
if (!(select_query && select_query->limit_by_expression_list))
if (!select_query->limit_by_expression_list)
return;
std::set<String> elems_set;
@ -641,11 +398,9 @@ void optimizeLimitBy(const ASTSelectQuery * select_query)
elems = unique_elems;
}
/// Remove duplicated columns from USING(...).
void optimizeUsing(const ASTSelectQuery * select_query)
{
if (!select_query)
return;
auto node = const_cast<ASTTablesInSelectQueryElement *>(select_query->join());
if (!node)
return;
@ -676,9 +431,6 @@ void optimizeUsing(const ASTSelectQuery * select_query)
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const Names & source_columns, const NameSet & source_columns_set)
{
if (!select_query)
return;
ASTPtr array_join_expression_list = select_query->array_join_expression_list();
if (array_join_expression_list)
{
@ -740,6 +492,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const
}
}
/// Parse JOIN ON expression and collect ASTs for joined columns.
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
{
@ -899,12 +652,10 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS
add_columns_from_equals_expr(table_join.on_expression);
}
/// Find the columns that are obtained by JOIN.
void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query,
const NameSet & source_columns, const Context & context)
{
if (!select_query)
return;
const ASTTablesInSelectQueryElement * node = select_query->join();
if (!node)
@ -969,4 +720,94 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s
}
SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
ASTPtr & query,
const NamesAndTypesList & source_columns_,
const Names & required_result_columns,
StoragePtr storage) const
{
auto * select_query = typeid_cast<ASTSelectQuery *>(query.get());
if (!storage && select_query)
{
if (auto db_and_table = getDatabaseAndTable(*select_query, 0))
storage = context.tryGetTable(db_and_table->database, db_and_table->table);
}
SyntaxAnalyzerResult result;
result.storage = storage;
result.source_columns = source_columns_;
collectSourceColumns(select_query, result.storage, result.source_columns);
const auto & settings = context.getSettingsRef();
Names source_columns_list;
source_columns_list.reserve(result.source_columns.size());
for (const auto & type_name : result.source_columns)
source_columns_list.emplace_back(type_name.name);
NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end());
if (select_query)
{
translateQualifiedNames(query, select_query, source_columns_set, context);
/// Depending on the user's profile, check the rights to execute
/// distributed subqueries inside the IN or JOIN sections and process these subqueries.
InJoinSubqueriesPreprocessor(context).process(select_query);
/// Optimizes logical expressions.
LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform();
}
/// Creates a dictionary `aliases`: alias -> ASTPtr
{
LogAST log;
QueryAliasesVisitor::Data query_aliases_data{result.aliases};
QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query);
}
/// Common subexpression elimination. Rewrite rules.
normalizeTree(query, result, source_columns_list, source_columns_set, result.storage,
context, select_query, settings.asterisk_left_columns_only != 0);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, so that aliases do not get lost)
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
if (select_query)
removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, context, subquery_depth);
/// Optimize 'if' with constant condition after constants have been substituted for scalar subqueries.
OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query);
if (select_query)
{
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, source_columns_set, context);
/// Remove duplicate items from ORDER BY.
optimizeOrderBy(select_query);
/// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);
/// Remove duplicated columns from USING(...).
optimizeUsing(select_query);
/// array_join_alias_to_name, array_join_result_to_source.
getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set);
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();
collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context);
}
return std::make_shared<const SyntaxAnalyzerResult>(result);
}
}

View File

@ -54,16 +54,20 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
class SyntaxAnalyzer
{
public:
SyntaxAnalyzer(const Context & context, StoragePtr storage) : context(context), storage(std::move(storage)) {}
SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0)
: context(context_)
, subquery_depth(subquery_depth_)
{}
SyntaxAnalyzerResultPtr analyze(
ASTPtr & query,
const NamesAndTypesList & source_columns_,
const Names & required_result_columns = {},
size_t subquery_depth = 0) const;
StoragePtr storage = {}) const;
private:
const Context & context;
StoragePtr storage;
size_t subquery_depth;
};
}

View File

@ -31,7 +31,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
{
NamesAndTypesList source_columns = {{ "_dummy", std::make_shared<DataTypeUInt8>() }};
auto ast = node->clone();
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, source_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns);
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();
/// There must be at least one column in the block so that it knows the number of rows.

View File

@ -48,7 +48,7 @@ void evaluateMissingDefaults(Block & block,
if (!save_unneeded_columns)
{
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList());
auto syntax_result = SyntaxAnalyzer(context).analyze(default_expr_list, block.getNamesAndTypesList());
ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block);
return;
}
@ -57,7 +57,7 @@ void evaluateMissingDefaults(Block & block,
* we are going to operate on a copy instead of the original block */
Block copy_block{block};
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList());
auto syntax_result = SyntaxAnalyzer(context).analyze(default_expr_list, block.getNamesAndTypesList());
ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(copy_block);
/// move evaluated columns to the original block, materializing them at the same time

View File

@ -107,13 +107,13 @@ int main(int argc, char ** argv)
AggregateFunctionPtr func_avg = factory.get("avg", data_types_uint64);
AggregateFunctionPtr func_uniq = factory.get("uniq", data_types_uint64);
#define INIT \
{ \
value.resize(3); \
\
value[0] = func_count.get();\
value[1] = func_avg.get(); \
value[2] = func_uniq.get(); \
#define INIT \
{ \
value.resize(3); \
\
value[0] = func_count.get(); \
value[1] = func_avg.get(); \
value[2] = func_uniq.get(); \
}
INIT
@ -162,7 +162,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(&it->second) Value(std::move(value));
new(&it->second) Value;
std::swap(it->second, value);
INIT
}
}
@ -192,7 +193,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(&it->second) Value(std::move(value));
new(&it->second) Value;
std::swap(it->second, value);
INIT
}
}
@ -223,7 +225,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(&it->second) Value(std::move(value));
new(&it->second) Value;
std::swap(it->second, value);
INIT
}
}
@ -248,7 +251,7 @@ int main(int argc, char ** argv)
std::unordered_map<Key, Value, DefaultHash<Key>>::iterator it;
for (size_t i = 0; i < n; ++i)
{
it = map.insert(std::make_pair(data[i], std::move(value))).first;
it = map.insert(std::make_pair(data[i], value)).first;
INIT
}
@ -269,7 +272,7 @@ int main(int argc, char ** argv)
map.set_empty_key(-1ULL);
for (size_t i = 0; i < n; ++i)
{
it = map.insert(std::make_pair(data[i], std::move(value))).first;
it = map.insert(std::make_pair(data[i], value)).first;
INIT
}
@ -289,7 +292,7 @@ int main(int argc, char ** argv)
GOOGLE_NAMESPACE::sparse_hash_map<Key, Value, DefaultHash<Key>>::iterator it;
for (size_t i = 0; i < n; ++i)
{
map.insert(std::make_pair(data[i], std::move(value)));
map.insert(std::make_pair(data[i], value));
INIT
}

View File

@ -398,7 +398,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
{
const auto & default_expression = default_column.second.expression;
ASTPtr query = default_expression;
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns);
const auto actions = ExpressionAnalyzer(query, syntax_result, context).getActions(true);
const auto required_columns = actions->getRequiredColumns();
@ -473,7 +473,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
}
ASTPtr query = default_expr_list;
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns);
const auto actions = ExpressionAnalyzer(query, syntax_result, context).getActions(true);
const auto block = actions->getSampleBlock();

View File

@ -304,7 +304,7 @@ BlockInputStreams StorageKafka::read(
if (num_created_consumers == 0)
return BlockInputStreams();
const size_t stream_count = std::min(num_streams, num_created_consumers);
const size_t stream_count = std::min(size_t(num_streams), num_created_consumers);
BlockInputStreams streams;
streams.reserve(stream_count);

View File

@ -91,7 +91,7 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPart()
UInt64 rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes);
if (!rows_to_read)
return rows_to_read;
rows_to_read = std::max(index_granularity, rows_to_read);
rows_to_read = std::max<UInt64>(index_granularity, rows_to_read);
if (current_preferred_max_column_in_block_size_bytes)
{

View File

@ -126,7 +126,7 @@ MergeTreeData::MergeTreeData(
&& !attach && !settings.compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility.
throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS);
auto syntax = SyntaxAnalyzer(global_context, {}).analyze(sample_by_ast, getColumns().getAllPhysical());
auto syntax = SyntaxAnalyzer(global_context).analyze(sample_by_ast, getColumns().getAllPhysical());
columns_required_for_sampling = ExpressionAnalyzer(sample_by_ast, syntax, global_context)
.getRequiredSourceColumns();
}
@ -282,7 +282,7 @@ void MergeTreeData::setPrimaryKeyAndColumns(
if (!added_key_column_expr_list->children.empty())
{
auto syntax = SyntaxAnalyzer(global_context, {}).analyze(added_key_column_expr_list, all_columns);
auto syntax = SyntaxAnalyzer(global_context).analyze(added_key_column_expr_list, all_columns);
Names used_columns = ExpressionAnalyzer(added_key_column_expr_list, syntax, global_context)
.getRequiredSourceColumns();
@ -305,7 +305,7 @@ void MergeTreeData::setPrimaryKeyAndColumns(
}
}
auto new_sorting_key_syntax = SyntaxAnalyzer(global_context, {}).analyze(new_sorting_key_expr_list, all_columns);
auto new_sorting_key_syntax = SyntaxAnalyzer(global_context).analyze(new_sorting_key_expr_list, all_columns);
auto new_sorting_key_expr = ExpressionAnalyzer(new_sorting_key_expr_list, new_sorting_key_syntax, global_context)
.getActions(false);
auto new_sorting_key_sample =
@ -314,7 +314,7 @@ void MergeTreeData::setPrimaryKeyAndColumns(
checkKeyExpression(*new_sorting_key_expr, new_sorting_key_sample, "Sorting");
auto new_primary_key_syntax = SyntaxAnalyzer(global_context, {}).analyze(new_primary_key_expr_list, all_columns);
auto new_primary_key_syntax = SyntaxAnalyzer(global_context).analyze(new_primary_key_expr_list, all_columns);
auto new_primary_key_expr = ExpressionAnalyzer(new_primary_key_expr_list, new_primary_key_syntax, global_context)
.getActions(false);
@ -376,7 +376,7 @@ void MergeTreeData::initPartitionKey()
return;
{
auto syntax_result = SyntaxAnalyzer(global_context, {}).analyze(partition_key_expr_list, getColumns().getAllPhysical());
auto syntax_result = SyntaxAnalyzer(global_context).analyze(partition_key_expr_list, getColumns().getAllPhysical());
partition_key_expr = ExpressionAnalyzer(partition_key_expr_list, syntax_result, global_context).getActions(false);
}
@ -2269,7 +2269,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector(const DataPartS
for (auto state : affordable_states)
{
buf = std::move(res);
std::swap(buf, res);
res.clear();
auto range = getDataPartsStateRange(state);

View File

@ -488,7 +488,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
}
ASTPtr query = filter_function;
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, available_real_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(query, available_real_columns);
filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActions(false);
/// Add columns needed for `sample_by_ast` to `column_names_to_read`.
@ -848,7 +848,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition(
arguments->children.push_back(one);
ASTPtr query = function;
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, data.getColumns().getAllPhysical());
auto syntax_result = SyntaxAnalyzer(context).analyze(query, data.getColumns().getAllPhysical());
out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false);
out_column = function->getColumnName();
}

View File

@ -86,7 +86,6 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
/// We will keep logs after and including this threshold.
UInt64 min_saved_log_pointer = std::numeric_limits<UInt64>::max();
UInt64 min_log_pointer_lost_candidate = std::numeric_limits<UInt64>::max();
Strings entries = zookeeper->getChildren(storage.zookeeper_path + "/log");
@ -118,7 +117,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/host", &host_stat);
String pointer = zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer");
UInt32 log_pointer = 0;
UInt64 log_pointer = 0;
if (!pointer.empty())
log_pointer = parse<UInt64>(pointer);
@ -190,7 +189,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
for (const String & replica : recovering_replicas)
{
String pointer = zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer");
UInt32 log_pointer = 0;
UInt64 log_pointer = 0;
if (!pointer.empty())
log_pointer = parse<UInt64>(pointer);
min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer);
@ -200,7 +199,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
min_saved_log_pointer = std::min(min_saved_log_pointer, min_log_pointer_lost_candidate);
/// We will not touch the last `min_replicated_logs_to_keep` records.
entries.erase(entries.end() - std::min(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end());
entries.erase(entries.end() - std::min<UInt64>(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end());
/// We will not touch records that are no less than `min_saved_log_pointer`.
entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end());

View File

@ -648,7 +648,7 @@ ReplicatedMergeTreeQueue::StringSet ReplicatedMergeTreeQueue::moveSiblingPartsFo
/// Let's find the action to merge this part with others. Let's remember others.
StringSet parts_for_merge;
Queue::iterator merge_entry;
Queue::iterator merge_entry = queue.end();
for (Queue::iterator it = queue.begin(); it != queue.end(); ++it)
{
if ((*it)->type == LogEntry::MERGE_PARTS || (*it)->type == LogEntry::MUTATE_PART)

View File

@ -170,7 +170,7 @@ StorageDistributed::~StorageDistributed() = default;
static ExpressionActionsPtr buildShardingKeyExpression(const ASTPtr & sharding_key, const Context & context, NamesAndTypesList columns, bool project)
{
ASTPtr query = sharding_key;
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(query, columns);
return ExpressionAnalyzer(query, syntax_result, context).getActions(project);
}

View File

@ -15,7 +15,7 @@
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Poco/Path.h>
#include <TableFunctions/parseRemoteDescription.h>
#include <Common/parseRemoteDescription.h>
#include <Common/typeid_cast.h>

View File

@ -26,6 +26,7 @@
#include <DataStreams/MaterializingBlockInputStream.h>
#include <DataStreams/FilterBlockInputStream.h>
#include <ext/range.h>
#include <algorithm>
#include <Parsers/ASTFunction.h>
#include <Parsers/queryToString.h>
@ -219,7 +220,7 @@ BlockInputStreams StorageMerge::read(
size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count);
size_t current_streams = std::min(current_need_streams, remaining_streams);
remaining_streams -= current_streams;
current_streams = std::max(1, current_streams);
current_streams = std::max(size_t(1), current_streams);
StoragePtr storage = it->first;
TableStructureReadLockPtr struct_lock = it->second;
@ -452,7 +453,7 @@ void StorageMerge::convertingSourceStream(const Block & header, const Context &
NamesAndTypesList source_columns = getSampleBlock().getNamesAndTypesList();
NameAndTypePair virtual_column = getColumn("_table");
source_columns.insert(source_columns.end(), virtual_column);
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(where_expression, source_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns);
ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false);
Names required_columns = actions->getRequiredColumns();

View File

@ -157,7 +157,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c
return;
/// Let's analyze and calculate the expression.
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(expression_ast, block.getNamesAndTypesList());
auto syntax_result = SyntaxAnalyzer(context).analyze(expression_ast, block.getNamesAndTypesList());
ExpressionAnalyzer analyzer(expression_ast, syntax_result, context);
ExpressionActionsPtr actions = analyzer.getActions(false);

View File

@ -28,7 +28,7 @@ static void replaceConstFunction(IAST & node, const Context & context, const Nam
{
NamesAndTypesList source_columns = all_columns;
ASTPtr query = function->ptr();
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, source_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_columns);
auto result_block = KeyCondition::getBlockWithConstants(query, syntax_result, context);
if (!result_block.has(child->getColumnName()))
return;
@ -92,7 +92,7 @@ String transformQueryForExternalDatabase(
const Context & context)
{
auto clone_query = query.clone();
auto syntax_result = SyntaxAnalyzer(context, {}).analyze(clone_query, available_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(clone_query, available_columns);
ExpressionAnalyzer analyzer(clone_query, syntax_result, context);
const Names & used_columns = analyzer.getRequiredSourceColumns();

View File

@ -34,7 +34,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, const
res->startup();
return res;
}
throw new Exception("Table function 'numbers' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception("Table function 'numbers' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
void registerTableFunctionNumbers(TableFunctionFactory & factory)

View File

@ -1,3 +1,5 @@
#include "TableFunctionRemote.h"
#include <Storages/getStructureOfRemoteTable.h>
#include <Storages/StorageDistributed.h>
#include <Parsers/ASTIdentifier.h>
@ -8,10 +10,8 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Common/typeid_cast.h>
#include <TableFunctions/TableFunctionRemote.h>
#include <Common/parseRemoteDescription.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/parseRemoteDescription.h>
namespace DB

View File

@ -1,4 +1,6 @@
FROM ubuntu:18.10
FROM ubuntu:18.04
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
@ -21,9 +23,6 @@ RUN apt-get update -y \
lld-7 \
libclang-7-dev \
liblld-7-dev \
llvm-7 \
libllvm7 \
llvm-7-dev \
libicu-dev \
libreadline-dev \
ninja-build \

View File

@ -1,8 +1,10 @@
FROM ubuntu:18.10
FROM ubuntu:18.04
RUN apt-get update -y \
RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
bash \
fakeroot \
cmake \
@ -22,9 +24,6 @@ RUN apt-get update -y \
lld-7 \
libclang-7-dev \
liblld-7-dev \
llvm-7 \
libllvm7 \
llvm-7-dev \
libicu-dev \
libreadline-dev \
ninja-build \
@ -33,8 +32,8 @@ RUN apt-get update -y \
devscripts \
debhelper \
git \
libc++abi-dev \
libc++-dev \
libc++abi-dev \
libboost-program-options-dev \
libboost-system-dev \
libboost-filesystem-dev \

View File

@ -2,7 +2,7 @@
Array of `T`-type items.
`T` can be anything, including an array. Use multi-dimensional arrays with caution. ClickHouse has limited support for multi-dimensional arrays. For example, they can't be stored in `MergeTree` tables.
`T` can be anything, including an array.
## Creating an array
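For example, a minimal sketch of two equivalent ways to create an array (the values are illustrative only):

```sql
SELECT array(1, 2) AS x, toTypeName(x);
SELECT [1, 2, 3] AS y, toTypeName(y);
```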

View File

@ -625,11 +625,11 @@ Path to temporary data for processing large queries.
```
## uncompressed_cache_size
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../operations/table_engines/mergetree.md).
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../settings/settings.md) is enabled.
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../settings/settings.md#setting-use_uncompressed_cache) is enabled.
The uncompressed cache is advantageous for very short queries in individual cases.
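To see how much of this cache is actually used at runtime, the server exposes it through `system.asynchronous_metrics`; a sketch, assuming the metric names used in current releases (they may differ between versions):

```sql
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric LIKE 'UncompressedCache%';
```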

View File

@ -110,6 +110,56 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
However, the block size cannot be more than `max_block_size` rows.
Disabled by default (set to 0). It only works when reading from MergeTree engines.
## merge_tree_uniform_read_distribution {#setting-merge_tree_uniform_read_distribution}
When reading from [MergeTree*](../table_engines/mergetree.md) tables, ClickHouse uses several threads. This setting turns on/off the uniform distribution of reading tasks over the working threads. The algorithm of the uniform distribution aims to make execution time for all the threads approximately equal in a `SELECT` query.
**Possible values**
- 0 — Uniform read distribution turned off.
- 1 — Uniform read distribution turned on.
**Default value** — 1.
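For example, to compare query behaviour with and without the uniform distribution, the setting can be changed for the current session only (a minimal sketch; the table in the query is a hypothetical placeholder):

```sql
SET merge_tree_uniform_read_distribution = 0;

-- hypothetical MergeTree table used only for illustration
SELECT count()
FROM hits
WHERE EventDate = today();
```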
## merge_tree_min_rows_for_concurrent_read {#setting-merge_tree_min_rows_for_concurrent_read}
If the number of rows to be read from a file of a [MergeTree*](../table_engines/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read`, then ClickHouse tries to perform concurrent reading from this file in several threads.
**Possible values**
Any positive integer.
**Default value** — 163840.
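The current value (and whether it has been changed from the default) can be checked through the `system.settings` table, for example:

```sql
SELECT name, value, changed
FROM system.settings
WHERE name = 'merge_tree_min_rows_for_concurrent_read';
```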
## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek}
If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, ClickHouse does not seek through the file but reads the data sequentially.
**Possible values**
Any positive integer.
**Default value** — 0.
## merge_tree_coarse_index_granularity {#setting-merge_tree_coarse_index_granularity}
When searching for data, ClickHouse checks the data marks in the index file. If ClickHouse finds that the required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively.
**Possible values**
Any positive even integer.
**Default value** — 8.
## merge_tree_max_rows_to_use_cache {#setting-merge_tree_max_rows_to_use_cache}
If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it does not use the cache of uncompressed blocks. The [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
**Possible values**
Any positive integer.
**Default value** — 1048576.
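All of the `merge_tree_*` read settings above can also be adjusted per session or per query. A hedged sketch that simply re-applies the default values mentioned in this section (the numbers are illustrative, not tuning recommendations):

```sql
SET merge_tree_min_rows_for_concurrent_read = 163840;
SET merge_tree_min_rows_for_seek = 0;
SET merge_tree_coarse_index_granularity = 8;
SET merge_tree_max_rows_to_use_cache = 1048576;
```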
## log_queries
@ -242,10 +292,10 @@ Whether to count extreme values (the minimums and maximums in columns of a query
For more information, see the section "Extreme values".
## use_uncompressed_cache
## use_uncompressed_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the 'uncompressed_cache_size' configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted.
The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted.
For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1.
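A minimal sketch of enabling the cache for a session of frequent short queries (the table and filter are hypothetical placeholders):

```sql
SET use_uncompressed_cache = 1;

-- a short point query that benefits from cached uncompressed blocks
SELECT count()
FROM visits
WHERE CounterID = 34;
```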

View File

@ -1,7 +1,7 @@
# clickhouse-local
Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../../query_language/index.md#queries) ClickHouse.
Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../../query_language/index.md) ClickHouse.
`clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает ClickHouse, при этом для выполнения операций не требуется запущенный сервер.

View File

@ -1,40 +1,69 @@
## ATTACH {#queries}
# Прочие виды запросов
## ATTACH
Запрос полностью аналогичен запросу `CREATE`, но:
- вместо слова `CREATE` используется слово `ATTACH`;
- запрос не создаёт данные на диске, а предполагает, что данные уже лежат в соответствующих местах, и всего лишь добавляет информацию о таблице в сервер.
- запрос не создаёт данные на диске, а предполагает, что данные уже лежат в соответствующих местах, и всего лишь добавляет информацию о таблице на сервер. После выполнения запроса `ATTACH` сервер будет знать о существовании таблицы.
После выполнения `ATTACH`, сервер будет знать о существовании таблицы.
Если таблица перед этим была отсоединена (`DETACH`), т.е. её структура известна, можно использовать сокращенную форму записи без определения структуры.
Если таблица перед этим была отсоединена (`DETACH`), т.е. её структура известна, то можно использовать сокращенную форму записи без определения структуры.
``` sql
```sql
ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster]
```
Этот запрос используется при старте сервера. Сервер хранит метаданные таблиц в виде файлов с запросами `ATTACH`, которые он просто исполняет при запуске (за исключением системных таблиц, создание которых явно вписано в сервер).
Этот запрос используется при старте сервера. Сервер хранит метаданные таблиц в виде файлов с запросами `ATTACH`, которые он просто исполняет при запуске (за исключением системных таблиц, которые явно создаются на сервере).
## DROP
Запрос имеет два вида: `DROP DATABASE` и `DROP TABLE`.
## CHECK TABLE
``` sql
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
Проверяет таблицу на повреждение данных.
```sql
CHECK TABLE [db.]name
```
Удаляет все таблицы внутри базы данных db, а затем саму базу данных db.
Если указано `IF EXISTS` - не выдавать ошибку, если база данных не существует.
Запрос `CHECK TABLE` сравнивает текущие размеры файлов (в которых хранятся данные из колонок) с ожидаемыми значениями. Если значения не совпадают, данные в таблице считаются поврежденными. Искажение возможно, например, из-за сбоя при записи данных.
``` sql
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
Ответ содержит колонку `result`, содержащую одну строку с типом [Boolean](../data_types/boolean.md). Допустимые значения:
- 0 - данные в таблице повреждены;
- 1 - данные не повреждены.
Запрос `CHECK TABLE` поддерживается только для следующих движков:
- [Log](../operations/table_engines/log.md)
- [TinyLog](../operations/table_engines/tinylog.md)
- StripeLog
В этих движках не предусмотрено автоматическое восстановление данных после сбоя. Поэтому используйте запрос `CHECK TABLE`, чтобы своевременно выявить повреждение данных.
Обратите внимание, высокая защита целостности данных обеспечивается в таблицах семейства [MergeTree](../operations/table_engines/mergetree.md). Для избежания потери данных рекомендуется использовать именно эти таблицы.
**Что делать, если данные повреждены**
В этом случае можно скопировать оставшиеся неповрежденные данные в другую таблицу. Для этого:
1. Создайте новую таблицу с такой же структурой, как у поврежденной таблицы. Для этого выполните запрос `CREATE TABLE <new_table_name> AS <damaged_table_name>`.
2. Установите значение параметра [max_threads](../operations/settings/settings.md#settings-max_threads) в 1. Это нужно для того, чтобы выполнить следующий запрос в одном потоке. Установить значение параметра можно через запрос: `SET max_threads = 1`.
3. Выполните запрос `INSERT INTO <new_table_name> SELECT * FROM <damaged_table_name>`. В результате неповрежденные данные будут скопированы в другую таблицу. Обратите внимание, будут скопированы только те данные, которые следуют до поврежденного участка.
4. Перезапустите `clickhouse-client`, чтобы вернуть предыдущее значение параметра `max_threads`.
## DESCRIBE TABLE
```sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Удаляет таблицу.
Если указано `IF EXISTS` - не выдавать ошибку, если таблица не существует или база данных не существует.
Возвращает два столбца: `name`, `type` типа `String`, в которых описаны имена и типы столбцов указанной таблицы.
Вложенные структуры данных выводятся в "развёрнутом" виде. То есть, каждый столбец - по отдельности, с именем через точку.
## DETACH
Удаляет из сервера информацию о таблице name. Сервер перестаёт знать о существовании таблицы.
``` sql
```sql
DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
@ -43,139 +72,35 @@ DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
Запроса `DETACH DATABASE` нет.
## RENAME
Переименовывает одну или несколько таблиц.
## DROP
``` sql
RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster]
Запрос имеет два вида: `DROP DATABASE` и `DROP TABLE`.
```sql
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
```
Все таблицы переименовываются под глобальной блокировкой. Переименовывание таблицы является лёгкой операцией. Если вы указали после TO другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка).
Удаляет все таблицы внутри базы данных db, а затем саму базу данных db.
Если указано `IF EXISTS` - не выдавать ошибку, если база данных не существует.
## SHOW DATABASES
``` sql
SHOW DATABASES [INTO OUTFILE filename] [FORMAT format]
```sql
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
Выводит список всех баз данных.
Запрос полностью аналогичен запросу `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`.
Смотрите также раздел "Форматы".
## SHOW TABLES
``` sql
SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]
```
Выводит список таблиц
- из текущей БД или из БД db, если указано FROM db;
- всех, или имя которых соответствует шаблону pattern, если указано LIKE 'pattern';
Запрос полностью аналогичен запросу: `SELECT name FROM system.tables WHERE database = 'db' [AND name LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]`.
Смотрите также раздел "Оператор LIKE".
## SHOW PROCESSLIST
``` sql
SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
```
Выводит список запросов, выполняющихся в данный момент времени, кроме запросов `SHOW PROCESSLIST`.
Выдаёт таблицу, содержащую столбцы:
**user** - пользователь, под которым был задан запрос. Следует иметь ввиду, что при распределённой обработке запроса на удалённые серверы запросы отправляются под пользователем default. И SHOW PROCESSLIST показывает имя пользователя для конкретного запроса, а не для запроса, который данный запрос инициировал.
**address** - имя хоста, с которого был отправлен запрос. При распределённой обработке запроса на удалённых серверах — это имя хоста-инициатора запроса. Чтобы проследить, откуда был задан распределённый запрос изначально, следует смотреть SHOW PROCESSLIST на сервере-инициаторе запроса.
**elapsed** - время выполнения запроса, в секундах. Запросы выводятся упорядоченными по убыванию времени выполнения.
**rows_read**, **bytes_read** - сколько было прочитано строк, байт несжатых данных при обработке запроса. При распределённой обработке запроса суммируются данные со всех удалённых серверов. Именно эти данные используются для ограничений и квот.
**memory_usage** - текущее потребление оперативки в байтах. Смотрите настройку max_memory_usage.
**query** - сам запрос. В запросах INSERT данные для вставки не выводятся.
**query_id** - идентификатор запроса. Непустой, только если был явно задан пользователем. При распределённой обработке запроса идентификатор запроса не передаётся на удалённые серверы.
Запрос полностью аналогичен запросу: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`.
Полезный совет (выполните в консоли):
```bash
watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
```
## SHOW CREATE TABLE
``` sql
SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица.
## DESCRIBE TABLE
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Возвращает два столбца: `name`, `type` типа `String`, в которых описаны имена и типы столбцов указанной таблицы.
Вложенные структуры данных выводятся в "развёрнутом" виде. То есть, каждый столбец - по отдельности, с именем через точку.
Удаляет таблицу.
Если указано `IF EXISTS` - не выдавать ошибку, если таблица не существует или база данных не существует.
## EXISTS
``` sql
```sql
EXISTS [TEMPORARY] TABLE [db.]name [INTO OUTFILE filename] [FORMAT format]
```
Возвращает один столбец типа `UInt8`, содержащий одно значение - `0`, если таблицы или БД не существует и `1`, если таблица в указанной БД существует.
## USE
``` sql
USE db
```
Позволяет установить текущую базу данных для сессии.
Текущая база данных используется для поиска таблиц, если база данных не указана в запросе явно через точку перед именем таблицы.
При использовании HTTP протокола, запрос не может быть выполнен, так как понятия сессии не существует.
## SET
``` sql
SET param = value
```
Позволяет установить настройку `param` в значение `value`. Также можно одним запросом установить все настройки из заданного профиля настроек - для этого, укажите в качестве имени настройки profile. Подробнее смотри раздел "Настройки".
Настройка устанавливается на сессию, или на сервер (глобально), если указано `GLOBAL`.
При установке глобальной настройки, настройка на все уже запущенные сессии, включая текущую сессию, не устанавливается, а будет использована только для новых сессий.
При перезапуске сервера, теряются настройки, установленные с помощью `SET`.
Установить настройки, которые переживут перезапуск сервера, можно только с помощью конфигурационного файла сервера.
## OPTIMIZE
``` sql
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL]
```
Просит движок таблицы сделать что-нибудь, что может привести к более оптимальной работе.
Поддерживается только движками `*MergeTree`, в котором выполнение этого запроса инициирует внеочередное слияние кусков данных.
Если указан `PARTITION`, то оптимизация будет производиться только для указаной партиции.
Если указан `FINAL`, то оптимизация будет производиться даже когда все данные уже лежат в одном куске.
!!! warning "Внимание"
Запрос OPTIMIZE не может устранить причину появления ошибки "Too many parts".
## KILL QUERY
``` sql
```sql
KILL QUERY [ON CLUSTER cluster]
WHERE <where expression to SELECT FROM system.processes query>
[SYNC|ASYNC|TEST]
@ -185,8 +110,9 @@ KILL QUERY [ON CLUSTER cluster]
Пытается принудительно остановить исполняющиеся в данный момент запросы.
Запросы для принудительной остановки выбираются из таблицы system.processes с помощью условия, указанного в секции `WHERE` запроса `KILL`.
Примеры:
``` sql
Примеры
```sql
-- Принудительно останавливает все запросы с указанным query_id:
KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90'
@ -208,3 +134,126 @@ Readonly-пользователи могут останавливать толь
Тестовый вариант запроса (`TEST`) только проверяет права пользователя и выводит список запросов для остановки.
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/misc/) <!--hide-->
## OPTIMIZE
```sql
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL]
```
Просит движок таблицы сделать что-нибудь, что может привести к более оптимальной работе.
Поддерживается только движками `*MergeTree`, в котором выполнение этого запроса инициирует внеочередное слияние кусков данных.
Если указан `PARTITION`, то оптимизация будет производиться только для указаной партиции.
Если указан `FINAL`, то оптимизация будет производиться даже когда все данные уже лежат в одном куске.
!!! warning "Внимание"
Запрос OPTIMIZE не может устранить причину появления ошибки "Too many parts".
## RENAME
Переименовывает одну или несколько таблиц.
```sql
RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster]
```
Все таблицы переименовываются под глобальной блокировкой. Переименовывание таблицы является лёгкой операцией. Если вы указали после TO другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка).
## SET
```sql
SET param = value
```
Позволяет установить настройку `param` в значение `value`. Также можно одним запросом установить все настройки из заданного профиля настроек. Для этого укажите 'profile' в качестве имени настройки. Подробнее смотрите в разделе "Настройки".
Настройка устанавливается на сессию, или на сервер (глобально), если указано `GLOBAL`.
При установке глобальных настроек, эти настройки не применяются к уже запущенной сессии, включая текущую сессию. Она будет использована только для новых сессий.
При перезапуске сервера теряются настройки, установленные с помощью `SET`.
Установить настройки, которые переживут перезапуск сервера, можно только с помощью конфигурационного файла сервера.
## SHOW CREATE TABLE
```sql
SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица.
## SHOW DATABASES
```sql
SHOW DATABASES [INTO OUTFILE filename] [FORMAT format]
```
Выводит список всех баз данных.
Запрос полностью аналогичен запросу `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`.
Смотрите также раздел "Форматы".
## SHOW PROCESSLIST
```sql
SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
```
Выводит список запросов, выполняющихся в данный момент времени, кроме запросов `SHOW PROCESSLIST`.
Выдаёт таблицу, содержащую столбцы:
**user** - пользователь, под которым был задан запрос. Следует иметь ввиду, что при распределённой обработке запроса на удалённые серверы запросы отправляются под пользователем 'default'. И SHOW PROCESSLIST показывает имя пользователя для конкретного запроса, а не для запроса, который данный запрос инициировал.
**address** - имя хоста, с которого был отправлен запрос. При распределённой обработке запроса на удалённых серверах — это имя хоста-инициатора запроса. Чтобы проследить, откуда был задан распределённый запрос изначально, следует смотреть SHOW PROCESSLIST на сервере-инициаторе запроса.
**elapsed** - время выполнения запроса, в секундах. Запросы выводятся в порядке убывания времени выполнения.
**rows_read**, **bytes_read** - сколько было прочитано строк, байт несжатых данных при обработке запроса. При распределённой обработке запроса суммируются данные со всех удалённых серверов. Именно эти данные используются для ограничений и квот.
**memory_usage** - текущее потребление оперативки в байтах. Смотрите настройку 'max_memory_usage'.
**query** - сам запрос. В запросах INSERT данные для вставки не выводятся.
**query_id** - идентификатор запроса. Непустой, только если был явно задан пользователем. При распределённой обработке запроса идентификатор запроса не передаётся на удалённые серверы.
Запрос полностью аналогичен запросу: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`.
Полезный совет (выполните в консоли):
```bash
watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
```
## SHOW TABLES
```sql
SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]
```
Выводит список таблиц:
- из текущей базы данных или из базы db, если указано `FROM db`;
- всех, или имя которых соответствует шаблону pattern, если указано `LIKE 'pattern'`;
Запрос полностью аналогичен запросу: `SELECT name FROM system.tables WHERE database = 'db' [AND name LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]`.
Смотрите также раздел "Оператор LIKE".
## TRUNCATE
```sql
TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
Удаляет все данные из таблицы. Если условие `IF EXISTS` не указано, запрос вернет ошибку, если таблицы не существует.
Запрос `TRUNCATE` не поддерживается для следующих движков: [View](../operations/table_engines/view.md), [File](../operations/table_engines/file.md), [URL](../operations/table_engines/url.md) and [Null](../operations/table_engines/null.md).
## USE
```sql
USE db
```
Позволяет установить текущую базу данных для сессии.
Текущая база данных используется для поиска таблиц, если база данных не указана в запросе явно через точку перед именем таблицы.
При использовании HTTP протокола запрос не может быть выполнен, так как понятия сессии не существует.

View File

@ -178,11 +178,11 @@ nav:
- '开发者指南':
- 'hidden': 'development/index.md'
- 'Overview of ClickHouse architecture': 'development/architecture.md'
- 'How to build ClickHouse on Linux': 'development/build.md'
- 'How to build ClickHouse on Mac OS X': 'development/build_osx.md'
- 'How to write C++ code': 'development/style.md'
- 'How to run ClickHouse tests': 'development/tests.md'
- 'ClickHouse架构概述': 'development/architecture.md'
- '如何在Linux中编译ClickHouse': 'development/build.md'
- '如何在Mac OS X中编译ClickHouse': 'development/build_osx.md'
- '如何编写C++代码': 'development/style.md'
- '如何运行ClickHouse测试': 'development/tests.md'
- '新功能特性':
- '路线图': 'roadmap.md'

View File

@ -99,7 +99,7 @@ def build_for_lang(lang, args):
site_dir=os.path.join(args.output_dir, lang),
strict=True,
theme=theme_cfg,
copyright='©2016–2018 Yandex LLC',
copyright='©2016–2019 Yandex LLC',
use_directory_urls=True,
repo_name='yandex/ClickHouse',
repo_url='https://github.com/yandex/ClickHouse/',

View File

@ -40,8 +40,10 @@
{% block htmltitle %}
{% if page and page.meta and page.meta.title %}
<title>{{ page.meta.title }}</title>
{% elif page and page.title and not page.is_homepage %}
{% elif page and page.title and not page.is_homepage and page.title != 'hidden' %}
<title>{{ page.title }} - {{ config.site_name }}</title>
{% elif page and page.title and not page.is_homepage and page.title == 'hidden' and page.ancestors %}
<title>{{ (page.ancestors | first).title }} - {{ config.site_name }}</title>
{% else %}
<title>{{ config.site_name }}</title>
{% endif %}

View File

@ -1 +0,0 @@
../../en/development/tests.md

View File

@ -0,0 +1,257 @@
# ClickHouse 测试
## 功能性测试
功能性测试是最简便使用的。绝大部分 ClickHouse 的功能可以通过功能性测试来测试,任何代码的更改都必须通过该测试。
每个功能测试会向正在运行的 ClickHouse服 务器发送一个或多个查询,并将结果与预期结果进行比较。
测试用例在 `dbms/src/tests/queries` 目录中。这里有两个子目录:`stateless` 和 `stateful`目录。 无状态的测试无需预加载测试数据集 - 通常是在测试运行期间动态创建小量的数据集。有状态测试需要来自 Yandex.Metrica 的预加载测试数据,而不向一般公众提供。 我们倾向于仅使用“无状态”测试并避免添加新的“有状态”测试。
每个测试用例可以是两种类型之一:`.sql` 和 `.sh`。`.sql` 测试文件是用于管理`clickhouse-client --multiquery --testmode`的简单SQL脚本。`.sh` 测试文件是一个可以自己运行的脚本。
要运行所有测试,请使用 `dbms/tests/clickhouse-test` 工具,用 `--help` 可以获取所有的选项列表。您可以简单地运行所有测试或运行测试名称中的子字符串过滤的测试子集:`./clickhouse-test substring`。
调用功能测试最简单的方法是将 `clickhouse-client` 复制到`/usr/bin/`,运行`clickhouse-server`,然后从自己的目录运行`./ clickhouse-test`。
要添加新测试,请在 `dbms/src/tests/queries/0_stateless` 目录内添加新的 `.sql``.sh` 文件,手动检查,然后按以下方式生成 `.reference` 文件: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`
测试应该只使用(创建,删除等)`test` 数据库中的表,这些表假定是事先创建的; 测试也可以使用临时表。
如果要在功能测试中使用分布式查询,可以利用 `remote` 表函数和 `127.0.0.{1..2}` 地址为服务器查询自身; 或者您可以在服务器配置文件中使用预定义的测试集群,例如`test_shard_localhost`。
有些测试在名称中标有 `zookeeper``shard` 或 `long`。`zookeeper` 用于使用ZooKeeper的测试; `shard` 用于需要服务器监听`127.0.0.*`的测试。`long` 适用于运行时间稍长一秒的测试。
## 已知的bug
如果我们知道一些可以通过功能测试轻松复制的错误,我们将准备好的功能测试放在 `dbms/src/tests/queries/bugs` 目录中。当修复错误时,这些测试将被移动到 `dbms/src/tests/queries/0_stateless` 目录中。
## 集成测试
集成测试允许在集群配置中测试 ClickHouse并与其他服务器如MySQLPostgresMongoDB进行 ClickHouse 交互。它们可用于模拟网络拆分数据包丢弃等。这些测试在Docker 下运行,并使用各种软件创建多个容器。
参考 `dbms/tests/integration/README.md` 文档关于如何使用集成测试。
请注意ClickHouse 与第三方驱动程序的集成未经过测试。此外,我们目前还没有与 JDBC 和ODBC 驱动程序进行集成测试。
## 单元测试
当您想要测试整个 ClickHouse而不是单个独立的库或类时单元测试非常有用。您可以使用`ENABLE_TESTS` CMake 选项启用或禁用测试构建。单元测试(和其他测试程序)位于代码中的`tests` 子目录中。要运行单元测试,请键入 `ninja test`。有些测试使用 `gtest`,但有些只是在测试失败时返回非零状态码。
如果代码已经被功能测试覆盖(并且功能测试通常使用起来要简单得多),则不一定要进行单元测试。
## 性能测试
性能测试允许测量和比较综合查询中 ClickHouse 的某些独立部分的性能。测试位于`dbms/tests/performance` 目录中。每个测试都由 `.xml` 文件表示,并附有测试用例的描述。使用 `clickhouse performance-test` 工具(嵌入在 `clickhouse` 二进制文件中)运行测试。请参阅 `--help` 以进行调用。
每个测试在循环中运行一个或多个查询(可能带有参数组合),并具有一些停止条件(如“最大执行速度不会在三秒内更改”)并测量一些有关查询性能的指标(如“最大执行速度”))。某些测试可以包含预加载的测试数据集的前提条件。
如果要在某些情况下提高 ClickHouse 的性能,并且如果可以在简单查询上观察到改进,则强烈建议编写性能测试。在测试过程中使用 `perf top` 或其他 perf 工具总是有意义的。
性能测试不是基于每个提交运行的。不收集性能测试结果,我们手动比较它们。
## 测试工具和脚本
`tests`目录中的一些程序不是准备测试,而是测试工具。例如,对于`Lexer`,有一个工具`dbms/src/Parsers/tests/lexer` 标准输出。您可以使用这些工具作为代码示例以及探索和手动测试。
您还可以将一对文件 `.sh``.reference` 与工具放在一些预定义的输入上运行它 - 然后可以将脚本结果与 `.reference` 文件进行比较。这些测试不是自动化的。
## 杂项测试
有一些外部字典的测试位于 `dbms/tests/external_dictionaries`,机器学习模型在`dbms/tests/external_models`目录。这些测试未更新,必须转移到集成测试。
对于分布式数据的插入,有单独的测试。此测试在单独的服务器上运行 ClickHouse 集群并模拟各种故障情况网络拆分数据包丢弃ClickHouse 节点之间ClickHouse 和 ZooKeeper之间ClickHouse 服务器和客户端之间等),进行 `kill -9``kill -STOP` 和`kill -CONT` 等操作,类似[Jepsen](https://aphyr.com/tags/Jepsen)。然后,测试检查是否已写入所有已确认的插入,并且所有已拒绝的插入都未写入。
在 ClickHouse 开源之前,分布式测试是由单独的团队编写的,但该团队不再使用 ClickHouse测试是在 Java 中意外编写的。由于这些原因,必须重写分布式测试并将其移至集成测试。
## 手动测试
当您开发了新的功能,做手动测试也是合理的。可以按照以下步骤来进行:
编译 ClickHouse。在命令行中运行 ClickHouse进入 `dbms/src/programs/clickhouse-server` 目录并运行 `./clickhouse-server`。它会默认使用当前目录的配置文件 (`config.xml` `users.xml` 以及在 `config.d``users.d` 目录的文件)。可以使用 `dbms/src/programs/clickhouse-client/clickhouse-client` 来连接数据库。
或者,您可以安装 ClickHouse 软件包:从 Yandex 存储库中获得稳定版本或者您可以在ClickHouse源根目录中使用 `./release` 构建自己的软件包。然后使用 `sudo service clickhouse-server start` 启动服务器(或停止服务器)。在 `/etc/clickhouse-server/clickhouse-server.log` 中查找日志。
当您的系统上已经安装了 ClickHouse 时,您可以构建一个新的 `clickhouse` 二进制文件并替换现有的二进制文件:
```
sudo service clickhouse-server stop
sudo cp ./clickhouse /usr/bin/
sudo service clickhouse-server start
```
您也可以停止 clickhouse-server 并使用相同的配置运行您自己的服务器,日志打印到终端:
```
sudo service clickhouse-server stop
sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
使用 gdb 的一个示例:
```
sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml
```
如果 clickhouse-server 已经运行并且您不想停止它,您可以更改 `config.xml` 中的端口号(或在 `config.d` 目录中的文件中覆盖它们),配置适当的数据路径,然后运行它。
`clickhouse` 二进制文件几乎没有依赖关系,适用于各种 Linux 发行版。要快速地测试服务器上的更改,您可以简单地将新建的 `clickhouse` 二进制文件 `scp` 到其他服务器,然后按照上面的示例运行它。
## 测试环境
在将版本发布为稳定之前,我们将其部署在测试环境中 测试环境是一个处理[Yandex.Metrica]https://metrica.yandex.com/总数据的1/39部分大小的集群。 我们与 Yandex.Metrica 团队公用我们的测试环境。ClickHouse 在现有数据的基础上无需停机即可升级。 我们首先看到数据处理成功而不会实时滞后,复制继续工作,并且 Yandex.Metrica 团队无法看到问题。 首先的检查可以通过以下方式完成:
```
SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
```
在某些情况下,我们还部署到 Yandex 的合作团队的测试环境:市场,云等。此外,我们还有一些用于开发目的的硬件服务器。
## 负载测试
部署到测试环境后,我们使用生产群集中的查询运行负载测试。 这是手动完成的。
确保在生产集群中开启了 `query_log` 选项。
收集一天或更多的查询日志:
```
clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv
```
This is a rather complex example. `type = 2` filters queries that were executed successfully. `query LIKE '%ym:%'` selects the relevant queries from Yandex.Metrica. `is_initial_query` selects only queries that were initiated by the client, not by ClickHouse itself (as parts of distributed query processing).
`scp` this log to your testing cluster and run it as follows:
```
clickhouse benchmark --concurrency 16 < queries.tsv
```
(probably you also want to specify a `--user` to run as)
Then leave it for a night or a weekend and go take a rest.
You should check that `clickhouse-server` does not crash, that the memory footprint is bounded, and that performance does not degrade over time.
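A few rough checks, assuming the packaged default log location and the standard system tables:
```
# did the server log anything fatal?
sudo grep -i -e 'Fatal' -e 'Segmentation' /var/log/clickhouse-server/clickhouse-server.err.log
# is the memory footprint sane?
clickhouse-client --query "SELECT metric, formatReadableSize(value) FROM system.metrics WHERE metric = 'MemoryTracking'"
```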
Precise query execution timings are not recorded and not compared due to the high variability of queries and the environment.
## Build Tests
Build tests allow checking that the build is not broken in various alternative configurations and on some foreign systems. Tests are located in the `ci` directory. They run the build from source inside Docker, Vagrant, and sometimes with `qemu-user-static` inside Docker. These tests are under development, and test runs are not automated.
Motivation:
Normally we release and run all tests on a single variant of the ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
- build on FreeBSD;
- build on Debian with libraries from system packages;
- build with shared linking of libraries;
- build on the AArch64 platform.
For example, a build with system packages is bad practice, because we cannot guarantee which exact version of packages the system will have. But it is really needed by the Debian maintainers. For this reason we at least have to support this variant of the build. Another example: shared linking is a common source of trouble, but it is needed by some enthusiasts.
Though we cannot run all tests on all variants of builds, we want to check at least that the various build variants are not broken. For this purpose we use build tests.
## Testing for Protocol Compatibility
When we extend the ClickHouse network protocol, we test manually that the old clickhouse-client works with the new clickhouse-server, and that the new clickhouse-client works with the old clickhouse-server (simply by running the binaries from the corresponding packages).
## Help from the Compiler
The main ClickHouse code (located in the `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional warnings enabled. These options are not enabled for third-party libraries, though.
Clang has even more useful warnings - you can look for them with `-Weverything` and pick something for the default build.
For production builds, gcc is used (it still generates slightly more efficient code than clang). For development, clang is usually more convenient to use. You can build in debug mode on your own machine (to save the battery of your laptop), but please note that the compiler is able to generate more warnings with `-O3` thanks to better control-flow and inter-procedure analysis. When building with clang, `libc++` is used instead of `libstdc++`, and when building in debug mode the debug version of `libc++` is used, which allows catching more errors at runtime.
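A sketch of a local clang debug build under the assumptions above (the out-of-source directory and the `ninja` generator are assumptions; the toolchain selection is plain CMake):
```
mkdir -p build-clang && cd build-clang
cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug
ninja
```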
## Sanitizers
**Address sanitizer**.
We run functional and integration tests under ASan on a per-commit basis.
**Valgrind (Memcheck)**.
We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in the `re2` library, see [this article](https://research.swtch.com/sparse).
**Thread sanitizer**.
We run functional tests under TSan. ClickHouse must pass all tests. Running under TSan is not automated and is performed only occasionally.
**Memory sanitizer**.
Currently we do not use MSan.
**Undefined behaviour sanitizer.**
We still do not use UBSan on a per-commit basis. There are some places to fix.
**Debug allocator.**
You can enable the debug version of `tcmalloc` with the `DEBUG_TCMALLOC` CMake option. We run tests with the debug allocator on a per-commit basis.
You will find some additional details in `dbms/tests/instructions/sanitizers.txt`.
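As a sketch, a local sanitizer build could be configured roughly like this; the `SANITIZE` CMake variable is an assumption here, so check `dbms/tests/instructions/sanitizers.txt` for the authoritative steps:
```
mkdir -p build-asan && cd build-asan
cmake .. -DSANITIZE=address   # assumed variable name; see the instructions file referenced above
ninja
```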
## Fuzzing
We use a simple fuzz test to generate random SQL queries and to check that the server does not die while executing them. Fuzz testing is performed with the Address sanitizer. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
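A rough sketch of such a continuous run; the location of the script and the exact way its output is fed into the client are assumptions here:
```
# keep feeding randomly generated queries into a (sanitizer) build of the server
while true; do
    perl 00746_sql_fuzzy.pl | clickhouse-client --multiquery --ignore-error
done
```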
As of December 2018, we still do not use isolated fuzz testing of library code.
## Security Audit
People from the Yandex Cloud department do some basic overview of ClickHouse capabilities from a security standpoint.
## Static Analyzers
We use static analysis occasionally. We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`. You will find instructions for usage in the `dbms/tests/instructions/` directory. You can also read [the article in Russian](https://habr.com/company/yandex/blog/342018/).
If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.
## Other Hardening
`FORTIFY_SOURCE` is used by default. It is almost useless, but it still makes sense in rare cases, and we do not disable it.
## Code Style
Code style rules are described [here](https://clickhouse.yandex/docs/en/development/style/).
To check for some common style violations, you can use the `utils/check-style` script.
To force the proper style of your code, you can use `clang-format`. The file `.clang-format` is located at the sources root; it mostly corresponds to our actual code style. But it is not recommended to apply `clang-format` to existing files, because it makes the formatting worse. You can use the `clang-format-diff` tool, which you can find in the clang sources repository.
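For example, `clang-format-diff` can be used to reformat only the lines you touched (assuming `clang-format-diff.py` from the clang sources is available on `PATH`):
```
# reformat only the lines changed relative to the previous commit, using the project's .clang-format
git diff -U0 HEAD~1 | clang-format-diff.py -p1 -i
```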
Alternatively, you can try the `uncrustify` tool to reformat your code. The configuration is in `uncrustify.cfg` in the sources root. It is less tested than `clang-format`.
`CLion` has its own code formatter, which has to be tuned for our code style.
## Metrica B2B Tests
Each ClickHouse release is tested with the Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of the Metrica engine that processes a fixed sample of input data. The results of the two instances of the Metrica engine are then compared together.
These tests are automated by a separate team. Due to the high number of moving parts, the tests fail most of the time for completely unrelated reasons that are very difficult to figure out. Most likely these tests have negative value for us. Nevertheless, these tests have proved to be useful in about one or two cases out of hundreds.
## Test Coverage
As of July 2018, we do not track test coverage.
## Test Automation
We run tests with the Yandex internal CI and the job automation system named "Sandbox". We also continue to use Jenkins (it is available inside Yandex).
Build jobs and tests are run in Sandbox on a per-commit basis. The resulting packages and test results are published on GitHub and can be downloaded via direct links; results are stored permanently. When you send a pull request on GitHub, we tag it as "can be tested" and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc.) for you.
We do not use Travis CI due to the limit on time and computational power.
In Jenkins we run dictionary tests and Metrica B2B tests. We use Jenkins to prepare and publish releases. Jenkins is a legacy technology and all jobs will be moved to Sandbox.
[Original article](https://clickhouse.yandex/docs/zh/development/tests/) <!--hide-->

View File

@ -1,6 +1,5 @@
#pragma once
#include <cstdint>
#include <cstddef>
using Int8 = int8_t;
using Int16 = int16_t;
@ -11,30 +10,3 @@ using UInt8 = uint8_t;
using UInt16 = uint16_t;
using UInt32 = uint32_t;
using UInt64 = uint64_t;
/** This is not the best way to overcome an issue of different definitions
* of uint64_t and size_t on Linux and Mac OS X (both 64 bit).
*
* Note that on both platforms, long and long long are 64 bit types.
* But they are always different types (with the same physical representation).
*/
namespace std
{
inline UInt64 max(unsigned long x, unsigned long long y) { return x > y ? x : y; }
inline UInt64 max(unsigned long long x, unsigned long y) { return x > y ? x : y; }
inline UInt64 min(unsigned long x, unsigned long long y) { return x < y ? x : y; }
inline UInt64 min(unsigned long long x, unsigned long y) { return x < y ? x : y; }
inline Int64 max(long x, long long y) { return x > y ? x : y; }
inline Int64 max(long long x, long y) { return x > y ? x : y; }
inline Int64 min(long x, long long y) { return x < y ? x : y; }
inline Int64 min(long long x, long y) { return x < y ? x : y; }
}
/// Workaround for the issue, that KDevelop doesn't see time_t and size_t types (for syntax highlight).
#ifdef IN_KDEVELOP_PARSER
using time_t = Int64;
using size_t = UInt64;
#endif

View File

@ -7,6 +7,7 @@ add_executable (date_lut4 date_lut4.cpp)
add_executable (date_lut_default_timezone date_lut_default_timezone.cpp)
add_executable (multi_version multi_version.cpp)
add_executable (local_date_time_comparison local_date_time_comparison.cpp)
add_executable (realloc-perf allocator.cpp)
set(PLATFORM_LIBS ${CMAKE_DL_LIBS})
@ -17,6 +18,7 @@ target_link_libraries (date_lut4 common ${PLATFORM_LIBS})
target_link_libraries (date_lut_default_timezone common ${PLATFORM_LIBS})
target_link_libraries (multi_version common)
target_link_libraries (local_date_time_comparison common)
target_link_libraries (realloc-perf common)
add_check(multi_version)
add_check(local_date_time_comparison)

View File

@ -0,0 +1,47 @@
#include <cstdlib>
#include <cstring>
#include <vector>
#include <thread>
void thread_func()
{
for (size_t i = 0; i < 100; ++i)
{
size_t size = 4096;
void * buf = malloc(size);
if (!buf)
abort();
memset(buf, 0, size);
while (size < 1048576)
{
size_t next_size = size * 4;
void * new_buf = realloc(buf, next_size);
if (!new_buf)
abort();
buf = new_buf;
memset(reinterpret_cast<char*>(buf) + size, 0, next_size - size);
size = next_size;
}
free(buf);
}
}
int main(int, char **)
{
std::vector<std::thread> threads(16);
for (size_t i = 0; i < 1000; ++i)
{
for (auto & thread : threads)
thread = std::thread(thread_func);
for (auto & thread : threads)
thread.join();
}
return 0;
}

View File

@ -32,7 +32,7 @@ set -e
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd $CURDIR
source "./release_lib.sh"
source "./utils/release/release_lib.sh"
PBUILDER_AUTOUPDATE=${PBUILDER_AUTOUPDATE=4320}

View File

@ -3,12 +3,12 @@ if (NOT NO_WERROR)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
endif ()
if (MAKE_STATIC_LIBRARIES)
set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
if(MAKE_STATIC_LIBRARIES)
set(MAX_LINKER_MEMORY 3500)
else()
set (MAX_LINKER_MEMORY 2500 CACHE INTERNAL "")
endif ()
include (../cmake/limit_jobs.cmake)
set(MAX_LINKER_MEMORY 2500)
endif()
include(../cmake/limit_jobs.cmake)
# Utils used in package
add_subdirectory (config-processor)

View File

@ -22,5 +22,5 @@ env TEST_RUN=1 \
`# Use all possible contrib libs from system` \
`# psmisc - killall` \
`# gdb - symbol test in pbuilder` \
EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev $EXTRAPACKAGES" \
EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libxml2-dev libgsasl7-dev $EXTRAPACKAGES" \
pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT

View File

@ -0,0 +1,6 @@
#!/bin/bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
CMAKE_FLAGS+=" -DCLICKHOUSE_SPLIT_BINARY=1 "
. $CUR_DIR/build_debian_unbundled.sh

View File

@ -106,7 +106,7 @@ static void mutate(pcg64 & generator, void * src, size_t length)
&& isAlphaASCII(pos[2]))
{
auto res = rand(generator, 0, 3);
if (res == 2)
if (res == 2)
{
std::swap(pos[0], pos[1]);
}
@ -118,7 +118,7 @@ static void mutate(pcg64 & generator, void * src, size_t length)
else if (pos + 5 <= end
&& pos[0] >= 0xC0 && pos[0] <= 0xDF && pos[1] >= 0x80 && pos[1] <= 0xBF
&& pos[2] >= 0x20 && pos[2] < 0x80 && !isAlphaASCII(pos[2])
&& pos[3] >= 0xC0 && pos[0] <= 0xDF && pos[4] >= 0x80 && pos[4] <= 0xBF)
&& pos[3] >= 0xC0 && pos[3] <= 0xDF && pos[4] >= 0x80 && pos[4] <= 0xBF)
{
auto res = rand(generator, 0, 3);
if (res == 2)

View File

@ -9,7 +9,7 @@ function gen_version_string {
}
function get_version {
BASEDIR=$(dirname "${BASH_SOURCE[0]}")
BASEDIR=$(dirname "${BASH_SOURCE[0]}")/../../
VERSION_REVISION=`grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\)$/\1/' | sed 's/[) ].*//'`
VERSION_MAJOR=`grep "set(VERSION_MAJOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//'`
VERSION_MINOR=`grep "set(VERSION_MINOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//'`