diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index db923369296..ac0463b447a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,25 +2,23 @@ I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla Changelog category (leave one): - New Feature -- Bug Fix - Improvement +- Bug Fix - Performance Improvement - Backward Incompatible Change - Build/Testing/Packaging Improvement - Documentation (changelog entry is not required) -- Other - Not for changelog (changelog entry is not required) Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md): - ... Detailed description / Documentation draft: - ... + By adding documentation, you'll allow users to try your new feature immediately, not when someone else will have time to document it later. Documentation is necessary for all features that affect user experience in any way. You can add brief documentation draft above, or add documentation right into your patch as Markdown files in [docs](https://github.com/ClickHouse/ClickHouse/tree/master/docs) folder. If you are doing this for the first time, it's recommended to read the lightweight [Contributing to ClickHouse Documentation](https://github.com/ClickHouse/ClickHouse/tree/master/docs/README.md) guide first. diff --git a/.gitmodules b/.gitmodules index 1d9d4d25baf..4df7798e1e7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -193,7 +193,7 @@ url = https://github.com/danlark1/miniselect [submodule "contrib/rocksdb"] path = contrib/rocksdb - url = https://github.com/ClickHouse-Extras/rocksdb.git + url = https://github.com/ClickHouse-Extras/rocksdb.git [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz @@ -228,3 +228,9 @@ [submodule "contrib/libpqxx"] path = contrib/libpqxx url = https://github.com/ClickHouse-Extras/libpqxx.git +[submodule "contrib/sqlite-amalgamation"] + path = contrib/sqlite-amalgamation + url = https://github.com/azadkuh/sqlite-amalgamation +[submodule "contrib/s2geometry"] + path = contrib/s2geometry + url = https://github.com/ClickHouse-Extras/s2geometry.git diff --git a/CMakeLists.txt b/CMakeLists.txt index d23e5f540d3..875a6d1ab61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -536,10 +536,12 @@ include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) include (cmake/find/odbc.cmake) include (cmake/find/nanodbc.cmake) +include (cmake/find/sqlite.cmake) include (cmake/find/rocksdb.cmake) include (cmake/find/libpqxx.cmake) include (cmake/find/nuraft.cmake) include (cmake/find/yaml-cpp.cmake) +include (cmake/find/s2geometry.cmake) if(NOT USE_INTERNAL_PARQUET_LIBRARY) set (ENABLE_ORC OFF CACHE INTERNAL "") diff --git a/cmake/find/s2geometry.cmake b/cmake/find/s2geometry.cmake new file mode 100644 index 00000000000..2364c6ba193 --- /dev/null +++ b/cmake/find/s2geometry.cmake @@ -0,0 +1,24 @@ + +option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) + +if (ENABLE_S2_GEOMETRY) + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/s2geometry") + message (WARNING "submodule contrib/s2geometry is missing. 
to fix try run: \n git submodule update --init --recursive") + set (ENABLE_S2_GEOMETRY 0) + set (USE_S2_GEOMETRY 0) + else() + if (OPENSSL_FOUND) + set (S2_GEOMETRY_LIBRARY s2) + set (S2_GEOMETRY_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src/s2) + set (USE_S2_GEOMETRY 1) + else() + message (WARNING "S2 uses OpenSSL, but the latter is absent.") + endif() + endif() + + if (NOT USE_S2_GEOMETRY) + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable S2 geometry library") + endif() +endif() + +message (STATUS "Using s2geometry=${USE_S2_GEOMETRY} : ${S2_GEOMETRY_INCLUDE_DIR}") diff --git a/cmake/find/sqlite.cmake b/cmake/find/sqlite.cmake new file mode 100644 index 00000000000..cfa33fdebbb --- /dev/null +++ b/cmake/find/sqlite.cmake @@ -0,0 +1,16 @@ +option(ENABLE_SQLITE "Enable sqlite" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_SQLITE) + return() +endif() + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation/sqlite3.c") + message (WARNING "submodule contrib/sqlite3-amalgamation is missing. to fix try run: \n git submodule update --init --recursive") + message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sqlite library") + set (USE_SQLITE 0) + return() +endif() + +set (USE_SQLITE 1) +set (SQLITE_LIBRARY sqlite) +message (STATUS "Using sqlite=${USE_SQLITE}") diff --git a/cmake/find/stats.cmake b/cmake/find/stats.cmake index 339e8524598..dea108ed920 100644 --- a/cmake/find/stats.cmake +++ b/cmake/find/stats.cmake @@ -1,4 +1,4 @@ -option(ENABLE_STATS "Enalbe StatsLib library" ${ENABLE_LIBRARIES}) +option(ENABLE_STATS "Enable StatsLib library" ${ENABLE_LIBRARIES}) if (ENABLE_STATS) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/stats") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 164692fb893..2b6629d0817 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,3 +1,4 @@ +# Third-party libraries may have substandard code. # Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will @@ -10,10 +11,8 @@ else () endif () unset (_current_dir_name) -# Third-party libraries may have substandard code. -# Also remove a possible source of nondeterminism. 
-set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -D__DATE__= -D__TIME__= -D__TIMESTAMP__=") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -D__DATE__= -D__TIME__= -D__TIMESTAMP__=") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") if (WITH_COVERAGE) set (WITHOUT_COVERAGE_LIST ${WITHOUT_COVERAGE}) @@ -329,3 +328,10 @@ endif() add_subdirectory(fast_float) +if (USE_SQLITE) + add_subdirectory(sqlite-cmake) +endif() + +if (USE_S2_GEOMETRY) + add_subdirectory(s2geometry-cmake) +endif() diff --git a/contrib/poco b/contrib/poco index 59945069080..7351c4691b5 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 5994506908028612869fee627d68d8212dfe7c1e +Subproject commit 7351c4691b5d401f59e3959adfc5b4fa263b32da diff --git a/contrib/rocksdb b/contrib/rocksdb index 07c77549a20..dac0e9a6808 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 07c77549a20b63ff6981b400085eba36bb5c80c4 +Subproject commit dac0e9a68080c837d6b6223921f3fc151abbfcdc diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index bccc9ed5294..e7ff1f548e3 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -70,11 +70,6 @@ else() endif() endif() -set(BUILD_VERSION_CC rocksdb_build_version.cc) -add_library(rocksdb_build_version OBJECT ${BUILD_VERSION_CC}) - -target_include_directories(rocksdb_build_version PRIVATE "${ROCKSDB_SOURCE_DIR}/util") - include(CheckCCompilerFlag) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") CHECK_C_COMPILER_FLAG("-mcpu=power9" HAS_POWER9) @@ -243,272 +238,293 @@ find_package(Threads REQUIRED) # Main library source code set(SOURCES - "${ROCKSDB_SOURCE_DIR}/cache/cache.cc" - "${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc" - "${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc" - "${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc" - "${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_cache.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc" - "${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc" - "${ROCKSDB_SOURCE_DIR}/db/builder.cc" - "${ROCKSDB_SOURCE_DIR}/db/c.cc" - "${ROCKSDB_SOURCE_DIR}/db/column_family.cc" - "${ROCKSDB_SOURCE_DIR}/db/compacted_db_impl.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_job.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_fifo.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_level.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_universal.cc" - "${ROCKSDB_SOURCE_DIR}/db/compaction/sst_partitioner.cc" - "${ROCKSDB_SOURCE_DIR}/db/convenience.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_filesnapshot.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc" - 
"${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_readonly.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_secondary.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_info_dumper.cc" - "${ROCKSDB_SOURCE_DIR}/db/db_iter.cc" - "${ROCKSDB_SOURCE_DIR}/db/dbformat.cc" - "${ROCKSDB_SOURCE_DIR}/db/error_handler.cc" - "${ROCKSDB_SOURCE_DIR}/db/event_helpers.cc" - "${ROCKSDB_SOURCE_DIR}/db/experimental.cc" - "${ROCKSDB_SOURCE_DIR}/db/external_sst_file_ingestion_job.cc" - "${ROCKSDB_SOURCE_DIR}/db/file_indexer.cc" - "${ROCKSDB_SOURCE_DIR}/db/flush_job.cc" - "${ROCKSDB_SOURCE_DIR}/db/flush_scheduler.cc" - "${ROCKSDB_SOURCE_DIR}/db/forward_iterator.cc" - "${ROCKSDB_SOURCE_DIR}/db/import_column_family_job.cc" - "${ROCKSDB_SOURCE_DIR}/db/internal_stats.cc" - "${ROCKSDB_SOURCE_DIR}/db/logs_with_prep_tracker.cc" - "${ROCKSDB_SOURCE_DIR}/db/log_reader.cc" - "${ROCKSDB_SOURCE_DIR}/db/log_writer.cc" - "${ROCKSDB_SOURCE_DIR}/db/malloc_stats.cc" - "${ROCKSDB_SOURCE_DIR}/db/memtable.cc" - "${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc" - "${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc" - "${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc" - "${ROCKSDB_SOURCE_DIR}/db/output_validator.cc" - "${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc" - "${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc" - "${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc" - "${ROCKSDB_SOURCE_DIR}/db/repair.cc" - "${ROCKSDB_SOURCE_DIR}/db/snapshot_impl.cc" - "${ROCKSDB_SOURCE_DIR}/db/table_cache.cc" - "${ROCKSDB_SOURCE_DIR}/db/table_properties_collector.cc" - "${ROCKSDB_SOURCE_DIR}/db/transaction_log_impl.cc" - "${ROCKSDB_SOURCE_DIR}/db/trim_history_scheduler.cc" - "${ROCKSDB_SOURCE_DIR}/db/version_builder.cc" - "${ROCKSDB_SOURCE_DIR}/db/version_edit.cc" - "${ROCKSDB_SOURCE_DIR}/db/version_edit_handler.cc" - "${ROCKSDB_SOURCE_DIR}/db/version_set.cc" - "${ROCKSDB_SOURCE_DIR}/db/wal_edit.cc" - "${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc" - "${ROCKSDB_SOURCE_DIR}/db/write_batch.cc" - "${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc" - "${ROCKSDB_SOURCE_DIR}/db/write_controller.cc" - "${ROCKSDB_SOURCE_DIR}/db/write_thread.cc" - "${ROCKSDB_SOURCE_DIR}/env/env.cc" - "${ROCKSDB_SOURCE_DIR}/env/env_chroot.cc" - "${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc" - "${ROCKSDB_SOURCE_DIR}/env/env_hdfs.cc" - "${ROCKSDB_SOURCE_DIR}/env/file_system.cc" - "${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc" - "${ROCKSDB_SOURCE_DIR}/env/mock_env.cc" - "${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc" - "${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc" - "${ROCKSDB_SOURCE_DIR}/file/file_util.cc" - "${ROCKSDB_SOURCE_DIR}/file/filename.cc" - "${ROCKSDB_SOURCE_DIR}/file/random_access_file_reader.cc" - "${ROCKSDB_SOURCE_DIR}/file/read_write_util.cc" - "${ROCKSDB_SOURCE_DIR}/file/readahead_raf.cc" - "${ROCKSDB_SOURCE_DIR}/file/sequence_file_reader.cc" - "${ROCKSDB_SOURCE_DIR}/file/sst_file_manager_impl.cc" - "${ROCKSDB_SOURCE_DIR}/file/writable_file_writer.cc" - "${ROCKSDB_SOURCE_DIR}/logging/auto_roll_logger.cc" - "${ROCKSDB_SOURCE_DIR}/logging/event_logger.cc" - "${ROCKSDB_SOURCE_DIR}/logging/log_buffer.cc" - "${ROCKSDB_SOURCE_DIR}/memory/arena.cc" - "${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc" - "${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc" - "${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc" - "${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc" - "${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc" - "${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc" - "${ROCKSDB_SOURCE_DIR}/memtable/skiplistrep.cc" - 
"${ROCKSDB_SOURCE_DIR}/memtable/vectorrep.cc" - "${ROCKSDB_SOURCE_DIR}/memtable/write_buffer_manager.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/histogram.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/histogram_windowing.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/in_memory_stats_history.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/instrumented_mutex.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/iostats_context.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/perf_context.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc" - "${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc" - "${ROCKSDB_SOURCE_DIR}/options/cf_options.cc" - "${ROCKSDB_SOURCE_DIR}/options/configurable.cc" - "${ROCKSDB_SOURCE_DIR}/options/customizable.cc" - "${ROCKSDB_SOURCE_DIR}/options/db_options.cc" - "${ROCKSDB_SOURCE_DIR}/options/options.cc" - "${ROCKSDB_SOURCE_DIR}/options/options_helper.cc" - "${ROCKSDB_SOURCE_DIR}/options/options_parser.cc" - "${ROCKSDB_SOURCE_DIR}/port/stack_trace.cc" - "${ROCKSDB_SOURCE_DIR}/table/adaptive/adaptive_table_factory.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/binary_search_index_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_filter_block.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_builder.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_factory.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_iterator.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_builder.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefetcher.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefix_index.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_hash_index.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_footer.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/filter_block_reader_common.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/filter_policy.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/flush_block_policy.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/full_filter_block.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/hash_index_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/index_builder.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/index_reader_common.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/parsed_full_filter_block.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_filter_block.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_iterator.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/reader_common.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_based/uncompression_dict_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/block_fetcher.cc" - "${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_builder.cc" - "${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_factory.cc" - "${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/format.cc" - "${ROCKSDB_SOURCE_DIR}/table/get_context.cc" - "${ROCKSDB_SOURCE_DIR}/table/iterator.cc" - "${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc" - "${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc" - "${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc" - 
"${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc" - "${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_builder.cc" - "${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_factory.cc" - "${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_index.cc" - "${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_key_coding.cc" - "${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc" - "${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc" - "${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc" - "${ROCKSDB_SOURCE_DIR}/table/table_factory.cc" - "${ROCKSDB_SOURCE_DIR}/table/table_properties.cc" - "${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc" - "${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc" - "${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc" - "${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc" - "${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc" - "${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc" - "${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc" - "${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc" - "${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc" - "${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc" - "${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc" - "${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc" - "${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc" - "${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc" - "${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc" - "${ROCKSDB_SOURCE_DIR}/util/coding.cc" - "${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc" - "${ROCKSDB_SOURCE_DIR}/util/comparator.cc" - "${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc" - "${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc" - "${ROCKSDB_SOURCE_DIR}/util/crc32c.cc" - "${ROCKSDB_SOURCE_DIR}/util/dynamic_bloom.cc" - "${ROCKSDB_SOURCE_DIR}/util/hash.cc" - "${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc" - "${ROCKSDB_SOURCE_DIR}/util/random.cc" - "${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc" - "${ROCKSDB_SOURCE_DIR}/util/slice.cc" - "${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc" - "${ROCKSDB_SOURCE_DIR}/util/status.cc" - "${ROCKSDB_SOURCE_DIR}/util/string_util.cc" - "${ROCKSDB_SOURCE_DIR}/util/thread_local.cc" - "${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc" - "${ROCKSDB_SOURCE_DIR}/util/xxhash.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/backupable/backupable_db.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_compaction_filter.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/debug.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc" - 
"${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/sortlist.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend2.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/uint64add.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/object_registry.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/option_change_migration/option_change_migration.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/options/options_util.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_file.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_metadata.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/persistent_cache_tier.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction_db_impl.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction_db.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/snapshot_checker.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_base.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_db_mutex_impl.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_util.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc" - "${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc" - $) + ${ROCKSDB_SOURCE_DIR}/cache/cache.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc + ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc + ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc + ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc + ${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_cache.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_garbage_meter.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc + 
${ROCKSDB_SOURCE_DIR}/db/builder.cc + ${ROCKSDB_SOURCE_DIR}/db/c.cc + ${ROCKSDB_SOURCE_DIR}/db/column_family.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_job.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_fifo.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_level.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_universal.cc + ${ROCKSDB_SOURCE_DIR}/db/compaction/sst_partitioner.cc + ${ROCKSDB_SOURCE_DIR}/db/convenience.cc + ${ROCKSDB_SOURCE_DIR}/db/db_filesnapshot.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/compacted_db_impl.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_readonly.cc + ${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_secondary.cc + ${ROCKSDB_SOURCE_DIR}/db/db_info_dumper.cc + ${ROCKSDB_SOURCE_DIR}/db/db_iter.cc + ${ROCKSDB_SOURCE_DIR}/db/dbformat.cc + ${ROCKSDB_SOURCE_DIR}/db/error_handler.cc + ${ROCKSDB_SOURCE_DIR}/db/event_helpers.cc + ${ROCKSDB_SOURCE_DIR}/db/experimental.cc + ${ROCKSDB_SOURCE_DIR}/db/external_sst_file_ingestion_job.cc + ${ROCKSDB_SOURCE_DIR}/db/file_indexer.cc + ${ROCKSDB_SOURCE_DIR}/db/flush_job.cc + ${ROCKSDB_SOURCE_DIR}/db/flush_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/db/forward_iterator.cc + ${ROCKSDB_SOURCE_DIR}/db/import_column_family_job.cc + ${ROCKSDB_SOURCE_DIR}/db/internal_stats.cc + ${ROCKSDB_SOURCE_DIR}/db/logs_with_prep_tracker.cc + ${ROCKSDB_SOURCE_DIR}/db/log_reader.cc + ${ROCKSDB_SOURCE_DIR}/db/log_writer.cc + ${ROCKSDB_SOURCE_DIR}/db/malloc_stats.cc + ${ROCKSDB_SOURCE_DIR}/db/memtable.cc + ${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc + ${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc + ${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc + ${ROCKSDB_SOURCE_DIR}/db/output_validator.cc + ${ROCKSDB_SOURCE_DIR}/db/periodic_work_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc + ${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc + ${ROCKSDB_SOURCE_DIR}/db/repair.cc + ${ROCKSDB_SOURCE_DIR}/db/snapshot_impl.cc + ${ROCKSDB_SOURCE_DIR}/db/table_cache.cc + ${ROCKSDB_SOURCE_DIR}/db/table_properties_collector.cc + ${ROCKSDB_SOURCE_DIR}/db/transaction_log_impl.cc + ${ROCKSDB_SOURCE_DIR}/db/trim_history_scheduler.cc + ${ROCKSDB_SOURCE_DIR}/db/version_builder.cc + ${ROCKSDB_SOURCE_DIR}/db/version_edit.cc + ${ROCKSDB_SOURCE_DIR}/db/version_edit_handler.cc + ${ROCKSDB_SOURCE_DIR}/db/version_set.cc + ${ROCKSDB_SOURCE_DIR}/db/wal_edit.cc + ${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc + ${ROCKSDB_SOURCE_DIR}/db/write_batch.cc + ${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc + ${ROCKSDB_SOURCE_DIR}/db/write_controller.cc + ${ROCKSDB_SOURCE_DIR}/db/write_thread.cc + ${ROCKSDB_SOURCE_DIR}/env/composite_env.cc + ${ROCKSDB_SOURCE_DIR}/env/env.cc + ${ROCKSDB_SOURCE_DIR}/env/env_chroot.cc + ${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc + ${ROCKSDB_SOURCE_DIR}/env/env_hdfs.cc + ${ROCKSDB_SOURCE_DIR}/env/file_system.cc + ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc + ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc + ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc + ${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc + 
${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc + ${ROCKSDB_SOURCE_DIR}/file/file_util.cc + ${ROCKSDB_SOURCE_DIR}/file/filename.cc + ${ROCKSDB_SOURCE_DIR}/file/line_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/file/random_access_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/file/read_write_util.cc + ${ROCKSDB_SOURCE_DIR}/file/readahead_raf.cc + ${ROCKSDB_SOURCE_DIR}/file/sequence_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/file/sst_file_manager_impl.cc + ${ROCKSDB_SOURCE_DIR}/file/writable_file_writer.cc + ${ROCKSDB_SOURCE_DIR}/logging/auto_roll_logger.cc + ${ROCKSDB_SOURCE_DIR}/logging/event_logger.cc + ${ROCKSDB_SOURCE_DIR}/logging/log_buffer.cc + ${ROCKSDB_SOURCE_DIR}/memory/arena.cc + ${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc + ${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc + ${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc + ${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc + ${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/skiplistrep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/vectorrep.cc + ${ROCKSDB_SOURCE_DIR}/memtable/write_buffer_manager.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/histogram.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/histogram_windowing.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/in_memory_stats_history.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/instrumented_mutex.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/iostats_context.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/perf_context.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc + ${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc + ${ROCKSDB_SOURCE_DIR}/options/cf_options.cc + ${ROCKSDB_SOURCE_DIR}/options/configurable.cc + ${ROCKSDB_SOURCE_DIR}/options/customizable.cc + ${ROCKSDB_SOURCE_DIR}/options/db_options.cc + ${ROCKSDB_SOURCE_DIR}/options/options.cc + ${ROCKSDB_SOURCE_DIR}/options/options_helper.cc + ${ROCKSDB_SOURCE_DIR}/options/options_parser.cc + ${ROCKSDB_SOURCE_DIR}/port/stack_trace.cc + ${ROCKSDB_SOURCE_DIR}/table/adaptive/adaptive_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/binary_search_index_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefetcher.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefix_index.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_hash_index.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_footer.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/filter_block_reader_common.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/filter_policy.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/flush_block_policy.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/full_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/hash_index_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/index_builder.cc + 
${ROCKSDB_SOURCE_DIR}/table/block_based/index_reader_common.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/parsed_full_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_filter_block.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/reader_common.cc + ${ROCKSDB_SOURCE_DIR}/table/block_based/uncompression_dict_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/block_fetcher.cc + ${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/format.cc + ${ROCKSDB_SOURCE_DIR}/table/get_context.cc + ${ROCKSDB_SOURCE_DIR}/table/iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc + ${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_builder.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_index.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_key_coding.cc + ${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc + ${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc + ${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc + ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc + ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc + ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc + ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc + ${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc + ${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc + ${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc + ${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc + ${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc + ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc + ${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc + ${ROCKSDB_SOURCE_DIR}/util/coding.cc + ${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc + ${ROCKSDB_SOURCE_DIR}/util/comparator.cc + ${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc + ${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc + ${ROCKSDB_SOURCE_DIR}/util/crc32c.cc + ${ROCKSDB_SOURCE_DIR}/util/dynamic_bloom.cc + ${ROCKSDB_SOURCE_DIR}/util/hash.cc + ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc + ${ROCKSDB_SOURCE_DIR}/util/random.cc + ${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc + ${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc + ${ROCKSDB_SOURCE_DIR}/util/slice.cc + ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc + ${ROCKSDB_SOURCE_DIR}/util/status.cc + ${ROCKSDB_SOURCE_DIR}/util/string_util.cc + ${ROCKSDB_SOURCE_DIR}/util/thread_local.cc + ${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc + ${ROCKSDB_SOURCE_DIR}/util/xxhash.cc + ${ROCKSDB_SOURCE_DIR}/utilities/backupable/backupable_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_compaction_filter.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc + 
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc + ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc + ${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc + ${ROCKSDB_SOURCE_DIR}/utilities/debug.cc + ${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc + ${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc + ${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc + ${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/sortlist.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend2.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/uint64add.cc + ${ROCKSDB_SOURCE_DIR}/utilities/object_registry.cc + ${ROCKSDB_SOURCE_DIR}/utilities/option_change_migration/option_change_migration.cc + ${ROCKSDB_SOURCE_DIR}/utilities/options/options_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier.cc + ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_file.cc + ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_metadata.cc + ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/persistent_cache_tier.cc + ${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc + ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc + ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc + ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction_db_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/snapshot_checker.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_base.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_db_mutex_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc + ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc + 
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc + ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc + ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc + rocksdb_build_version.cc) if(HAVE_SSE42 AND NOT MSVC) set_source_files_properties( diff --git a/contrib/rocksdb-cmake/rocksdb_build_version.cc b/contrib/rocksdb-cmake/rocksdb_build_version.cc index 8697652ae9f..f9639da516f 100644 --- a/contrib/rocksdb-cmake/rocksdb_build_version.cc +++ b/contrib/rocksdb-cmake/rocksdb_build_version.cc @@ -1,3 +1,62 @@ -const char* rocksdb_build_git_sha = "rocksdb_build_git_sha:0"; -const char* rocksdb_build_git_date = "rocksdb_build_git_date:2000-01-01"; -const char* rocksdb_build_compile_date = "2000-01-01"; +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +/// This file was edited for ClickHouse. + +#include + +#include "rocksdb/version.h" +#include "util/string_util.h" + +// The build script may replace these values with real values based +// on whether or not GIT is available and the platform settings +static const std::string rocksdb_build_git_sha = "rocksdb_build_git_sha:0"; +static const std::string rocksdb_build_git_tag = "rocksdb_build_git_tag:master"; +static const std::string rocksdb_build_date = "rocksdb_build_date:2000-01-01"; + +namespace ROCKSDB_NAMESPACE { +static void AddProperty(std::unordered_map *props, const std::string& name) { + size_t colon = name.find(":"); + if (colon != std::string::npos && colon > 0 && colon < name.length() - 1) { + // If we found a "@:", then this property was a build-time substitution that failed. Skip it + size_t at = name.find("@", colon); + if (at != colon + 1) { + // Everything before the colon is the name, after is the value + (*props)[name.substr(0, colon)] = name.substr(colon + 1); + } + } +} + +static std::unordered_map* LoadPropertiesSet() { + auto * properties = new std::unordered_map(); + AddProperty(properties, rocksdb_build_git_sha); + AddProperty(properties, rocksdb_build_git_tag); + AddProperty(properties, rocksdb_build_date); + return properties; +} + +const std::unordered_map& GetRocksBuildProperties() { + static std::unique_ptr> props(LoadPropertiesSet()); + return *props; +} + +std::string GetRocksVersionAsString(bool with_patch) { + std::string version = ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR); + if (with_patch) { + return version + "." 
+ ToString(ROCKSDB_PATCH); + } else { + return version; + } +} + +std::string GetRocksBuildInfoAsString(const std::string& program, bool verbose) { + std::string info = program + " (RocksDB) " + GetRocksVersionAsString(true); + if (verbose) { + for (const auto& it : GetRocksBuildProperties()) { + info.append("\n "); + info.append(it.first); + info.append(": "); + info.append(it.second); + } + } + return info; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/contrib/s2geometry b/contrib/s2geometry new file mode 160000 index 00000000000..20ea540d81f --- /dev/null +++ b/contrib/s2geometry @@ -0,0 +1 @@ +Subproject commit 20ea540d81f4575a3fc0aea585aac611bcd03ede diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt new file mode 100644 index 00000000000..41d570c9afd --- /dev/null +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -0,0 +1,128 @@ +set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src") + +set(S2_SRCS + "${S2_SOURCE_DIR}/s2/base/stringprintf.cc" + "${S2_SOURCE_DIR}/s2/base/strtoint.cc" + "${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc" + "${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc" + "${S2_SOURCE_DIR}/s2/encoded_s2shape_index.cc" + "${S2_SOURCE_DIR}/s2/encoded_string_vector.cc" + "${S2_SOURCE_DIR}/s2/id_set_lexicon.cc" + "${S2_SOURCE_DIR}/s2/mutable_s2shape_index.cc" + "${S2_SOURCE_DIR}/s2/r2rect.cc" + "${S2_SOURCE_DIR}/s2/s1angle.cc" + "${S2_SOURCE_DIR}/s2/s1chord_angle.cc" + "${S2_SOURCE_DIR}/s2/s1interval.cc" + "${S2_SOURCE_DIR}/s2/s2boolean_operation.cc" + "${S2_SOURCE_DIR}/s2/s2builder.cc" + "${S2_SOURCE_DIR}/s2/s2builder_graph.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_closed_set_normalizer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_find_polygon_degeneracies.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_lax_polygon_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_s2point_vector_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_s2polygon_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_vector_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_snap_functions.cc" + "${S2_SOURCE_DIR}/s2/s2cap.cc" + "${S2_SOURCE_DIR}/s2/s2cell.cc" + "${S2_SOURCE_DIR}/s2/s2cell_id.cc" + "${S2_SOURCE_DIR}/s2/s2cell_index.cc" + "${S2_SOURCE_DIR}/s2/s2cell_union.cc" + "${S2_SOURCE_DIR}/s2/s2centroids.cc" + "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc" + "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc" + "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc" + "${S2_SOURCE_DIR}/s2/s2contains_vertex_query.cc" + "${S2_SOURCE_DIR}/s2/s2convex_hull_query.cc" + "${S2_SOURCE_DIR}/s2/s2coords.cc" + "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc" + "${S2_SOURCE_DIR}/s2/s2debug.cc" + "${S2_SOURCE_DIR}/s2/s2earth.cc" + "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc" + "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc" + "${S2_SOURCE_DIR}/s2/s2edge_crossings.cc" + "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" + "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" + "${S2_SOURCE_DIR}/s2/s2error.cc" + "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" + "${S2_SOURCE_DIR}/s2/s2latlng.cc" + "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" + "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" + "${S2_SOURCE_DIR}/s2/s2lax_loop_shape.cc" + "${S2_SOURCE_DIR}/s2/s2lax_polygon_shape.cc" + "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc" + "${S2_SOURCE_DIR}/s2/s2loop.cc" + "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" + "${S2_SOURCE_DIR}/s2/s2measures.cc" + "${S2_SOURCE_DIR}/s2/s2metrics.cc" + "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" + 
"${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" + "${S2_SOURCE_DIR}/s2/s2padded_cell.cc" + "${S2_SOURCE_DIR}/s2/s2point_compression.cc" + "${S2_SOURCE_DIR}/s2/s2point_region.cc" + "${S2_SOURCE_DIR}/s2/s2pointutil.cc" + "${S2_SOURCE_DIR}/s2/s2polygon.cc" + "${S2_SOURCE_DIR}/s2/s2polyline.cc" + "${S2_SOURCE_DIR}/s2/s2polyline_alignment.cc" + "${S2_SOURCE_DIR}/s2/s2polyline_measures.cc" + "${S2_SOURCE_DIR}/s2/s2polyline_simplifier.cc" + "${S2_SOURCE_DIR}/s2/s2predicates.cc" + "${S2_SOURCE_DIR}/s2/s2projections.cc" + "${S2_SOURCE_DIR}/s2/s2r2rect.cc" + "${S2_SOURCE_DIR}/s2/s2region.cc" + "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" + "${S2_SOURCE_DIR}/s2/s2region_coverer.cc" + "${S2_SOURCE_DIR}/s2/s2region_intersection.cc" + "${S2_SOURCE_DIR}/s2/s2region_union.cc" + "${S2_SOURCE_DIR}/s2/s2shape_index.cc" + "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" + "${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc" + "${S2_SOURCE_DIR}/s2/s2shape_measures.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" + "${S2_SOURCE_DIR}/s2/s2text_format.cc" + "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" + "${S2_SOURCE_DIR}/s2/strings/ostringstream.cc" + "${S2_SOURCE_DIR}/s2/strings/serialize.cc" + # ClickHouse doesn't use strings from abseil. + # So, there is no duplicate symbols. + "${S2_SOURCE_DIR}/s2/third_party/absl/base/dynamic_annotations.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/raw_logging.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/throw_delegate.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/numeric/int128.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/ascii.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/match.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/numbers.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_cat.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_split.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/string_view.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/strip.cc" + "${S2_SOURCE_DIR}/s2/third_party/absl/strings/internal/memutil.cc" + "${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc" + "${S2_SOURCE_DIR}/s2/util/bits/bits.cc" + "${S2_SOURCE_DIR}/s2/util/coding/coder.cc" + "${S2_SOURCE_DIR}/s2/util/coding/varint.cc" + "${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc" + "${S2_SOURCE_DIR}/s2/util/math/mathutil.cc" + "${S2_SOURCE_DIR}/s2/util/units/length-units.cc" +) + +add_library(s2 ${S2_SRCS}) + +set_property(TARGET s2 PROPERTY CXX_STANDARD 11) + +if (OPENSSL_FOUND) + target_link_libraries(s2 PRIVATE ${OPENSSL_LIBRARIES}) +endif() + +target_include_directories(s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") + +if(M_LIBRARY) + target_link_libraries(s2 PRIVATE ${M_LIBRARY}) +endif() diff --git a/contrib/sqlite-amalgamation b/contrib/sqlite-amalgamation new file mode 160000 index 00000000000..9818baa5d02 --- /dev/null +++ b/contrib/sqlite-amalgamation @@ -0,0 +1 @@ +Subproject commit 9818baa5d027ffb26d57f810dc4c597d4946781c diff --git a/contrib/sqlite-cmake/CMakeLists.txt b/contrib/sqlite-cmake/CMakeLists.txt new file mode 100644 index 00000000000..495cb63798d --- /dev/null +++ b/contrib/sqlite-cmake/CMakeLists.txt @@ -0,0 +1,6 @@ +set (LIBRARY_DIR 
"${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation") + +set(SRCS ${LIBRARY_DIR}/sqlite3.c) + +add_library(sqlite ${SRCS}) +target_include_directories(sqlite SYSTEM PUBLIC "${LIBRARY_DIR}") diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 2f1d28efe61..241b691cd23 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -27,7 +27,7 @@ RUN apt-get update \ # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. # Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \ +RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ && chmod +x dpkg-deb \ && cp dpkg-deb /usr/bin diff --git a/docker/packager/unbundled/Dockerfile b/docker/packager/unbundled/Dockerfile index 4dd6dbc61d8..d2bda7db833 100644 --- a/docker/packager/unbundled/Dockerfile +++ b/docker/packager/unbundled/Dockerfile @@ -2,7 +2,7 @@ FROM yandex/clickhouse-deb-builder RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ - && wget -nv -O /tmp/arrow-keyring.deb "https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-${CODENAME}.deb" \ + && wget -nv -O /tmp/arrow-keyring.deb "https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-${CODENAME}.deb" \ && dpkg -i /tmp/arrow-keyring.deb # Libraries from OS are only needed to test the "unbundled" build (that is not used in production). diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index a722132c3a5..6877a786b76 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -27,7 +27,7 @@ RUN apt-get update \ # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. # Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \ +RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ && chmod +x dpkg-deb \ && cp dpkg-deb /usr/bin diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 2864f7fc4da..bf59106c106 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -27,7 +27,7 @@ RUN apt-get update \ # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. 
# Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \ +RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ && chmod +x dpkg-deb \ && cp dpkg-deb /usr/bin diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index bba20f64e5a..3e8bf306a83 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -378,6 +378,16 @@ function run_tests # needs pv 01923_network_receive_time_metric_insert + + 01889_sqlite_read_write + + # needs s2 + 01849_geoToS2 + 01851_s2_to_geo + 01852_s2_get_neighbours + 01853_s2_cells_intersect + 01854_s2_cap_contains + 01854_s2_cap_union ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index e15697da029..344c1b9a698 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -32,7 +32,7 @@ RUN rm -rf \ RUN apt-get clean # Install MySQL ODBC driver -RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so +RUN curl 'https://downloads.mysql.com/archives/get/p/10/file/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --location --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so # Unfortunately this is required for a single test for conversion data from zookeeper to clickhouse-keeper. # ZooKeeper is not started by default, but consumes some space in containers. 
@@ -49,4 +49,3 @@ RUN mkdir /zookeeper && chmod -R 777 /zookeeper ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 0665ab7560f..6bde4ef60db 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -76,6 +76,7 @@ RUN python3 -m pip install \ pytest \ pytest-timeout \ pytest-xdist \ + pytest-repeat \ redis \ tzlocal \ urllib3 \ diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index 4b83ed21410..c444e71798e 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -2,7 +2,7 @@ version: '2.3' services: postgres1: image: postgres - command: ["postgres", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_filename=postgresql.log", "-c", "log_statement=all"] + command: ["postgres", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_filename=postgresql.log", "-c", "log_statement=all", "-c", "max_connections=200"] restart: always expose: - ${POSTGRES_PORT} diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 658ae1f27ba..17c89232e17 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -29,7 +29,8 @@ RUN apt-get update -y \ unixodbc \ wget \ mysql-client=5.7* \ - postgresql-client + postgresql-client \ + sqlite3 RUN pip3 install numpy scipy pandas diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 428fdb9fdb7..87d127ab946 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -58,11 +58,11 @@ function start() echo "Cannot start clickhouse-server" cat /var/log/clickhouse-server/stdout.log tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n1000 /var/log/clickhouse-server/clickhouse-server.log + tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | grep -F -v ' RaftInstance:' -e ' RaftInstance' | tail -n1000 break fi # use root to match with current uid - clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>/var/log/clickhouse-server/stderr.log + clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log sleep 0.5 counter=$((counter + 1)) done @@ -118,35 +118,35 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_ [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" # Print Fatal log messages to stdout -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log +zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* # Grep logs for sanitizer asserts, crashes and other critical errors # Sanitizer asserts zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp -zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" > /dev/null \ +zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \ && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv rm -f /test_output/tmp # OOM -zgrep -Fa " Application: Child 
process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ +zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Logical errors -zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ +zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv # Crash -zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ +zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ +zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docs/en/development/adding_test_queries.md b/docs/en/development/adding_test_queries.md index 95dfd076a12..547d8b0fa37 100644 --- a/docs/en/development/adding_test_queries.md +++ b/docs/en/development/adding_test_queries.md @@ -105,11 +105,11 @@ clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee te 5) ensure everything is correct, if the test output is incorrect (due to some bug for example), adjust the reference file using text editor. -#### How to create good test +#### How to create a good test -- test should be +- A test should be - minimal - create only tables related to tested functionality, remove unrelated columns and parts of query - - fast - should not take longer than few seconds (better subseconds) + - fast - should not take longer than a few seconds (better subseconds) - correct - fails then feature is not working - deterministic - isolated / stateless @@ -126,6 +126,16 @@ clickhouse-client -nmT < tests/queries/0_stateless/01521_dummy_test.sql | tee te - use other SQL files in the `0_stateless` folder as an example - ensure the feature / feature combination you want to test is not yet covered with existing tests +#### Test naming rules + +It's important to name tests correctly, so one could turn some tests subset off in clickhouse-test invocation. + +| Tester flag| What should be in test name | When flag should be added | +|---|---|---|---| +| `--[no-]zookeeper`| "zookeeper" or "replica" | Test uses tables from ReplicatedMergeTree family | +| `--[no-]shard` | "shard" or "distributed" or "global"| Test using connections to 127.0.0.2 or similar | +| `--[no-]long` | "long" or "deadlock" or "race" | Test runs longer than 60 seconds | + #### Commit / push / create PR. 
1) commit & push your changes diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index ac39c496c72..a65ddb40af0 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -79,6 +79,7 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li | re2 | BSD 3-clause | /contrib/re2/LICENSE | | replxx | BSD 3-clause | /contrib/replxx/LICENSE.md | | rocksdb | BSD 3-clause | /contrib/rocksdb/LICENSE.leveldb | +| s2geometry | Apache | /contrib/s2geometry/LICENSE | | sentry-native | MIT | /contrib/sentry-native/LICENSE | | simdjson | Apache | /contrib/simdjson/LICENSE | | snappy | Public Domain | /contrib/snappy/COPYING | diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 90f406f3ba8..537ed6a9c4f 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -123,7 +123,7 @@ For installing CMake and Ninja on Mac OS X first install Homebrew and then insta /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" brew install cmake ninja -Next, check the version of CMake: `cmake --version`. If it is below 3.3, you should install a newer version from the website: https://cmake.org/download/. +Next, check the version of CMake: `cmake --version`. If it is below 3.12, you should install a newer version from the website: https://cmake.org/download/. ## Optional External Libraries {#optional-external-libraries} diff --git a/docs/en/engines/database-engines/atomic.md b/docs/en/engines/database-engines/atomic.md index 4f5f69a5ab7..beee04c3157 100644 --- a/docs/en/engines/database-engines/atomic.md +++ b/docs/en/engines/database-engines/atomic.md @@ -47,7 +47,7 @@ EXCHANGE TABLES new_table AND old_table; ### ReplicatedMergeTree in Atomic Database {#replicatedmergetree-in-atomic-database} -For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, it is recommended to not specify engine parameters - path in ZooKeeper and replica name. In this case, configuration parameters will be used [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). If you want to specify engine parameters explicitly, it is recommended to use {uuid} macros. This is useful so that unique paths are automatically generated for each table in ZooKeeper. +For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables, it is recommended to not specify engine parameters - path in ZooKeeper and replica name. In this case, configuration parameters will be used [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). If you want to specify engine parameters explicitly, it is recommended to use `{uuid}` macros. This is useful so that unique paths are automatically generated for each table in ZooKeeper. 
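As an illustration of the recommendation above, a `ReplicatedMergeTree` table in an `Atomic` database could spell out the default-style path with the `{uuid}` macro explicitly (a sketch only; the database, table, and column names are hypothetical):

``` sql
CREATE TABLE test.orders
(
    id UInt64,
    created DateTime
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
ORDER BY id;
```

Because `{uuid}` is expanded to the table's own UUID, every table gets a unique ZooKeeper path without the path having to be edited by hand.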
## See Also diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md index b6892099378..efc9cae5026 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -22,4 +22,4 @@ You can also use the following database engines: - [PostgreSQL](../../engines/database-engines/postgresql.md) -[Original article](https://clickhouse.tech/docs/en/database_engines/) +- [Replicated](../../engines/database-engines/replicated.md) diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md index 93e4aedfd5a..198808fa952 100644 --- a/docs/en/engines/database-engines/materialize-mysql.md +++ b/docs/en/engines/database-engines/materialize-mysql.md @@ -82,6 +82,8 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([ - If `_sign` is not specified in the `SELECT` query, `WHERE _sign=1` is used by default. So the deleted rows are not included into the result set. +- The result includes columns comments in case they exist in MySQL database tables. + ### Index Conversion {#index-conversion} MySQL `PRIMARY KEY` and `INDEX` clauses are converted into `ORDER BY` tuples in ClickHouse tables. diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md new file mode 100644 index 00000000000..575aa9d3ee5 --- /dev/null +++ b/docs/en/engines/database-engines/replicated.md @@ -0,0 +1,115 @@ +# [experimental] Replicated {#replicated} + +The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via DDL log being written to ZooKeeper and executed on all of the replicas for a given database. + +One ClickHouse server can have multiple replicated databases running and updating at the same time. But there can't be multiple replicas of the same replicated database. + +## Creating a Database {#creating-a-database} +``` sql + CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] +``` + +**Engine Parameters** + +- `zoo_path` — ZooKeeper path. The same ZooKeeper path corresponds to the same database. +- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. +- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. + +!!! note "Warning" + For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. + +## Specifics and Recommendations {#specifics-and-recommendations} + +DDL queries with `Replicated` database work in a similar way to [ON CLUSTER](../../sql-reference/distributed-ddl.md) queries, but with minor differences. + +First, the DDL request tries to execute on the initiator (the host that originally received the request from the user). 
If the request is not fulfilled, then the user immediately receives an error, and the other hosts do not try to fulfill it. If the request has been successfully completed on the initiator, then all other hosts will automatically retry until they complete it. The initiator will try to wait for the query to be completed on other hosts (no longer than [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout)) and will return a table with the query execution statuses on each host. + +The behavior in case of errors is regulated by the [distributed_ddl_output_mode](../../operations/settings/settings.md#distributed_ddl_output_mode) setting; for a `Replicated` database it is better to set it to `null_status_on_timeout`, i.e. if some hosts did not have time to execute the request within [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout), then no exception is thrown and the `NULL` status is shown for them in the table. + +The [system.clusters](../../operations/system-tables/clusters.md) system table contains a cluster named after the replicated database, which consists of all replicas of the database. This cluster is updated automatically when creating/deleting replicas, and it can be used for [Distributed](../../engines/table-engines/special/distributed.md#distributed) tables. + +When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it checks its local metadata against the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).
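A minimal sketch of the recommendation above, applied in a client session before running DDL on a `Replicated` database (the `testdb` name follows the creation example earlier on this page; the table is hypothetical):

``` sql
SET distributed_ddl_output_mode = 'null_status_on_timeout';

-- Hosts that do not finish within distributed_ddl_task_timeout are reported
-- with a NULL status in the result table instead of raising TIMEOUT_EXCEEDED.
CREATE TABLE testdb.events (n UInt64) ENGINE = ReplicatedMergeTree ORDER BY n;
```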
+ +## Usage Example {#usage-example} + +Creating a cluster with three hosts: + +``` sql +node1 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','replica1'); +node2 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','other_replica'); +node3 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','{replica}'); +``` + +Running the DDL-query: + +``` sql +CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n; +``` + +``` text +┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┠+│ shard1|replica1 │ 0 │ │ 2 │ 0 │ +│ shard1|other_replica │ 0 │ │ 1 │ 0 │ +│ other_shard|r1 │ 0 │ │ 0 │ 0 │ +└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘ +``` + +Showing the system table: + +``` sql +SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local +FROM system.clusters WHERE cluster='r'; +``` + +``` text +┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┠+│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │ +│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │ +│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │ +└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘ +``` + +Creating a distributed table and inserting the data: + +``` sql +node2 :) CREATE TABLE r.d (n UInt64) ENGINE=Distributed('r','r','rmt', n % 2); +node3 :) INSERT INTO r SELECT * FROM numbers(10); +node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host; +``` + +``` text +┌─hosts─┬─groupArray(n)─┠+│ node1 │ [1,3,5,7,9] │ +│ node2 │ [0,2,4,6,8] │ +└───────┴───────────────┘ +``` + +Adding replica on the one more host: + +``` sql +node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2'); +``` + +The cluster configuration will look like this: + +``` text +┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┠+│ r │ 1 │ 1 │ node3 │ 127.0.0.1 │ 9002 │ 0 │ +│ r │ 1 │ 2 │ node4 │ 127.0.0.1 │ 9003 │ 0 │ +│ r │ 2 │ 1 │ node2 │ 127.0.0.1 │ 9001 │ 0 │ +│ r │ 2 │ 2 │ node1 │ 127.0.0.1 │ 9000 │ 1 │ +└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘ +``` + +The distributed table also will get data from the new host: + +```sql +node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host; +``` + +```text +┌─hosts─┬─groupArray(n)─┠+│ node2 │ [1,3,5,7,9] │ +│ node4 │ [0,2,4,6,8] │ +└───────┴───────────────┘ +``` \ No newline at end of file diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index 70f61c5b550..142639507d6 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -1,6 +1,6 @@ --- toc_priority: 12 -toc_title: MateriaziePostgreSQL +toc_title: MaterializedPostgreSQL --- # MaterializedPostgreSQL {#materialize-postgresql} diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 9d259456ea5..f0cdd75f90d 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -76,7 +76,7 @@ For a description of parameters, see the [CREATE query description](../../../sql - `SAMPLE BY` — An expression for sampling. Optional. 
- If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. + If a sampling expression is used, the primary key must contain it. The result of sampling expression must be unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. - `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 6de6602a216..5c911c6cc0a 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -37,6 +37,14 @@ Also, it accepts the following settings: - `max_delay_to_insert` - max delay of inserting data into Distributed table in seconds, if there are a lot of pending bytes for async send. Default 60. +- `monitor_batch_inserts` - same as [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) + +- `monitor_split_batch_on_failure` - same as [distributed_directory_monitor_split_batch_on_failure](../../../operations/settings/settings.md#distributed_directory_monitor_split_batch_on_failure) + +- `monitor_sleep_time_ms` - same as [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) + +- `monitor_max_sleep_time_ms` - same as [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) + !!! 
note "Note" **Durability settings** (`fsync_...`): diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index eb288721231..015afd1cd24 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1130,17 +1130,18 @@ The table below shows supported data types and how they match ClickHouse [data t | `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql-reference/data-types/int-uint.md), [UInt64](../sql-reference/data-types/int-uint.md) | `long` | | `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql-reference/data-types/float.md) | `float` | | `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql-reference/data-types/float.md) | `double` | -| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` | +| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` or `string` \* | | `bytes`, `string`, `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | `fixed(N)` | | `enum` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `enum` | | `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | `array(T)` | | `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(null, T)` | | `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | `null` | -| `int (date)` \* | [Date](../sql-reference/data-types/date.md) | `int (date)` \* | -| `long (timestamp-millis)` \* | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | -| `long (timestamp-micros)` \* | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | +| `int (date)` \** | [Date](../sql-reference/data-types/date.md) | `int (date)` \** | +| `long (timestamp-millis)` \** | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | +| `long (timestamp-micros)` \** | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | -\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) +\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](../operations/settings/settings.md#settings-output_format_avro_string_column_pattern) +\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` @@ -1246,12 +1247,14 @@ The table below shows supported data types and how they match ClickHouse [data t | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | | `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` | -| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | +| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | -Arrays can be nested and can have a value of the `Nullable` type as an argument. 
+Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type. @@ -1299,13 +1302,17 @@ The table below shows supported data types and how they match ClickHouse [data t | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | | `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` | -| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | +| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | -Arrays can be nested and can have a value of the `Nullable` type as an argument. +Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. + +The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](../sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type. @@ -1358,8 +1365,10 @@ The table below shows supported data types and how they match ClickHouse [data t | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | -Arrays can be nested and can have a value of the `Nullable` type as an argument. +Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type. 
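To make the new `STRUCT` and `MAP` mappings above concrete, here is a sketch of writing nested types out through the Parquet format (the table name, file name, and values are hypothetical):

``` sql
CREATE TABLE test_nested
(
    t Tuple(a UInt32, s String),
    m Map(String, UInt64)
)
ENGINE = Memory;

INSERT INTO test_nested VALUES ((1, 'one'), map('key', 1));

-- The Tuple column is written as a Parquet STRUCT, the Map column as a Parquet MAP.
SELECT * FROM test_nested INTO OUTFILE 'nested.parquet' FORMAT Parquet;
```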
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 990cb30346c..eed673234ba 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -157,5 +157,6 @@ toc_title: Adopters | SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | | ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | | Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | +| Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 6af12eb9b01..a5258f47175 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -10,7 +10,7 @@ ClickHouse server use [ZooKeeper](https://zookeeper.apache.org/) coordination sy !!! warning "Warning" This feature currently in pre-production stage. We test it in our CI and on small internal installations. -## Implemetation details +## Implementation details ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, has quite a simple and powerful data model. ZooKeeper's coordination algorithm called ZAB (ZooKeeper Atomic Broadcast) doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper `clickhouse-keeper` written in C++ and use [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows to have linearizability for reads and writes, has several open-source implementations in different languages. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 791ac344bcf..9ace094a4d8 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -278,4 +278,16 @@ Possible values: Default value: `0`. +## check_sample_column_is_correct {#check_sample_column_is_correct} + +Enables a check that the sampling column or sampling expression is correct at table creation. + +Possible values: + +- true — The sampling column or sampling expression is checked at table creation. +- false — The sampling column or sampling expression is not checked at table creation. + +Default value: `true`. + +By default, the ClickHouse server checks the sampling column or sampling expression at table creation. If you already have tables with an incorrect sampling expression, set the value to `false` so that the ClickHouse server does not raise an exception at startup. [Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index fc5a911cd7a..c2c0c3877c1 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -509,6 +509,23 @@ Possible values: Default value: `ALL`. +## join_algorithm {#settings-join_algorithm} + +Specifies the [JOIN](../../sql-reference/statements/select/join.md) algorithm. + +Possible values: + +- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used.
+- `partial_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) is used. +- `prefer_partial_merge` — ClickHouse always tries to use `merge` join if possible. +- `auto` — ClickHouse tries to change `hash` join to `merge` join on the fly to avoid running out of memory. + +Default value: `hash`. + +When using the `hash` algorithm, the right part of `JOIN` is loaded into RAM. + +When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to disk. The `merge` algorithm in ClickHouse differs a bit from the classic implementation. First ClickHouse sorts the right table by the [join key](../../sql-reference/statements/select/join.md#select-join) in blocks and creates a min-max index for the sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded blocks of the right table. + ## join_any_take_last_row {#settings-join_any_take_last_row} Changes behaviour of join operations with `ANY` strictness. @@ -1213,7 +1230,15 @@ Default value: `3`. ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers} -If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. +Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md#json) format. +Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. + +Possible values: + +- 0 — Integers are output without quotes. +- 1 — Integers are enclosed in quotes. + +Default value: 1. ## output_format_json_quote_denormals {#settings-output_format_json_quote_denormals} @@ -1730,7 +1755,7 @@ Default value: 0. ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} -Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read. +Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read. These functions can be transformed: @@ -1961,6 +1986,13 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB) Default value: 32768 (32 KiB) +## output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern} + +Regular expression matching the names of columns of type String that should be output as the Avro `string` type (the default is `bytes`). +RE2 syntax is supported. + +Type: string + ## format_avro_schema_registry_url {#format_avro_schema_registry_url} Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format. @@ -1990,6 +2022,16 @@ Possible values: Default value: 16. +## merge_selecting_sleep_ms {#merge_selecting_sleep_ms} + +Sleep time for merge selecting when no part is selected. A lower value triggers selecting tasks in `background_schedule_pool` more frequently, which results in a large number of requests to ZooKeeper in large-scale clusters. + +Possible values: + +- Any positive integer. + +Default value: 5000. + ## parallel_distributed_insert_select {#parallel_distributed_insert_select} Enables parallel distributed `INSERT ... SELECT` query.
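A quick sketch of the quoting behavior that the rewritten `output_format_json_quote_64bit_integers` description above documents (expected output shown in comments):

``` sql
SELECT toUInt64(42) AS x FORMAT JSONEachRow;
-- {"x":"42"}   default: 64-bit integers are enclosed in quotes

SELECT toUInt64(42) AS x SETTINGS output_format_json_quote_64bit_integers = 0 FORMAT JSONEachRow;
-- {"x":42}    quoting disabled
```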
@@ -3123,6 +3165,53 @@ SELECT FROM fuse_tbl ``` +## allow_experimental_database_replicated {#allow_experimental_database_replicated} + +Enables creating databases with the [Replicated](../../engines/database-engines/replicated.md) engine. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + +## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec} + +Sets how long, in seconds, the initial DDL query should wait for a Replicated database to process previous DDL queue entries. + +Possible values: + +- Positive integer. +- 0 — Unlimited. + +Default value: `300`. + +## distributed_ddl_task_timeout {#distributed_ddl_task_timeout} + +Sets the timeout for DDL query responses from all hosts in the cluster. If a DDL request has not been performed on all hosts, the response will contain a timeout error and the request will be executed in async mode. A negative value means infinite. + +Possible values: + +- Positive integer. +- 0 — Async mode. +- Negative integer — infinite timeout. + +Default value: `180`. + +## distributed_ddl_output_mode {#distributed_ddl_output_mode} + +Sets the format of the distributed DDL query result. + +Possible values: + +- `throw` — Returns a result set with the query execution status for all hosts where the query has finished. If the query has failed on some hosts, the first exception is rethrown. If the query has not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) has been exceeded, a `TIMEOUT_EXCEEDED` exception is thrown. +- `none` — Similar to `throw`, but the distributed DDL query returns no result set. +- `null_status_on_timeout` — Returns `NULL` as the execution status in the rows of the result set for hosts where the query has not finished, instead of throwing `TIMEOUT_EXCEEDED`. +- `never_throw` — Does not throw `TIMEOUT_EXCEEDED` and does not rethrow exceptions if the query has failed on some hosts. + +Default value: `throw`. + ## flatten_nested {#flatten-nested} Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns. @@ -3202,3 +3291,14 @@ Default value: `1`. **Usage** If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays. + +## output_format_arrow_low_cardinality_as_dictionary {#output-format-arrow-low-cardinality-as-dictionary} + +Allows converting the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md#data-format-arrow) format for `SELECT` queries. + +Possible values: + +- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. +- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. + +Default value: `0`. diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md index 515f704797a..683666e1f77 100644 --- a/docs/en/operations/system-tables/data_skipping_indices.md +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -8,12 +8,11 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — Table name. - `name` ([String](../../sql-reference/data-types/string.md)) — Index name. - `type` ([String](../../sql-reference/data-types/string.md)) — Index type. -- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression used to calculate the index.
-- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of granules in the block. +- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation. +- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. **Example** - ```sql SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical; ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index d487a187945..619e9a5093e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -34,7 +34,7 @@ Input table: Query: ``` sql -SELECT medianDeterministic(val, 1) FROM t +SELECT medianDeterministic(val, 1) FROM t; ``` Result: diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 5f0f400ce43..b3ff26a943d 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -47,6 +47,7 @@ Settings: - [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) - [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) - [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) +- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) Functions: @@ -57,5 +58,3 @@ Functions: - [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality). - [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). - [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). - -[Original article](https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/) diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index 86ea55004fd..ad8b091023d 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -9,11 +9,8 @@ toc_title: Map(key, value) **Parameters** -- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). -- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). - -!!! warning "Warning" - Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`. +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `value` — The value part of the pair. 
[String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [Array](../../sql-reference/data-types/array.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), or [FixedString](../../sql-reference/data-types/fixedstring.md). To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity. diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index c06711b3cd2..29de9ee4b70 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -211,7 +211,7 @@ SELECT nullIf(1, 2); ## assumeNotNull {#assumenotnull} -Results in a value of type [Nullable](../../sql-reference/data-types/nullable.md) for a non- `Nullable`, if the value is not `NULL`. +Results in an equivalent non-`Nullable` value for a [Nullable](../../sql-reference/data-types/nullable.md) type. In case the original value is `NULL` the result is undetermined. See also `ifNull` and `coalesce` functions. ``` sql assumeNotNull(x) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 20dc7b29902..6c03f55cebe 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -195,6 +195,41 @@ Result: └────────────────────┘ ``` +## h3ToGeo {#h3togeo} + +Returns `(lon, lat)` that corresponds to the provided H3 index. + +**Syntax** + +``` sql +h3ToGeo(h3Index) +``` + +**Arguments** + +- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Query: + +``` sql +SELECT h3ToGeo(644325524701193974) coordinates; +``` + +Result: + +``` text +┌─coordinates───────────────────────────┠+│ (37.79506616830252,55.71290243145668) │ +└───────────────────────────────────────┘ +``` ## h3kRing {#h3kring} Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order. diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e731180c393..596ad17f07d 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -306,3 +306,49 @@ Result: └───────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` +## toJSONString {#tojsonstring} + +Serializes a value to its JSON representation. Various data types and nested structures are supported. +64-bit [integers](../../sql-reference/data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. +Special values `NaN` and `inf` are replaced with `null`. Enable [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) setting to show them. +When serializing an [Enum](../../sql-reference/data-types/enum.md) value, the function outputs its name. + +**Syntax** + +``` sql +toJSONString(value) +``` + +**Arguments** + +- `value` — Value to serialize. Value may be of any data type. 
+ +**Returned value** + +- JSON representation of the value. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +The first example shows serialization of a [Map](../../sql-reference/data-types/map.md). +The second example shows some special values wrapped into a [Tuple](../../sql-reference/data-types/tuple.md). + +Query: + +``` sql +SELECT toJSONString(map('key1', 1, 'key2', 2)); +SELECT toJSONString(tuple(1.25, NULL, NaN, +inf, -inf, [])) SETTINGS output_format_json_quote_denormals = 1; +``` + +Result: + +``` text +{"key1":1,"key2":2} +[1.25,null,"nan","inf","-inf",[]] +``` + +**See Also** + +- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) +- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 661469e6901..17bb75b9a2f 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -465,27 +465,29 @@ Result: ## CAST(x, T) {#type_conversion_function-cast} -Converts input value `x` to the `T` data type. Unlike to `reinterpret` function, type conversion is performed in a natural way. - -The syntax `CAST(x AS t)` is also supported. - -!!! note "Note" - If value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns `255`. +Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised. +Several syntax variants are supported. **Syntax** ``` sql CAST(x, T) +CAST(x AS t) +x::t ``` **Arguments** -- `x` — Any type. -- `T` — Destination type. [String](../../sql-reference/data-types/string.md). +- `x` — A value to convert. May be of any type. +- `T` — The name of the target data type. [String](../../sql-reference/data-types/string.md). +- `t` — The target data type. **Returned value** -- Destination type value. +- Converted value. + +!!! note "Note" + If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`. 
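If the silent overflow described in the note is not what you want, a guarded conversion can be sketched with `accurateCastOrNull` (an assumption: this function is available in your ClickHouse version; it is documented alongside the other type-conversion functions):

``` sql
-- CAST overflows, while accurateCastOrNull is assumed to return NULL for out-of-range values.
SELECT CAST(-1, 'UInt8') AS overflowed, accurateCastOrNull(-1, 'UInt8') AS guarded;
-- overflowed = 255, guarded = NULL
```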
**Examples** @@ -494,16 +496,16 @@ Query: ```sql SELECT CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint, - CAST(toInt8(1), 'Float32') AS cast_int_to_float, - CAST('1', 'UInt32') AS cast_string_to_int; + CAST(1.5 AS Decimal(3,2)) AS cast_float_to_decimal, + '1'::Int32 AS cast_string_to_int; ``` Result: ``` -┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┠-│ 255 │ 1 │ 1 │ -└──────────────────┴───────────────────┴────────────────────┘ +┌─cast_int_to_uint─┬─cast_float_to_decimal─┬─cast_string_to_int─┠+│ 255 │ 1.50 │ 1 │ +└──────────────────┴───────────────────────┴────────────────────┘ ``` Query: diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 70ac9acd186..8944ca26947 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -189,7 +189,7 @@ CREATE TABLE codec_example dt Date CODEC(ZSTD), ts DateTime CODEC(LZ4HC), float_value Float32 CODEC(NONE), - double_value Float64 CODEC(LZ4HC(9)) + double_value Float64 CODEC(LZ4HC(9)), value Float32 CODEC(Delta, ZSTD) ) ENGINE = diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index c90b4bf0eaa..7805af5e51e 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -36,14 +36,23 @@ Additional join types available in ClickHouse: - `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. - `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. -## Setting {#join-settings} +## Settings {#join-settings} -!!! note "Note" - The default join type can be overriden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting. +The default join type can be overriden using [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting. - Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. +The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. -### ASOF JOIN Usage {#asof-join-usage} +**See also** + +- [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) +- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row) +- [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls) +- [partial_merge_join_optimizations](../../../operations/settings/settings.md#partial_merge_join_optimizations) +- [partial_merge_join_rows_in_right_blocks](../../../operations/settings/settings.md#partial_merge_join_rows_in_right_blocks) +- [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge) +- [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) + +## ASOF JOIN Usage {#asof-join-usage} `ASOF JOIN` is useful when you need to join records that have no exact match. @@ -93,7 +102,7 @@ For example, consider the following tables: !!! 
note "Note" `ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. -## Distributed Join {#global-join} +## Distributed JOIN {#global-join} There are two ways to execute join involving distributed tables: @@ -102,6 +111,42 @@ There are two ways to execute join involving distributed tables: Be careful when using `GLOBAL`. For more information, see the [Distributed subqueries](../../../sql-reference/operators/in.md#select-distributed-subqueries) section. +## Implicit Type Conversion {#implicit-type-conversion} + +`INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, and `FULL JOIN` queries support the implicit type conversion for "join keys". However the query can not be executed, if join keys from the left and the right tables cannot be converted to a single type (for example, there is no data type that can hold all values from both `UInt64` and `Int64`, or `String` and `Int32`). + +**Example** + +Consider the table `t_1`: +```text +┌─a─┬─b─┬─toTypeName(a)─┬─toTypeName(b)─┠+│ 1 │ 1 │ UInt16 │ UInt8 │ +│ 2 │ 2 │ UInt16 │ UInt8 │ +└───┴───┴───────────────┴───────────────┘ +``` +and the table `t_2`: +```text +┌──a─┬────b─┬─toTypeName(a)─┬─toTypeName(b)───┠+│ -1 │ 1 │ Int16 │ Nullable(Int64) │ +│ 1 │ -1 │ Int16 │ Nullable(Int64) │ +│ 1 │ 1 │ Int16 │ Nullable(Int64) │ +└────┴──────┴───────────────┴─────────────────┘ +``` + +The query +```sql +SELECT a, b, toTypeName(a), toTypeName(b) FROM t_1 FULL JOIN t_2 USING (a, b); +``` +returns the set: +```text +┌──a─┬────b─┬─toTypeName(a)─┬─toTypeName(b)───┠+│ 1 │ 1 │ Int32 │ Nullable(Int64) │ +│ 2 │ 2 │ Int32 │ Nullable(Int64) │ +│ -1 │ 1 │ Int32 │ Nullable(Int64) │ +│ 1 │ -1 │ Int32 │ Nullable(Int64) │ +└────┴──────┴───────────────┴─────────────────┘ +``` + ## Usage Recommendations {#usage-recommendations} ### Processing of Empty or NULL Cells {#processing-of-empty-or-null-cells} @@ -139,9 +184,9 @@ If you need a `JOIN` for joining with dimension tables (these are relatively sma ### Memory Limitations {#memory-limitations} -By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the `` and creates a hash table for it in RAM. After some threshold of memory consumption, ClickHouse falls back to merge join algorithm. +By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the right_table and creates a hash table for it in RAM. If `join_algorithm = 'auto'` is enabled, then after some threshold of memory consumption, ClickHouse falls back to [merge](https://en.wikipedia.org/wiki/Sort-merge_join) join algorithm. For `JOIN` algorithms description see the [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) setting. -If you need to restrict join operation memory consumption use the following settings: +If you need to restrict `JOIN` operation memory consumption use the following settings: - [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — Limits number of rows in the hash table. - [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — Limits size of the hash table. 
diff --git a/docs/en/whats-new/security-changelog.md b/docs/en/whats-new/security-changelog.md index bebc9a6035f..97cad9965fd 100644 --- a/docs/en/whats-new/security-changelog.md +++ b/docs/en/whats-new/security-changelog.md @@ -3,6 +3,16 @@ toc_priority: 76 toc_title: Security Changelog --- +## Fixed in ClickHouse 21.4.3.21, 2021-04-12 {#fixed-in-clickhouse-release-21-4-3-21-2021-04-12} + +### CVE-2021-25263 {#cve-2021-25263} + +An attacker that has CREATE DICTIONARY privilege, can read arbitary file outside permitted directory. + +Fix has been pushed to versions 20.8.18.32-lts, 21.1.9.41-stable, 21.2.9.41-stable, 21.3.6.55-lts, 21.4.3.21-stable and later. + +Credits: [Vyacheslav Egoshin](https://twitter.com/vegoshin) + ## Fixed in ClickHouse Release 19.14.3.3, 2019-09-10 {#fixed-in-clickhouse-release-19-14-3-3-2019-09-10} ### CVE-2019-15024 {#cve-2019-15024} diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md deleted file mode 120000 index 8e172b919d8..00000000000 --- a/docs/ru/development/build-osx.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/build-osx.md \ No newline at end of file diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md new file mode 100644 index 00000000000..49da9f2b359 --- /dev/null +++ b/docs/ru/development/build-osx.md @@ -0,0 +1,125 @@ +--- +toc_priority: 65 +toc_title: Сборка на Mac OS X +--- +# Как Ñобрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x} + +Сборка должна запуÑкатьÑÑ Ñ x86_64 (Intel) на macOS верÑии 10.15 (Catalina) и выше в поÑледней верÑии компилÑтора Xcode's native AppleClang, Homebrew's vanilla Clang или в GCC-компилÑторах. + +## УÑтановка Homebrew {#install-homebrew} + +``` bash +$ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +``` + +## УÑтановка Xcode и инÑтрументов командной Ñтроки {#install-xcode-and-command-line-tools} + + 1. УÑтановите из App Store поÑледнюю верÑию [Xcode](https://apps.apple.com/am/app/xcode/id497799835?mt=12). + + 2. ЗапуÑтите ее, чтобы принÑÑ‚ÑŒ лицензионное Ñоглашение. Ðеобходимые компоненты уÑтановÑÑ‚ÑÑ Ð°Ð²Ñ‚Ð¾Ð¼Ð°Ñ‚Ð¸Ñ‡ÐµÑки. + + 3. Затем убедитеÑÑŒ, что в ÑиÑтеме выбрана поÑледнÑÑ Ð²ÐµÑ€ÑÐ¸Ñ Ð¸Ð½Ñтрументов командной Ñтроки: + + ``` bash + $ sudo rm -rf /Library/Developer/CommandLineTools + $ sudo xcode-select --install + ``` + + 4. ПерезагрузитеÑÑŒ. + +## УÑтановка компилÑторов, инÑтрументов и библиотек {#install-required-compilers-tools-and-libraries} + + ``` bash + $ brew update + $ brew install cmake ninja libtool gettext llvm gcc + ``` + +## ПроÑмотр иÑходников ClickHouse {#checkout-clickhouse-sources} + + ``` bash + $ git clone --recursive git@github.com:ClickHouse/ClickHouse.git # or https://github.com/ClickHouse/ClickHouse.git + ``` + +## Сборка ClickHouse {#build-clickhouse} + + Чтобы запуÑтить Ñборку в компилÑторе Xcode's native AppleClang: + + ``` bash + $ cd ClickHouse + $ rm -rf build + $ mkdir build + $ cd build + $ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. + $ cmake --build . --config RelWithDebInfo + $ cd .. + ``` + +Чтобы запуÑтить Ñборку в компилÑторе Homebrew's vanilla Clang: + + ``` bash + $ cd ClickHouse + $ rm -rf build + $ mkdir build + $ cd build + $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. 
+ $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. + $ cmake --build . --config RelWithDebInfo + $ cd .. + ``` + +Чтобы Ñобрать Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ компилÑтора Homebrew's vanilla GCC: + + ``` bash + $ cd ClickHouse + $ rm -rf build + $ mkdir build + $ cd build + $ cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. + $ cmake --build . --config RelWithDebInfo + $ cd .. + ``` + +## ÐŸÑ€ÐµÐ´ÑƒÐ¿Ñ€ÐµÐ¶Ð´ÐµÐ½Ð¸Ñ {#caveats} + +ЕÑли будете запуÑкать `clickhouse-server`, убедитеÑÑŒ, что увеличили ÑиÑтемную переменную `maxfiles`. + +!!! info "Note" + Вам понадобитÑÑ ÐºÐ¾Ð¼Ð°Ð½Ð´Ð° `sudo`. + +1. Создайте файл `/Library/LaunchDaemons/limit.maxfiles.plist` и помеÑтите в него Ñледующее: + + ``` xml + + + + + Label + limit.maxfiles + ProgramArguments + + launchctl + limit + maxfiles + 524288 + 524288 + + RunAtLoad + + ServiceIPC + + + + ``` + +2. Выполните команду: + + ``` bash + $ sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist + ``` + +3. ПерезагрузитеÑÑŒ. + +4. Чтобы проверить, как Ñто работает, выполните команду `ulimit -n`. + +[Original article](https://clickhouse.tech/docs/en/development/build_osx/) diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 463d38a44fb..2a49a7015a9 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -128,7 +128,7 @@ Ninja - ÑиÑтема запуÑка Ñборочных задач. /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" brew install cmake ninja -Проверьте верÑию CMake: `cmake --version`. ЕÑли верÑÐ¸Ñ Ð¼ÐµÐ½ÑŒÑˆÐµ 3.3, то уÑтановите новую верÑию Ñ Ñайта https://cmake.org/download/ +Проверьте верÑию CMake: `cmake --version`. 
ЕÑли верÑÐ¸Ñ Ð¼ÐµÐ½ÑŒÑˆÐµ 3.12, то уÑтановите новую верÑию Ñ Ñайта https://cmake.org/download/ ## ÐеобÑзательные внешние библиотеки {#neobiazatelnye-vneshnie-biblioteki} diff --git a/docs/ru/engines/database-engines/index.md b/docs/ru/engines/database-engines/index.md index d4fad8f43a9..119a0f53ff7 100644 --- a/docs/ru/engines/database-engines/index.md +++ b/docs/ru/engines/database-engines/index.md @@ -20,3 +20,5 @@ toc_title: "Введение" - [PostgreSQL](../../engines/database-engines/postgresql.md) +- [Replicated](../../engines/database-engines/replicated.md) + diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md index 2067dfecca0..b62670bc3c9 100644 --- a/docs/ru/engines/database-engines/materialize-mysql.md +++ b/docs/ru/engines/database-engines/materialize-mysql.md @@ -1,3 +1,4 @@ + --- toc_priority: 29 toc_title: MaterializeMySQL @@ -49,6 +50,7 @@ ENGINE = MaterializeMySQL('host:port', ['database' | database], 'user', 'passwor | DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | +| ENUM | [Enum](../../sql-reference/data-types/enum.md) | | STRING | [String](../../sql-reference/data-types/string.md) | | VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | | BLOB | [String](../../sql-reference/data-types/string.md) | @@ -79,7 +81,9 @@ DDL-запроÑÑ‹ в MySQL конвертируютÑÑ Ð² ÑоответÑтв - ЕÑли в запроÑе `SELECT` напрÑмую не указан Ñтолбец `_version`, то иÑпользуетÑÑ Ð¼Ð¾Ð´Ð¸Ñ„Ð¸ÐºÐ°Ñ‚Ð¾Ñ€ [FINAL](../../sql-reference/statements/select/from.md#select-from-final). Таким образом, выбираютÑÑ Ñ‚Ð¾Ð»ÑŒÐºÐ¾ Ñтроки Ñ `MAX(_version)`. -- ЕÑли в запроÑе `SELECT` напрÑмую не указан Ñтолбец `_sign`, то по умолчанию иÑпользуетÑÑ `WHERE _sign=1`. Таким образом, удаленные Ñтроки не включаютÑÑ Ð² результирующий набор. +- ЕÑли в запроÑе `SELECT` напрÑмую не указан Ñтолбец `_sign`, то по умолчанию иÑпользуетÑÑ `WHERE _sign=1`. Таким образом, удаленные Ñтроки не включаютÑÑ Ð² результирующий набор. + +- Результат включает комментарии к Ñтолбцам, еÑли они ÑущеÑтвуют в таблицах базы данных MySQL. ### ÐšÐ¾Ð½Ð²ÐµÑ€Ñ‚Ð°Ñ†Ð¸Ñ Ð¸Ð½Ð´ÐµÐºÑов {#index-conversion} diff --git a/docs/ru/engines/database-engines/replicated.md b/docs/ru/engines/database-engines/replicated.md new file mode 100644 index 00000000000..62be07f617c --- /dev/null +++ b/docs/ru/engines/database-engines/replicated.md @@ -0,0 +1,119 @@ + +# [ÑкÑпериментальный] Replicated {#replicated} + +Движок оÑнован на движке [Atomic](../../engines/database-engines/atomic.md). Он поддерживает репликацию метаданных через журнал DDL, запиÑываемый в ZooKeeper и выполнÑемый на вÑех репликах Ð´Ð»Ñ Ð´Ð°Ð½Ð½Ð¾Ð¹ базы данных. + +Ðа одном Ñервере ClickHouse может одновременно работать и обновлÑÑ‚ÑŒÑÑ Ð½ÐµÑколько реплицированных баз данных. Ðо не может ÑущеÑтвовать неÑкольких реплик одной и той же реплицированной базы данных. + +## Создание базы данных {#creating-a-database} +``` sql + CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] +``` + +**Параметры движка** + +- `zoo_path` — путь в ZooKeeper. Один и тот же путь ZooKeeper ÑоответÑтвует одной и той же базе данных. +- `shard_name` — Ð˜Ð¼Ñ ÑˆÐ°Ñ€Ð´Ð°. Реплики базы данных группируютÑÑ Ð² шарды по имени. +- `replica_name` — Ð˜Ð¼Ñ Ñ€ÐµÐ¿Ð»Ð¸ÐºÐ¸. 
Имена реплик должны быть разными Ð´Ð»Ñ Ð²Ñех реплик одного и того же шарда. + +!!! note "Предупреждение" + Ð”Ð»Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ† [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) еÑли аргументы не заданы, то иÑпользуютÑÑ Ð°Ñ€Ð³ÑƒÐ¼ÐµÐ½Ñ‚Ñ‹ по умолчанию: `/clickhouse/tables/{uuid}/{shard}` и `{replica}`. Они могут быть изменены в Ñерверных наÑтройках: [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) и [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). ÐœÐ°ÐºÑ€Ð¾Ñ `{uuid}` раÑкрываетÑÑ Ð² `UUID` таблицы, `{shard}` и `{replica}` — в Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ Ð¸Ð· конфига Ñервера. Ð’ будущем поÑвитÑÑ Ð²Ð¾Ð·Ð¼Ð¾Ð¶Ð½Ð¾ÑÑ‚ÑŒ иÑпользовать Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ `shard_name` и `replica_name` аргументов движка базы данных `Replicated`. + +## ОÑобенноÑти и рекомендации {#specifics-and-recommendations} + +DDL-запроÑÑ‹ Ñ Ð±Ð°Ð·Ð¾Ð¹ данных `Replicated` работают похожим образом на [ON CLUSTER](../../sql-reference/distributed-ddl.md) запроÑÑ‹, но Ñ Ð½ÐµÐ±Ð¾Ð»ÑŒÑˆÐ¸Ð¼Ð¸ отличиÑми. + +Сначала DDL-Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð¿Ñ‹Ñ‚Ð°ÐµÑ‚ÑÑ Ð²Ñ‹Ð¿Ð¾Ð»Ð½Ð¸Ñ‚ÑŒÑÑ Ð½Ð° инициаторе (том хоÑте, который изначально получил Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð¾Ñ‚ пользователÑ). ЕÑли Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ðµ выполнилÑÑ, то пользователь Ñразу получает ошибку, другие хоÑÑ‚Ñ‹ не пытаютÑÑ ÐµÐ³Ð¾ выполнить. ЕÑли Ð·Ð°Ð¿Ñ€Ð¾Ñ ÑƒÑпешно выполнилÑÑ Ð½Ð° инициаторе, то вÑе оÑтальные хоÑÑ‚Ñ‹ будут автоматичеÑки делать попытки выполнить его. +Инициатор попытаетÑÑ Ð´Ð¾Ð¶Ð´Ð°Ñ‚ÑŒÑÑ Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñа на других хоÑтах (не дольше [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout)) и вернёт таблицу Ñо ÑтатуÑами Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñа на каждом хоÑте. + +Поведение в Ñлучае ошибок регулируетÑÑ Ð½Ð°Ñтройкой [distributed_ddl_output_mode](../../operations/settings/settings.md#distributed_ddl_output_mode), Ð´Ð»Ñ `Replicated` лучше выÑтавлÑÑ‚ÑŒ её в `null_status_on_timeout` — Ñ‚.е. еÑли какие-то хоÑÑ‚Ñ‹ не уÑпели выполнить Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð·Ð° [distributed_ddl_task_timeout](../../operations/settings/settings.md#distributed_ddl_task_timeout), то вмеÑто иÑÐºÐ»ÑŽÑ‡ÐµÐ½Ð¸Ñ Ð´Ð»Ñ Ð½Ð¸Ñ… будет показан ÑÑ‚Ð°Ñ‚ÑƒÑ `NULL` в таблице. + +Ð’ ÑиÑтемной таблице [system.clusters](../../operations/system-tables/clusters.md) еÑÑ‚ÑŒ клаÑтер Ñ Ð¸Ð¼ÐµÐ½ÐµÐ¼, как у реплицируемой базы, который ÑоÑтоит из вÑех реплик базы. Этот клаÑтер обновлÑетÑÑ Ð°Ð²Ñ‚Ð¾Ð¼Ð°Ñ‚Ð¸Ñ‡ÐµÑки при Ñоздании/удалении реплик, и его можно иÑпользовать Ð´Ð»Ñ [Distributed](../../engines/table-engines/special/distributed.md#distributed) таблиц. + + При Ñоздании новой реплики базы, Ñта реплика Ñама Ñоздаёт таблицы. ЕÑли реплика долго была недоÑтупна и отÑтала от лога репликации — она ÑверÑет Ñвои локальные метаданные Ñ Ð°ÐºÑ‚ÑƒÐ°Ð»ÑŒÐ½Ñ‹Ð¼Ð¸ метаданными в ZooKeeper, перекладывает лишние таблицы Ñ Ð´Ð°Ð½Ð½Ñ‹Ð¼Ð¸ в отдельную нереплицируемую базу (чтобы Ñлучайно не удалить что-нибудь лишнее), Ñоздаёт недоÑтающие таблицы, обновлÑет имена таблиц, еÑли были переименованиÑ. Данные реплицируютÑÑ Ð½Ð° уровне `ReplicatedMergeTree`, Ñ‚.е. еÑли таблица не реплицируемаÑ, то данные реплицироватьÑÑ Ð½Ðµ будут (база отвечает только за метаданные). 
+
+## Примеры использования {#usage-example}
+
+Создадим реплицируемую базу на трех хостах:
+
+``` sql
+node1 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','replica1');
+node2 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','shard1','other_replica');
+node3 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','{replica}');
+```
+
+Выполним DDL-запрос на одном из хостов:
+
+``` sql
+CREATE TABLE r.rmt (n UInt64) ENGINE=ReplicatedMergeTree ORDER BY n;
+```
+
+Запрос выполнится на всех остальных хостах:
+
+``` text
+┌─────hosts────────────┬──status─┬─error─┬─num_hosts_remaining─┬─num_hosts_active─┐
+│ shard1|replica1      │       0 │       │                   2 │                0 │
+│ shard1|other_replica │       0 │       │                   1 │                0 │
+│ other_shard|r1       │       0 │       │                   0 │                0 │
+└──────────────────────┴─────────┴───────┴─────────────────────┴──────────────────┘
+```
+
+Кластер в системной таблице `system.clusters`:
+
+``` sql
+SELECT cluster, shard_num, replica_num, host_name, host_address, port, is_local
+FROM system.clusters WHERE cluster='r';
+```
+
+``` text
+┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
+│ r       │         1 │           1 │ node3     │ 127.0.0.1    │ 9002 │        0 │
+│ r       │         2 │           1 │ node2     │ 127.0.0.1    │ 9001 │        0 │
+│ r       │         2 │           2 │ node1     │ 127.0.0.1    │ 9000 │        1 │
+└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
+```
+
+Создадим распределенную таблицу и вставим в нее данные:
+
+``` sql
+node2 :) CREATE TABLE r.d (n UInt64) ENGINE=Distributed('r','r','rmt', n % 2);
+node3 :) INSERT INTO r.d SELECT * FROM numbers(10);
+node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
+```
+
+``` text
+┌─host──┬─groupArray(n)─┐
+│ node1 │ [1,3,5,7,9]   │
+│ node2 │ [0,2,4,6,8]   │
+└───────┴───────────────┘
+```
+
+Добавление реплики:
+
+``` sql
+node4 :) CREATE DATABASE r ENGINE=Replicated('some/path/r','other_shard','r2');
+```
+
+Новая реплика автоматически создаст все таблицы, которые есть в базе, а старые реплики перезагрузят из ZooKeeper-а конфигурацию кластера:
+
+``` text
+┌─cluster─┬─shard_num─┬─replica_num─┬─host_name─┬─host_address─┬─port─┬─is_local─┐
+│ r       │         1 │           1 │ node3     │ 127.0.0.1    │ 9002 │        0 │
+│ r       │         1 │           2 │ node4     │ 127.0.0.1    │ 9003 │        0 │
+│ r       │         2 │           1 │ node2     │ 127.0.0.1    │ 9001 │        0 │
+│ r       │         2 │           2 │ node1     │ 127.0.0.1    │ 9000 │        1 │
+└─────────┴───────────┴─────────────┴───────────┴──────────────┴──────┴──────────┘
+```
+
+Распределенная таблица также получит данные от нового хоста:
+
+```sql
+node2 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY host;
+```
+
+```text
+┌─host──┬─groupArray(n)─┐
+│ node2 │ [1,3,5,7,9]   │
+│ node4 │ [0,2,4,6,8]   │
+└───────┴───────────────┘
+```
\ No newline at end of file
diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index 66a94bcfbca..b6e7c3a2793 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -100,9 +100,9 @@ sudo ./clickhouse install
 
 Для других операционных систем и архитектуры AArch64 сборки ClickHouse предоставляются в виде кросс-компилированного бинарного файла из последнего коммита ветки `master` (с задержкой в несколько часов).
-- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` -- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` -- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` +- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse` +- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` +- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` ПоÑле ÑÐºÐ°Ñ‡Ð¸Ð²Ð°Ð½Ð¸Ñ Ð¼Ð¾Ð¶Ð½Ð¾ воÑпользоватьÑÑ `clickhouse client` Ð´Ð»Ñ Ð¿Ð¾Ð´ÐºÐ»ÑŽÑ‡ÐµÐ½Ð¸Ñ Ðº Ñерверу или `clickhouse local` Ð´Ð»Ñ Ð¾Ð±Ñ€Ð°Ð±Ð¾Ñ‚ÐºÐ¸ локальных данных. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 7780a75a706..563a137ac17 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -1165,12 +1165,14 @@ SELECT * FROM topic1_stream; | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `DOUBLE` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | | `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` | -| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | +| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | -МаÑÑивы могут быть вложенными и иметь в качеÑтве аргумента значение типа `Nullable`. +МаÑÑивы могут быть вложенными и иметь в качеÑтве аргумента значение типа `Nullable`. Типы `Tuple` и `Map` также могут быть вложенными. ClickHouse поддерживает наÑтраиваемую точноÑÑ‚ÑŒ Ð´Ð»Ñ Ñ„Ð¾Ñ€Ð¼Ð°Ñ‚Ð° `Decimal`. При выполнении запроÑа `INSERT` ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`. 
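
Ниже приведён предположительный набросок того, как вложенные типы Parquet читаются в ClickHouse через табличную функцию `file` (имя файла и структура колонок здесь условные):

``` sql
-- STRUCT отображается в Tuple, а MAP в Map (см. таблицу соответствия типов выше)
SELECT *
FROM file('data.parquet', 'Parquet', 'id UInt64, tags Map(String, String), point Tuple(Float64, Float64)');
```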
@@ -1218,12 +1220,17 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ | `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` | | `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` | | `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` | -| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` | -| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` | +| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | +| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | +| `DECIMAL256` | [Decimal256](../sql-reference/data-types/decimal.md)| `DECIMAL256` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | -МаÑÑивы могут быть вложенными и иметь в качеÑтве аргумента значение типа `Nullable`. +МаÑÑивы могут быть вложенными и иметь в качеÑтве аргумента значение типа `Nullable`. Типы `Tuple` и `Map` также могут быть вложенными. + +Тип `DICTIONARY` поддерживаетÑÑ Ð´Ð»Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñов `INSERT`. Ð”Ð»Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñов `SELECT` еÑÑ‚ÑŒ наÑтройка [output_format_arrow_low_cardinality_as_dictionary](../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary), ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ Ð¿Ð¾Ð·Ð²Ð¾Ð»Ñет выводить тип [LowCardinality](../sql-reference/data-types/lowcardinality.md) как `DICTIONARY`. ClickHouse поддерживает наÑтраиваемую точноÑÑ‚ÑŒ Ð´Ð»Ñ Ñ„Ð¾Ñ€Ð¼Ð°Ñ‚Ð° `Decimal`. При выполнении запроÑа `INSERT` ClickHouse обрабатывает тип данных Arrow `DECIMAL` как `Decimal128`. @@ -1276,8 +1283,10 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam | `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` | | `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` | | `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` | +| `STRUCT` | [Tuple](../sql-reference/data-types/tuple.md) | `STRUCT` | +| `MAP` | [Map](../sql-reference/data-types/map.md) | `MAP` | -МаÑÑивы могут быть вложенными и иметь в качеÑтве аргумента значение типа `Nullable`. +МаÑÑивы могут быть вложенными и иметь в качеÑтве аргумента значение типа `Nullable`. Типы `Tuple` и `Map` также могут быть вложенными. ClickHouse поддерживает наÑтраиваемую точноÑÑ‚ÑŒ Ð´Ð»Ñ Ñ„Ð¾Ñ€Ð¼Ð°Ñ‚Ð° `Decimal`. При выполнении запроÑа `INSERT` ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 625453c94c6..e5335a64078 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -490,6 +490,23 @@ ClickHouse может парÑить только базовый формат `Y Значение по умолчанию: `ALL`. +## join_algorithm {#settings-join_algorithm} + +ОпределÑет алгоритм Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñа [JOIN](../../sql-reference/statements/select/join.md). + +Возможные значениÑ: + +- `hash` — иÑпользуетÑÑ [алгоритм ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ñ…ÐµÑˆÐ¸Ñ€Ð¾Ð²Ð°Ð½Ð¸ÐµÐ¼](https://ru.wikipedia.org/wiki/Ðлгоритм_ÑоединениÑ_хешированием). +- `partial_merge` — иÑпользуетÑÑ [алгоритм ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ ÑлиÑнием Ñортированных ÑпиÑков](https://ru.wikipedia.org/wiki/Ðлгоритм_ÑоединениÑ_ÑлиÑнием_Ñортированных_ÑпиÑков). 
+- `prefer_partial_merge` — используется алгоритм соединения слиянием сортированных списков, когда это возможно.
+- `auto` — сервер ClickHouse пытается на лету заменить алгоритм `hash` на `merge`, чтобы избежать переполнения памяти.
+
+Значение по умолчанию: `hash`.
+
+При использовании алгоритма `hash` правая часть `JOIN` загружается в оперативную память.
+
+При использовании алгоритма `partial_merge` сервер сортирует данные и сбрасывает их на диск. Работа алгоритма `merge` в ClickHouse немного отличается от классической реализации. Сначала ClickHouse сортирует правую таблицу по блокам на основе [ключей соединения](../../sql-reference/statements/select/join.md#select-join) и для отсортированных блоков строит индексы min-max. Затем он сортирует куски левой таблицы на основе ключей соединения и объединяет их с правой таблицей операцией `JOIN`. Созданные min-max индексы используются для пропуска тех блоков из правой таблицы, которые не участвуют в данной операции `JOIN`.
+
 ## join_any_take_last_row {#settings-join_any_take_last_row}
 
 Изменяет поведение операций, выполняемых со строгостью `ANY`.
@@ -1204,8 +1221,15 @@ load_balancing = round_robin
 Работает для форматов JSONEachRow и TSKV.
 
 ## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
+Управляет кавычками при выводе 64-битных или более [целых чисел](../../sql-reference/data-types/int-uint.md) (например, `UInt64` или `Int128`) в формате [JSON](../../interfaces/formats.md#json).
+По умолчанию такие числа заключаются в кавычки. Это поведение соответствует большинству реализаций JavaScript.
 
-Если значение истинно, то при использовании JSON\* форматов UInt64 и Int64 числа выводятся в кавычках (из соображений совместимости с большинством реализаций JavaScript), иначе - без кавычек.
+Возможные значения:
+
+- 0 — числа выводятся без кавычек.
+- 1 — числа выводятся в кавычках.
+
+Значение по умолчанию: 1.
 
 ## output_format_json_quote_denormals {#settings-output_format_json_quote_denormals}
 
@@ -2979,6 +3003,53 @@ SELECT
 FROM fuse_tbl
 ```
 
+## allow_experimental_database_replicated {#allow_experimental_database_replicated}
+
+Позволяет создавать базы данных с движком [Replicated](../../engines/database-engines/replicated.md).
+
+Возможные значения:
+
+- 0 — функциональность отключена.
+- 1 — функциональность включена.
+
+Значение по умолчанию: `0`.
+
+## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
+
+Устанавливает, сколько секунд начальный DDL-запрос должен ждать, пока реплицированная база данных обработает предыдущие записи очереди DDL.
+
+Возможные значения:
+
+- Положительное целое число.
+- 0 — Не ограничено.
+
+Значение по умолчанию: `300`.
+
+## distributed_ddl_task_timeout {#distributed_ddl_task_timeout}
+
+Устанавливает тайм-аут для ответов на DDL-запросы от всех хостов в кластере. Если DDL-запрос не был выполнен на всех хостах, ответ будет содержать ошибку тайм-аута, и запрос будет выполнен в асинхронном режиме.
+
+Возможные значения:
+
+- Положительное целое число.
+- 0 — Асинхронный режим.
+- Отрицательное число — бесконечный тайм-аут.
+
+Значение по умолчанию: `180`.
+
+## distributed_ddl_output_mode {#distributed_ddl_output_mode}
+
+Задает формат результата распределенного DDL-запроса.
+ +Возможные значениÑ: + +- `throw` — возвращает набор результатов Ñо ÑтатуÑом Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñов Ð´Ð»Ñ Ð²Ñех хоÑтов, где завершен запроÑ. ЕÑли Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ðµ выполнилÑÑ Ð½Ð° некоторых хоÑтах, то будет выброшено иÑключение. ЕÑли Ð·Ð°Ð¿Ñ€Ð¾Ñ ÐµÑ‰Ðµ не закончен на некоторых хоÑтах и таймаут [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) превышен, то выбраÑываетÑÑ Ð¸Ñключение `TIMEOUT_EXCEEDED`. +- `none` — идентично `throw`, но раÑпределенный DDL-Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ðµ возвращает набор результатов. +- `null_status_on_timeout` — возвращает `NULL` в качеÑтве ÑтатуÑа Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ Ð² некоторых Ñтроках набора результатов вмеÑто выбраÑÑ‹Ð²Ð°Ð½Ð¸Ñ `TIMEOUT_EXCEEDED`, еÑли Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ðµ закончен на ÑоответÑтвующих хоÑтах. +- `never_throw` — не выбраÑывает иÑключение и `TIMEOUT_EXCEEDED`, еÑли Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ðµ удалÑÑ Ð½Ð° некоторых хоÑтах. + +Значение по умолчанию: `throw`. + ## flatten_nested {#flatten-nested} УÑтанавливает формат данных у [вложенных](../../sql-reference/data-types/nested-data-structures/nested.md) Ñтолбцов. @@ -3059,3 +3130,14 @@ SETTINGS index_granularity = 8192 │ **ИÑпользование** ЕÑли уÑтановлено значение `0`, то Ñ‚Ð°Ð±Ð»Ð¸Ñ‡Ð½Ð°Ñ Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð½Ðµ делает Nullable Ñтолбцы, а вмеÑто NULL выÑтавлÑет Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð¾ умолчанию Ð´Ð»Ñ ÑкалÑрного типа. Это также применимо Ð´Ð»Ñ Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ð¹ NULL внутри маÑÑивов. + +## output_format_arrow_low_cardinality_as_dictionary {#output-format-arrow-low-cardinality-as-dictionary} + +ПозволÑет конвертировать тип [LowCardinality](../../sql-reference/data-types/lowcardinality.md) в тип `DICTIONARY` формата [Arrow](../../interfaces/formats.md#data-format-arrow) Ð´Ð»Ñ Ð·Ð°Ð¿Ñ€Ð¾Ñов `SELECT`. + +Возможные значениÑ: + +- 0 — тип `LowCardinality` не конвертируетÑÑ Ð² тип `DICTIONARY`. +- 1 — тип `LowCardinality` конвертируетÑÑ Ð² тип `DICTIONARY`. + +Значение по умолчанию: `0`. diff --git a/docs/ru/operations/system-tables/data_skipping_indices.md b/docs/ru/operations/system-tables/data_skipping_indices.md new file mode 100644 index 00000000000..39e13ed1d5a --- /dev/null +++ b/docs/ru/operations/system-tables/data_skipping_indices.md @@ -0,0 +1,38 @@ +# system.data_skipping_indices {#system-data-skipping-indices} + +Содержит информацию о ÑущеÑтвующих индекÑах пропуÑка данных во вÑех таблицах. + +Столбцы: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Ð¸Ð¼Ñ Ð±Ð°Ð·Ñ‹ данных. +- `table` ([String](../../sql-reference/data-types/string.md)) — Ð¸Ð¼Ñ Ñ‚Ð°Ð±Ð»Ð¸Ñ†Ñ‹. +- `name` ([String](../../sql-reference/data-types/string.md)) — Ð¸Ð¼Ñ Ð¸Ð½Ð´ÐµÐºÑа. +- `type` ([String](../../sql-reference/data-types/string.md)) — тип индекÑа. +- `expr` ([String](../../sql-reference/data-types/string.md)) — выражение, иÑпользуемое Ð´Ð»Ñ Ð²Ñ‹Ñ‡Ð¸ÑÐ»ÐµÐ½Ð¸Ñ Ð¸Ð½Ð´ÐµÐºÑа. +- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количеÑтво гранул в блоке данных. 
+ +**Пример** + +```sql +SELECT * FROM system.data_skipping_indices LIMIT 2 FORMAT Vertical; +``` + +```text +Row 1: +────── +database: default +table: user_actions +name: clicks_idx +type: minmax +expr: clicks +granularity: 1 + +Row 2: +────── +database: default +table: users +name: contacts_null_idx +type: minmax +expr: assumeNotNull(contacts_null) +granularity: 1 +``` diff --git a/docs/ru/sql-reference/aggregate-functions/reference/median.md b/docs/ru/sql-reference/aggregate-functions/reference/median.md index 1472809e2e3..0c4b0db12c5 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/median.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/median.md @@ -4,7 +4,6 @@ Функции: - - `median` — Ñиноним Ð´Ð»Ñ [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). - `medianDeterministic` — Ñиноним Ð´Ð»Ñ [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). - `medianExact` — Ñиноним Ð´Ð»Ñ [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). @@ -31,7 +30,7 @@ ЗапроÑ: ``` sql -SELECT medianDeterministic(val, 1) FROM t +SELECT medianDeterministic(val, 1) FROM t; ``` Результат: @@ -41,4 +40,3 @@ SELECT medianDeterministic(val, 1) FROM t │ 1.5 │ └─────────────────────────────┘ ``` - diff --git a/docs/ru/sql-reference/data-types/lowcardinality.md b/docs/ru/sql-reference/data-types/lowcardinality.md index fe9118b1e14..71282835372 100644 --- a/docs/ru/sql-reference/data-types/lowcardinality.md +++ b/docs/ru/sql-reference/data-types/lowcardinality.md @@ -15,7 +15,7 @@ LowCardinality(data_type) **Параметры** -- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md) и чиÑла за иÑключением типа [Decimal](decimal.md). `LowCardinality` неÑффективен Ð´Ð»Ñ Ð½ÐµÐºÐ¾Ñ‚Ð¾Ñ€Ñ‹Ñ… типов данных, Ñм. опиÑание наÑтройки [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types). +- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md) и чиÑла за иÑключением типа [Decimal](decimal.md). `LowCardinality` неÑффективен Ð´Ð»Ñ Ð½ÐµÐºÐ¾Ñ‚Ð¾Ñ€Ñ‹Ñ… типов данных, Ñм. опиÑание наÑтройки [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types). ## ОпиÑание {#lowcardinality-dscr} @@ -23,11 +23,11 @@ LowCardinality(data_type) ЭффективноÑÑ‚ÑŒ иÑÐ¿Ð¾Ð»ÑŒÐ·Ð¾Ð²Ð°Ð½Ð¸Ñ Ñ‚Ð¸Ð¿Ð° данных `LowCarditality` завиÑит от Ñ€Ð°Ð·Ð½Ð¾Ð¾Ð±Ñ€Ð°Ð·Ð¸Ñ Ð´Ð°Ð½Ð½Ñ‹Ñ…. ЕÑли Ñловарь Ñодержит менее 10 000 различных значений, ClickHouse в оÑновном показывает более выÑокую ÑффективноÑÑ‚ÑŒ Ñ‡Ñ‚ÐµÐ½Ð¸Ñ Ð¸ Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð´Ð°Ð½Ð½Ñ‹Ñ…. ЕÑли же Ñловарь Ñодержит более 100 000 различных значений, ClickHouse может работать хуже, чем при иÑпользовании обычных типов данных. -При работе Ñо Ñтроками, иÑпользование `LowCardinality` вмеÑто [Enum](enum.md) обеÑпечивает большую гибкоÑÑ‚ÑŒ в иÑпользовании и чаÑто показывает такую же или более выÑокую ÑффективноÑÑ‚ÑŒ. +При работе Ñо Ñтроками иÑпользование `LowCardinality` вмеÑто [Enum](enum.md) обеÑпечивает большую гибкоÑÑ‚ÑŒ в иÑпользовании и чаÑто показывает такую же или более выÑокую ÑффективноÑÑ‚ÑŒ. 
## Пример -Создать таблицу Ñо Ñтолбцами типа `LowCardinality`: +Создание таблицы Ñо Ñтолбцами типа `LowCardinality`: ```sql CREATE TABLE lc_t @@ -43,18 +43,18 @@ ORDER BY id ÐаÑтройки: -- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) -- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) -- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) -- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) +- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) +- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) +- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) +- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) +- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) Функции: -- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality) +- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality) ## Смотрите также -- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality). -- [Reducing Clickhouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). - +- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality). +- [Reducing Clickhouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md index a703eb1b0ac..46dcbb8c037 100644 --- a/docs/ru/sql-reference/data-types/map.md +++ b/docs/ru/sql-reference/data-types/map.md @@ -9,11 +9,8 @@ toc_title: Map(key, value) **Параметры** -- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). -- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). - -!!! warning "Предупреждение" - Ð¡ÐµÐ¹Ñ‡Ð°Ñ Ð¸Ñпользование типа данных `Map` ÑвлÑетÑÑ ÑкÑпериментальной возможноÑтью. Чтобы иÑпользовать Ñтот тип данных, включите наÑтройку `allow_experimental_map_type = 1`. +- `key` — ключ. 
[String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md) или [FixedString](../../sql-reference/data-types/fixedstring.md). +- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [Array](../../sql-reference/data-types/array.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md) или [FixedString](../../sql-reference/data-types/fixedstring.md). Чтобы получить значение из колонки `a Map('key', 'value')`, иÑпользуйте ÑинтакÑÐ¸Ñ `a['key']`. Ð’ наÑтоÑщее Ð²Ñ€ÐµÐ¼Ñ Ñ‚Ð°ÐºÐ°Ñ Ð¿Ð¾Ð´Ñтановка работает по алгоритму Ñ Ð»Ð¸Ð½ÐµÐ¹Ð½Ð¾Ð¹ ÑложноÑтью. diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 8941ccc1691..b935244e821 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -306,3 +306,51 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello" │ [('d','"hello"'),('f','"world"')] │ └───────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + + +## toJSONString {#tojsonstring} + +Сериализует значение в JSON предÑтавление. ПоддерживаютÑÑ Ñ€Ð°Ð·Ð»Ð¸Ñ‡Ð½Ñ‹Ðµ типы данных и вложенные Ñтруктуры. +По умолчанию 64-битные [целые чиÑла](../../sql-reference/data-types/int-uint.md) и более (например, `UInt64` или `Int128`) заключаютÑÑ Ð² кавычки. ÐаÑтройка [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) управлÑет Ñтим поведением. +Специальные Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ `NaN` и `inf` заменÑÑŽÑ‚ÑÑ Ð½Ð° `null`. Чтобы они отображалиÑÑŒ, включите наÑтройку [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals). +Когда ÑериализуетÑÑ Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ðµ [Enum](../../sql-reference/data-types/enum.md), то Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð²Ñ‹Ð²Ð¾Ð´Ð¸Ñ‚ его имÑ. + +**СинтакÑиÑ** + +``` sql +toJSONString(value) +``` + +**Ðргументы** + +- `value` — значение, которое необходимо Ñериализовать. Может быть любого типа. + +**Возвращаемое значение** + +- JSON предÑтавление значениÑ. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Первый пример показывает Ñериализацию [Map](../../sql-reference/data-types/map.md). +Во втором примере еÑÑ‚ÑŒ Ñпециальные значениÑ, обернутые в [Tuple](../../sql-reference/data-types/tuple.md). 
+ +ЗапроÑ: + +``` sql +SELECT toJSONString(map('key1', 1, 'key2', 2)); +SELECT toJSONString(tuple(1.25, NULL, NaN, +inf, -inf, [])) SETTINGS output_format_json_quote_denormals = 1; +``` + +Результат: + +``` text +{"key1":1,"key2":2} +[1.25,null,"nan","inf","-inf",[]] +``` + +**Смотрите также** + +- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) +- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 8707642eb59..059770c7b6b 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -462,27 +462,29 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, ## CAST(x, T) {#type_conversion_function-cast} -Преобразует входное значение `x` в указанный тип данных `T`. Ð’ отличии от функции `reinterpret` иÑпользует внешнее предÑтавление Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ `x`. - -ПоддерживаетÑÑ Ñ‚Ð°ÐºÐ¶Ðµ ÑинтакÑÐ¸Ñ `CAST(x AS t)`. - -!!! warning "Предупреждение" - ЕÑли значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Ðапример, `CAST(-1, 'UInt8')` возвращает 255. +Преобразует входное значение к указанному типу данных. Ð’ отличие от функции [reinterpret](#type_conversion_function-reinterpret) `CAST` пытаетÑÑ Ð¿Ñ€ÐµÐ´Ñтавить то же Ñамое значение в новом типе данных. ЕÑли преобразование невозможно, то возникает иÑключение. +ПоддерживаетÑÑ Ð½ÐµÑколько вариантов ÑинтакÑиÑа. **СинтакÑиÑ** ``` sql CAST(x, T) +CAST(x AS t) +x::t ``` **Ðргументы** -- `x` — любой тип данных. -- `T` — конечный тип данных. [String](../../sql-reference/data-types/string.md). +- `x` — значение, которое нужно преобразовать. Может быть любого типа. +- `T` — Ð¸Ð¼Ñ Ñ‚Ð¸Ð¿Ð° данных. [String](../../sql-reference/data-types/string.md). +- `t` — тип данных. **Возвращаемое значение** -- Значение конечного типа данных. +- Преобразованное значение. + +!!! note "Примечание" + ЕÑли входное значение выходит за границы нового типа, то результат переполнÑетÑÑ. Ðапример, `CAST(-1, 'UInt8')` возвращает `255`. **Примеры** @@ -491,16 +493,16 @@ CAST(x, T) ```sql SELECT CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint, - CAST(toInt8(1), 'Float32') AS cast_int_to_float, - CAST('1', 'UInt32') AS cast_string_to_int + CAST(1.5 AS Decimal(3,2)) AS cast_float_to_decimal, + '1'::Int32 AS cast_string_to_int; ``` Результат: ``` -┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┠-│ 255 │ 1 │ 1 │ -└──────────────────┴───────────────────┴────────────────────┘ +┌─cast_int_to_uint─┬─cast_float_to_decimal─┬─cast_string_to_int─┠+│ 255 │ 1.50 │ 1 │ +└──────────────────┴───────────────────────┴────────────────────┘ ``` ЗапроÑ: @@ -524,7 +526,7 @@ SELECT Преобразование в FixedString(N) работает только Ð´Ð»Ñ Ð°Ñ€Ð³ÑƒÐ¼ÐµÐ½Ñ‚Ð¾Ð² типа [String](../../sql-reference/data-types/string.md) или [FixedString](../../sql-reference/data-types/fixedstring.md). -ПоддерживаетÑÑ Ð¿Ñ€ÐµÐ¾Ð±Ñ€Ð°Ð·Ð¾Ð²Ð°Ð½Ð¸Ðµ к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. +ПоддерживаетÑÑ Ð¿Ñ€ÐµÐ¾Ð±Ñ€Ð°Ð·Ð¾Ð²Ð°Ð½Ð¸Ðµ к типу [Nullable](../../sql-reference/data-types/nullable.md) и обратно. 
**Примеры** diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 0a485c7b591..f875103a498 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -17,7 +17,7 @@ toc_title: PARTITION - [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) — очиÑтить поÑтроенные вторичные индекÑÑ‹ Ð´Ð»Ñ Ð·Ð°Ð´Ð°Ð½Ð½Ð¾Ð¹ партиции; - [FREEZE PARTITION](#alter_freeze-partition) — Ñоздать резервную копию партиции; - [UNFREEZE PARTITION](#alter_unfreeze-partition) — удалить резервную копию партиции; -- [FETCH PARTITION](#alter_fetch-partition) — Ñкачать партицию Ñ Ð´Ñ€ÑƒÐ³Ð¾Ð³Ð¾ Ñервера; +- [FETCH PARTITION\|PART](#alter_fetch-partition) — Ñкачать партицию/куÑок Ñ Ð´Ñ€ÑƒÐ³Ð¾Ð³Ð¾ Ñервера; - [MOVE PARTITION\|PART](#alter_move-partition) — перемеÑтить партицию/куÑкок на другой диÑк или том. - [UPDATE IN PARTITION](#update-in-partition) — обновить данные внутри партиции по уÑловию. - [DELETE IN PARTITION](#delete-in-partition) — удалить данные внутри партиции по уÑловию. @@ -209,29 +209,35 @@ ALTER TABLE 'table_name' UNFREEZE [PARTITION 'part_expr'] WITH NAME 'backup_name УдалÑет Ñ Ð´Ð¸Ñка "замороженные" партиции Ñ ÑƒÐºÐ°Ð·Ð°Ð½Ð½Ñ‹Ð¼ именем. ЕÑли ÑÐµÐºÑ†Ð¸Ñ `PARTITION` опущена, Ð·Ð°Ð¿Ñ€Ð¾Ñ ÑƒÐ´Ð°Ð»Ñет резервную копию вÑех партиций Ñразу. -## FETCH PARTITION {#alter_fetch-partition} +## FETCH PARTITION\|PART {#alter_fetch-partition} ``` sql -ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper' +ALTER TABLE table_name FETCH PARTITION|PART partition_expr FROM 'path-in-zookeeper' ``` Загружает партицию Ñ Ð´Ñ€ÑƒÐ³Ð¾Ð³Ð¾ Ñервера. Этот Ð·Ð°Ð¿Ñ€Ð¾Ñ Ñ€Ð°Ð±Ð¾Ñ‚Ð°ÐµÑ‚ только Ð´Ð»Ñ Ñ€ÐµÐ¿Ð»Ð¸Ñ†Ð¸Ñ€Ð¾Ð²Ð°Ð½Ð½Ñ‹Ñ… таблиц. Ð—Ð°Ð¿Ñ€Ð¾Ñ Ð²Ñ‹Ð¿Ð¾Ð»Ð½Ñет Ñледующее: -1. Загружает партицию Ñ ÑƒÐºÐ°Ð·Ð°Ð½Ð½Ð¾Ð³Ð¾ шарда. Путь к шарду задаетÑÑ Ð² Ñекции `FROM` (‘path-in-zookeeper’). Обратите внимание, нужно задавать путь к шарду в ZooKeeper. +1. Загружает партицию/куÑок Ñ ÑƒÐºÐ°Ð·Ð°Ð½Ð½Ð¾Ð³Ð¾ шарда. Путь к шарду задаетÑÑ Ð² Ñекции `FROM` (‘path-in-zookeeper’). Обратите внимание, нужно задавать путь к шарду в ZooKeeper. 2. Помещает загруженные данные в директорию `detached` таблицы `table_name`. Чтобы прикрепить Ñти данные к таблице, иÑпользуйте Ð·Ð°Ð¿Ñ€Ð¾Ñ [ATTACH PARTITION\|PART](#alter_attach-partition). Ðапример: +1. FETCH PARTITION ``` sql ALTER TABLE users FETCH PARTITION 201902 FROM '/clickhouse/tables/01-01/visits'; ALTER TABLE users ATTACH PARTITION 201902; ``` +2. FETCH PART +``` sql +ALTER TABLE users FETCH PART 201901_2_2_0 FROM '/clickhouse/tables/01-01/visits'; +ALTER TABLE users ATTACH PART 201901_2_2_0; +``` Следует иметь в виду: -- Ð—Ð°Ð¿Ñ€Ð¾Ñ `ALTER TABLE t FETCH PARTITION` не реплицируетÑÑ. Он загружает партицию в директорию `detached` только на локальном Ñервере. +- Ð—Ð°Ð¿Ñ€Ð¾Ñ `ALTER TABLE t FETCH PARTITION|PART` не реплицируетÑÑ. Он загружает партицию в директорию `detached` только на локальном Ñервере. - Ð—Ð°Ð¿Ñ€Ð¾Ñ `ALTER TABLE t ATTACH` реплицируетÑÑ â€” он добавлÑет данные в таблицу Ñразу на вÑех репликах. Ðа одной из реплик данные будут добавлены из директории `detached`, а на других — из ÑоÑедних реплик. Перед загрузкой данных ÑиÑтема проверÑет, ÑущеÑтвует ли Ð¿Ð°Ñ€Ñ‚Ð¸Ñ†Ð¸Ñ Ð¸ Ñовпадает ли её Ñтруктура Ñо Ñтруктурой таблицы. При Ñтом автоматичеÑки выбираетÑÑ Ð½Ð°Ð¸Ð±Ð¾Ð»ÐµÐµ Ð°ÐºÑ‚ÑƒÐ°Ð»ÑŒÐ½Ð°Ñ Ñ€ÐµÐ¿Ð»Ð¸ÐºÐ° Ñреди вÑех живых реплик. 
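
После `FETCH PARTITION|PART` загруженные куски до выполнения `ATTACH` можно посмотреть в системной таблице `system.detached_parts`. Примерный запрос (имя таблицы условное):

``` sql
-- куски, лежащие в каталоге detached указанной таблицы
SELECT database, table, name, reason FROM system.detached_parts WHERE table = 'users';
```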
diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 05ffaa22bbd..1d8ec3c60b0 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -282,7 +282,7 @@ GRANT INSERT(x,y) ON db.table TO john - `ALTER MATERIALIZE TTL`. Уровень: `TABLE`. ÐлиаÑÑ‹: `MATERIALIZE TTL` - `ALTER SETTINGS`. Уровень: `TABLE`. ÐлиаÑÑ‹: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` - `ALTER MOVE PARTITION`. Уровень: `TABLE`. ÐлиаÑÑ‹: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` - - `ALTER FETCH PARTITION`. Уровень: `TABLE`. ÐлиаÑÑ‹: `FETCH PARTITION` + - `ALTER FETCH PARTITION`. Уровень: `TABLE`. ÐлиаÑÑ‹: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART` - `ALTER FREEZE PARTITION`. Уровень: `TABLE`. ÐлиаÑÑ‹: `FREEZE PARTITION` - `ALTER VIEW` Уровень: `GROUP` - `ALTER VIEW REFRESH `. Уровень: `VIEW`. ÐлиаÑÑ‹: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md index 4bd883c87ff..72352fe2047 100644 --- a/docs/ru/sql-reference/statements/select/join.md +++ b/docs/ru/sql-reference/statements/select/join.md @@ -4,7 +4,7 @@ toc_title: JOIN # Ð¡ÐµÐºÑ†Ð¸Ñ JOIN {#select-join} -Join Ñоздаёт новую таблицу путем Ð¾Ð±ÑŠÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ñтолбцов из одной или неÑкольких таблиц Ñ Ð¸Ñпользованием общих Ð´Ð»Ñ ÐºÐ°Ð¶Ð´Ð¾Ð¹ из них значений. Это Ð¾Ð±Ñ‹Ñ‡Ð½Ð°Ñ Ð¾Ð¿ÐµÑ€Ð°Ñ†Ð¸Ñ Ð² базах данных Ñ Ð¿Ð¾Ð´Ð´ÐµÑ€Ð¶ÐºÐ¾Ð¹ SQL, ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ ÑоответÑтвует join из [релÑционной алгебры](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators). ЧаÑтный Ñлучай ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ð¾Ð´Ð½Ð¾Ð¹ таблицы чаÑто называют «self-join». +`JOIN` Ñоздаёт новую таблицу путем Ð¾Ð±ÑŠÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ñтолбцов из одной или неÑкольких таблиц Ñ Ð¸Ñпользованием общих Ð´Ð»Ñ ÐºÐ°Ð¶Ð´Ð¾Ð¹ из них значений. Это Ð¾Ð±Ñ‹Ñ‡Ð½Ð°Ñ Ð¾Ð¿ÐµÑ€Ð°Ñ†Ð¸Ñ Ð² базах данных Ñ Ð¿Ð¾Ð´Ð´ÐµÑ€Ð¶ÐºÐ¾Ð¹ SQL, ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ ÑоответÑтвует join из [релÑционной алгебры](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators). ЧаÑтный Ñлучай ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ð¾Ð´Ð½Ð¾Ð¹ таблицы чаÑто называют self-join. СинтакÑиÑ: @@ -38,12 +38,21 @@ FROM ## ÐаÑтройки {#join-settings} -!!! note "Примечание" - Значение ÑтрогоÑти по умолчанию может быть переопределено Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ наÑтройки [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness). +Значение ÑтрогоÑти по умолчанию может быть переопределено Ñ Ð¿Ð¾Ð¼Ð¾Ñ‰ÑŒÑŽ наÑтройки [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness). Поведение Ñервера ClickHouse Ð´Ð»Ñ Ð¾Ð¿ÐµÑ€Ð°Ñ†Ð¸Ð¹ `ANY JOIN` завиÑит от параметра [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys). -### ИÑпользование ASOF JOIN {#asof-join-usage} +**См. 
также** + +- [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) +- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row) +- [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls) +- [partial_merge_join_optimizations](../../../operations/settings/settings.md#partial_merge_join_optimizations) +- [partial_merge_join_rows_in_right_blocks](../../../operations/settings/settings.md#partial_merge_join_rows_in_right_blocks) +- [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge) +- [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) + +## ИÑпользование ASOF JOIN {#asof-join-usage} `ASOF JOIN` применим в том Ñлучае, когда необходимо объединÑÑ‚ÑŒ запиÑи, которые не имеют точного ÑовпадениÑ. @@ -95,7 +104,7 @@ USING (equi_column1, ... equi_columnN, asof_column) Чтобы задать значение ÑтрогоÑти по умолчанию, иÑпользуйте ÑеÑÑионный параметр [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness). -#### РаÑпределённый join {#global-join} +## РаÑпределённый JOIN {#global-join} ЕÑÑ‚ÑŒ два пути Ð´Ð»Ñ Ð²Ñ‹Ð¿Ð¾Ð»Ð½ÐµÐ½Ð¸Ñ ÑÐ¾ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ñ ÑƒÑ‡Ð°Ñтием раÑпределённых таблиц: @@ -104,6 +113,42 @@ USING (equi_column1, ... equi_columnN, asof_column) Будьте аккуратны при иÑпользовании `GLOBAL`. За дополнительной информацией обращайтеÑÑŒ в раздел [РаÑпределенные подзапроÑÑ‹](../../../sql-reference/operators/in.md#select-distributed-subqueries). +## ÐеÑвные Ð¿Ñ€ÐµÐ¾Ð±Ñ€Ð°Ð·Ð¾Ð²Ð°Ð½Ð¸Ñ Ñ‚Ð¸Ð¿Ð¾Ð² {#implicit-type-conversion} + +ЗапроÑÑ‹ `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN` и `FULL JOIN` поддерживают неÑвные Ð¿Ñ€ÐµÐ¾Ð±Ñ€Ð°Ð·Ð¾Ð²Ð°Ð½Ð¸Ñ Ñ‚Ð¸Ð¿Ð¾Ð² Ð´Ð»Ñ ÐºÐ»ÑŽÑ‡ÐµÐ¹ ÑоединениÑ. Однако Ð·Ð°Ð¿Ñ€Ð¾Ñ Ð½Ðµ может быть выполнен, еÑли не ÑущеÑтвует типа, к которому можно привеÑти Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ ÐºÐ»ÑŽÑ‡ÐµÐ¹ Ñ Ð¾Ð±ÐµÐ¸Ñ… Ñторон (например, нет типа, который бы одновременно вмещал в ÑÐµÐ±Ñ Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ `UInt64` и `Int64`, или `String` и `Int32`). + +**Пример** + +РаÑÑмотрим таблицу `t_1`: +```text +┌─a─┬─b─┬─toTypeName(a)─┬─toTypeName(b)─┠+│ 1 │ 1 │ UInt16 │ UInt8 │ +│ 2 │ 2 │ UInt16 │ UInt8 │ +└───┴───┴───────────────┴───────────────┘ +``` +и таблицу `t_2`: +```text +┌──a─┬────b─┬─toTypeName(a)─┬─toTypeName(b)───┠+│ -1 │ 1 │ Int16 │ Nullable(Int64) │ +│ 1 │ -1 │ Int16 │ Nullable(Int64) │ +│ 1 │ 1 │ Int16 │ Nullable(Int64) │ +└────┴──────┴───────────────┴─────────────────┘ +``` + +Ð—Ð°Ð¿Ñ€Ð¾Ñ +```sql +SELECT a, b, toTypeName(a), toTypeName(b) FROM t_1 FULL JOIN t_2 USING (a, b); +``` +вернёт результат: +```text +┌──a─┬────b─┬─toTypeName(a)─┬─toTypeName(b)───┠+│ 1 │ 1 │ Int32 │ Nullable(Int64) │ +│ 2 │ 2 │ Int32 │ Nullable(Int64) │ +│ -1 │ 1 │ Int32 │ Nullable(Int64) │ +│ 1 │ -1 │ Int32 │ Nullable(Int64) │ +└────┴──────┴───────────────┴─────────────────┘ +``` + ## Рекомендации по иÑпользованию {#usage-recommendations} ### Обработка пуÑÑ‚Ñ‹Ñ… Ñчеек и NULL {#processing-of-empty-or-null-cells} @@ -142,12 +187,14 @@ USING (equi_column1, ... equi_columnN, asof_column) ### ÐžÐ³Ñ€Ð°Ð½Ð¸Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð¾ памÑти {#memory-limitations} -По умолчанию ClickHouse иÑпользует алгоритм [hash join](https://en.wikipedia.org/wiki/Hash_join). ClickHouse берет `` и Ñоздает Ð´Ð»Ñ Ð½ÐµÐ³Ð¾ Ñ…Ñш-таблицу в оперативной памÑти. ПоÑле некоторого порога Ð¿Ð¾Ñ‚Ñ€ÐµÐ±Ð»ÐµÐ½Ð¸Ñ Ð¿Ð°Ð¼Ñти ClickHouse переходит к алгоритму merge join. 
+По умолчанию ClickHouse использует алгоритм [hash join](https://ru.wikipedia.org/wiki/Алгоритм_соединения_хешированием). ClickHouse берет правую таблицу и создает для нее хеш-таблицу в оперативной памяти. При включённой настройке `join_algorithm = 'auto'`, после некоторого порога потребления памяти ClickHouse переходит к алгоритму [merge join](https://ru.wikipedia.org/wiki/Алгоритм_соединения_слиянием_сортированных_списков). Описание алгоритмов `JOIN` см. в настройке [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm).
 
-- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — ограничивает количество строк в хэш-таблице.
-- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — ограничивает размер хэш-таблицы.
+Если вы хотите ограничить потребление памяти во время выполнения операции `JOIN`, используйте настройки:
 
-По достижении любого из этих ограничений, ClickHouse действует в соответствии с настройкой [join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode).
+- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — ограничивает количество строк в хеш-таблице.
+- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — ограничивает размер хеш-таблицы.
+
+По достижении любого из этих ограничений ClickHouse действует в соответствии с настройкой [join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode).
 
 ## Примеры {#examples}
 
diff --git a/docs/ru/whats-new/security-changelog.md b/docs/ru/whats-new/security-changelog.md
index e3d26e772c4..60d6c2f1b66 100644
--- a/docs/ru/whats-new/security-changelog.md
+++ b/docs/ru/whats-new/security-changelog.md
@@ -5,6 +5,17 @@ toc_title: Security Changelog
 
 # Security Changelog {#security-changelog}
 
+## Исправлено в релизе 21.4.3.21, 2021-04-12 {#fixed-in-clickhouse-release-21-4-3-21-2021-04-12}
+
+### CVE-2021-25263 {#cve-2021-25263}
+
+Злоумышленник с доступом к созданию словарей может читать файлы на файловой системе сервера ClickHouse.
+Злоумышленник может обойти некорректную проверку пути к файлу словаря и загрузить часть любого файла как словарь. При этом, манипулируя опциями парсинга файла, можно получить следующую часть файла и пошагово прочитать весь файл.
+
+Исправление доступно в версиях 20.8.18.32-lts, 21.1.9.41-stable, 21.2.9.41-stable, 21.3.6.55-lts, 21.4.3.21-stable и выше.
+
+Обнаружено благодаря: [Вячеславу Егошину](https://twitter.com/vegoshin)
+
 ## Исправлено в релизе 19.14.3.3, 2019-09-10 {#ispravleno-v-relize-19-14-3-3-2019-09-10}
 
 ### CVE-2019-15024 {#cve-2019-15024}
diff --git a/docs/zh/engines/database-engines/atomic.md b/docs/zh/engines/database-engines/atomic.md
index f019b94a00b..73e044b5e98 100644
--- a/docs/zh/engines/database-engines/atomic.md
+++ b/docs/zh/engines/database-engines/atomic.md
@@ -6,12 +6,12 @@ toc_title: Atomic
 
 # Atomic {#atomic}
 
-It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. Atomic database engine is used by default.
+它支æŒéžé˜»å¡ž DROP å’Œ RENAME TABLE 查询以åŠåŽŸå­ EXCHANGE TABLES t1 AND t2 查询。默认情况下使用Atomicæ•°æ®åº“引擎。 -## Creating a Database {#creating-a-database} +## 创建数æ®åº“ {#creating-a-database} ```sql CREATE DATABASE test ENGINE = Atomic; ``` -[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) +[原文](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) diff --git a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md index 6d1dfac7686..6fb57dc19d9 100644 --- a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -1,4 +1,4 @@ -# 折å æ ‘ {#table_engine-collapsingmergetree} +# CollapsingMergeTree {#table_engine-collapsingmergetree} 该引擎继承于 [MergeTree](mergetree.md),并在数æ®å—åˆå¹¶ç®—法中添加了折å è¡Œçš„逻辑。 @@ -203,4 +203,4 @@ SELECT * FROM UAct FINAL è¿™ç§æŸ¥è¯¢æ•°æ®çš„方法是éžå¸¸ä½Žæ•ˆçš„。ä¸è¦åœ¨å¤§è¡¨ä¸­ä½¿ç”¨å®ƒã€‚ -[æ¥æºæ–‡ç« ](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) +[原文](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 3b89da9f595..dc9871c1a31 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -3,7 +3,7 @@ toc_priority: 37 toc_title: "版本折å MergeTree" --- -# 版本折å MergeTree {#versionedcollapsingmergetree} +# VersionedCollapsingMergeTree {#versionedcollapsingmergetree} 这个引擎: diff --git a/docs/zh/interfaces/tcp.md b/docs/zh/interfaces/tcp.md index b779b9fea40..571fd22b758 100644 --- a/docs/zh/interfaces/tcp.md +++ b/docs/zh/interfaces/tcp.md @@ -5,6 +5,6 @@ toc_title: 原生接å£(TCP) # 原生接å£ï¼ˆTCP){#native-interface-tcp} -原生接å£ç”¨äºŽ[命令行客户端](cli.md),用于分布å¼æŸ¥è¯¢å¤„ç†æœŸé—´çš„æœåŠ¡å™¨é—´é€šä¿¡ï¼Œä»¥åŠå…¶ä»–C++程åºã€‚å¯æƒœçš„是,原生的ClickHouseå议还没有正å¼çš„规范,但它å¯ä»¥ä»ŽClickHouse[æºä»£ç ](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)通过拦截和分æžTCPæµé‡è¿›è¡Œåå‘工程。 +原生接å£å议用于[命令行客户端](cli.md),用于分布å¼æŸ¥è¯¢å¤„ç†æœŸé—´çš„æœåŠ¡å™¨é—´é€šä¿¡ï¼Œä»¥åŠå…¶ä»–C++ 程åºã€‚ä¸å¹¸çš„是,原生ClickHouseå议还没有正å¼çš„规范,但它å¯ä»¥ä»ŽClickHouseæºä»£ç [从这里开始](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)或通过拦截和分æžTCPæµé‡è¿›è¡Œé€†å‘工程。 -[æ¥æºæ–‡ç« ](https://clickhouse.tech/docs/zh/interfaces/tcp/) +[原文](https://clickhouse.tech/docs/en/interfaces/tcp/) diff --git a/docs/zh/interfaces/third-party/gui.md b/docs/zh/interfaces/third-party/gui.md index e85f8b2ec79..46baf55d564 100644 --- a/docs/zh/interfaces/third-party/gui.md +++ b/docs/zh/interfaces/third-party/gui.md @@ -57,9 +57,9 @@ ClickHouse Web ç•Œé¢ [Tabix](https://github.com/tabixio/tabix). - 表格预览。 - 自动完æˆã€‚ -### ツ环æ¿-ï½®ï¾‚å˜‰ï½¯ï¾‚å² {#clickhouse-cli} +### clickhouse-cli {#clickhouse-cli} -[ツ环æ¿-ョツ嘉ッツå²](https://github.com/hatarist/clickhouse-cli) 是ClickHouse的替代命令行客户端,用Python 3编写。 +[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) 是ClickHouse的替代命令行客户端,用Python 3编写。 特å¾ï¼š @@ -68,15 +68,15 @@ ClickHouse Web ç•Œé¢ [Tabix](https://github.com/tabixio/tabix). 
- 寻呼机支æŒæ•°æ®è¾“出。 - 自定义PostgreSQL类命令。 -### ツ暗ェツ氾环催ツ団ツ法ツ人 {#clickhouse-flamegraph} +### clickhouse-flamegraph {#clickhouse-flamegraph} [clickhouse-flamegraph](https://github.com/Slach/clickhouse-flamegraph) 是一个å¯è§†åŒ–的专业工具`system.trace_log`如[flamegraph](http://www.brendangregg.com/flamegraphs.html). ## 商业 {#shang-ye} -### ツ环æ¿Softwareョツ嘉ッ {#holistics-software} +### Holistics {#holistics-software} -[整体学](https://www.holistics.io/) 在2019年被Gartner FrontRunners列为å¯ç”¨æ€§æœ€é«˜æŽ’å第二的商业智能工具之一。 Holistics是一个基于SQL的全栈数æ®å¹³å°å’Œå•†ä¸šæ™ºèƒ½å·¥å…·ï¼Œç”¨äºŽè®¾ç½®æ‚¨çš„分æžæµç¨‹ã€‚ +[Holistics](https://www.holistics.io/) 在2019年被Gartner FrontRunners列为å¯ç”¨æ€§æœ€é«˜æŽ’å第二的商业智能工具之一。 Holistics是一个基于SQL的全栈数æ®å¹³å°å’Œå•†ä¸šæ™ºèƒ½å·¥å…·ï¼Œç”¨äºŽè®¾ç½®æ‚¨çš„分æžæµç¨‹ã€‚ 特å¾ï¼š diff --git a/docs/zh/operations/index.md b/docs/zh/operations/index.md index f35858279f5..5139f083ceb 100644 --- a/docs/zh/operations/index.md +++ b/docs/zh/operations/index.md @@ -5,9 +5,21 @@ toc_title: "æ“作" # æ“作 {#operations} -Clickhouseè¿ç»´æ‰‹å†Œä¸»è¦åŒ…å«ä¸‹é¢å‡ éƒ¨åˆ†ï¼š +ClickHouseæ“作手册由以下主è¦éƒ¨åˆ†ç»„æˆï¼š -- 安装è¦æ±‚ +- [安装è¦æ±‚](../operations/requirements.md) +- [监控](../operations/monitoring.md) +- [故障排除](../operations/troubleshooting.md) +- [使用建议](../operations/tips.md) +- [更新程åº](../operations/update.md) +- [访问æƒé™](../operations/access-rights.md) +- [æ•°æ®å¤‡ä»½](../operations/backup.md) +- [é…置文件](../operations/configuration-files.md) +- [é…é¢](../operations/quotas.md) +- [系统表](../operations/system-tables/index.md) +- [æœåŠ¡å™¨é…ç½®å‚æ•°](../operations/server-configuration-parameters/index.md) +- [如何用ClickHouse测试你的硬件](../operations/performance-test.md) +- [设置](../operations/settings/index.md) +- [实用工具](../operations/utilities/index.md) - -[原始文章](https://clickhouse.tech/docs/en/operations/) +[原文](https://clickhouse.tech/docs/en/operations/) diff --git a/docs/zh/sql-reference/table-functions/mysql.md b/docs/zh/sql-reference/table-functions/mysql.md index c54cd7d2a06..3ed0001b0a0 100644 --- a/docs/zh/sql-reference/table-functions/mysql.md +++ b/docs/zh/sql-reference/table-functions/mysql.md @@ -1,13 +1,8 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 42 -toc_title: mysql ---- - # mysql {#mysql} -å…许 `SELECT` è¦å¯¹å­˜å‚¨åœ¨è¿œç¨‹MySQLæœåŠ¡å™¨ä¸Šçš„æ•°æ®æ‰§è¡Œçš„查询。 +å…许对存储在远程MySQLæœåŠ¡å™¨ä¸Šçš„æ•°æ®æ‰§è¡Œ`SELECT`å’Œ`INSERT`查询。 + +**语法** ``` sql mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']); @@ -15,31 +10,44 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_ **å‚æ•°** -- `host:port` — MySQL server address. +- `host:port` — MySQLæœåŠ¡å™¨åœ°å€. -- `database` — Remote database name. +- `database` — 远程数æ®åº“å称. -- `table` — Remote table name. +- `table` — 远程表å称. -- `user` — MySQL user. +- `user` — MySQL用户. -- `password` — User password. +- `password` — 用户密ç . -- `replace_query` — Flag that converts `INSERT INTO` 查询到 `REPLACE INTO`. 
如果 `replace_query=1`,查询被替æ¢ã€‚ +- `replace_query` — å°†INSERT INTO` 查询转æ¢ä¸º `REPLACE INTO`的标志。如果 `replace_query=1`,查询被替æ¢ã€‚ -- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` 表达å¼è¢«æ·»åŠ åˆ° `INSERT` 查询。 +- `on_duplicate_clause` — 添加 `ON DUPLICATE KEY on_duplicate_clause` 表达å¼åˆ° `INSERT` 查询。明确规定åªèƒ½ä½¿ç”¨ `replace_query = 0` ,如果你åŒæ—¶è®¾ç½®replace_query = 1`å’Œ`on_duplicate_clause`,ClickHouse将产生异常。 - Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. + 示例:`INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1` - To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception. + `on_duplicate_clause`这里是`UPDATE c2 = c2 + 1`。请查阅MySQL文档,æ¥æ‰¾åˆ°å¯ä»¥å’Œ`ON DUPLICATE KEY`一起使用的 `on_duplicate_clause`å­å¥ã€‚ -ç®€å• `WHERE` æ¡æ¬¾å¦‚ `=, !=, >, >=, <, <=` 当å‰åœ¨MySQLæœåŠ¡å™¨ä¸Šæ‰§è¡Œã€‚ +简å•çš„ `WHERE` å­å¥å¦‚ `=, !=, >, >=, <, <=` å°†å³æ—¶åœ¨MySQLæœåŠ¡å™¨ä¸Šæ‰§è¡Œã€‚其余的æ¡ä»¶å’Œ `LIMIT` åªæœ‰åœ¨å¯¹MySQL的查询完æˆåŽï¼Œæ‰ä¼šåœ¨ClickHouse中执行采样约æŸã€‚ -其余的æ¡ä»¶å’Œ `LIMIT` åªæœ‰åœ¨å¯¹MySQL的查询完æˆåŽï¼Œæ‰ä¼šåœ¨ClickHouse中执行采样约æŸã€‚ +支æŒä½¿ç”¨`|`并列进行多副本查询,示例如下: + +```sql +SELECT name FROM mysql(`mysql{1|2|3}:3306`, 'mysql_database', 'mysql_table', 'user', 'password'); +``` + +或 + +```sql +SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', 'mysql_table', 'user', 'password'); +``` **返回值** -与原始MySQL表具有相åŒåˆ—çš„table对象。 +与原始MySQL表具有相åŒåˆ—的表对象。 + +!!! note "注æ„" + 在`INSERT`查询中为了区分`mysql(...)`与带有列å列表的表å的表函数,你必须使用关键字`FUNCTION`或`TABLE FUNCTION`。查看如下示例。 ## 用法示例 {#usage-example} @@ -66,7 +74,7 @@ mysql> select * from test; 1 row in set (0,00 sec) ``` -从ClickHouse中选择数æ®: +从ClickHouse中查询数æ®: ``` sql SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123') @@ -78,6 +86,21 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123') └────────┴──────────────┴───────┴────────────────┘ ``` +替æ¢å’Œæ’入: + +```sql +INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3); +INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4); +SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); +``` + +```text +┌─int_id─┬─float─┠+│ 1 │ 3 │ +│ 2 │ 4 │ +└────────┴───────┘ +``` + ## å¦è¯·å‚阅 {#see-also} - [该 ‘MySQL’ 表引擎](../../engines/table-engines/integrations/mysql.md) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 9c1c8338321..6ed6d5404a3 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -301,26 +302,9 @@ private: } catch (const Exception & e) { - bool print_stack_trace = config().getBool("stacktrace", false); + bool print_stack_trace = config().getBool("stacktrace", false) && e.code() != ErrorCodes::NETWORK_ERROR; - std::string text = e.displayText(); - - /** If exception is received from server, then stack trace is embedded in message. - * If exception is thrown on client, then stack trace is in separate field. 
- */ - - auto embedded_stack_trace_pos = text.find("Stack trace"); - if (std::string::npos != embedded_stack_trace_pos && !print_stack_trace) - text.resize(embedded_stack_trace_pos); - - std::cerr << "Code: " << e.code() << ". " << text << std::endl << std::endl; - - /// Don't print the stack trace on the client if it was logged on the server. - /// Also don't print the stack trace in case of network errors. - if (print_stack_trace && e.code() != ErrorCodes::NETWORK_ERROR && std::string::npos == embedded_stack_trace_pos) - { - std::cerr << "Stack trace:" << std::endl << e.getStackTraceString(); - } + std::cerr << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl; /// If exception code isn't zero, we should return non-zero return code anyway. return e.code() ? e.code() : -1; @@ -487,6 +471,52 @@ private: } #endif + /// Make query to get all server warnings + std::vector loadWarningMessages() + { + std::vector messages; + connection->sendQuery(connection_parameters.timeouts, "SELECT message FROM system.warnings", "" /* query_id */, QueryProcessingStage::Complete); + while (true) + { + Packet packet = connection->receivePacket(); + switch (packet.type) + { + case Protocol::Server::Data: + if (packet.block) + { + const ColumnString & column = typeid_cast(*packet.block.getByPosition(0).column); + + size_t rows = packet.block.rows(); + for (size_t i = 0; i < rows; ++i) + messages.emplace_back(column.getDataAt(i).toString()); + } + continue; + + case Protocol::Server::Progress: + continue; + case Protocol::Server::ProfileInfo: + continue; + case Protocol::Server::Totals: + continue; + case Protocol::Server::Extremes: + continue; + case Protocol::Server::Log: + continue; + + case Protocol::Server::Exception: + packet.exception->rethrow(); + return messages; + + case Protocol::Server::EndOfStream: + return messages; + + default: + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", + packet.type, connection->getDescription()); + } + } + } + int mainImpl() { UseSSL use_ssl; @@ -565,6 +595,26 @@ private: suggest->load(connection_parameters, config().getInt("suggestion_limit")); } + /// Load Warnings at the beginning of connection + if (!config().has("no-warnings")) + { + try + { + std::vector messages = loadWarningMessages(); + if (!messages.empty()) + { + std::cout << "Warnings:" << std::endl; + for (const auto & message : messages) + std::cout << "* " << message << std::endl; + std::cout << std::endl; + } + } + catch (...) + { + /// Ignore exception + } + } + /// Load command history if present. if (config().has("history_file")) history_file = config().getString("history_file"); @@ -633,17 +683,10 @@ private: } catch (const Exception & e) { - // We don't need to handle the test hints in the interactive - // mode. - std::cerr << std::endl - << "Exception on client:" << std::endl - << "Code: " << e.code() << ". " << e.displayText() << std::endl; - - if (config().getBool("stacktrace", false)) - std::cerr << "Stack trace:" << std::endl << e.getStackTraceString() << std::endl; - - std::cerr << std::endl; + /// We don't need to handle the test hints in the interactive mode. 
+ bool print_stack_trace = config().getBool("stacktrace", false); + std::cerr << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl; client_exception = std::make_unique(e); } @@ -940,18 +983,11 @@ private: { if (server_exception) { - std::string text = server_exception->displayText(); - auto embedded_stack_trace_pos = text.find("Stack trace"); - if (std::string::npos != embedded_stack_trace_pos && !config().getBool("stacktrace", false)) - { - text.resize(embedded_stack_trace_pos); - } + bool print_stack_trace = config().getBool("stacktrace", false); std::cerr << "Received exception from server (version " << server_version << "):" << std::endl - << "Code: " << server_exception->code() << ". " << text << std::endl; + << getExceptionMessage(*server_exception, print_stack_trace, true) << std::endl; if (is_interactive) - { std::cerr << std::endl; - } } if (client_exception) @@ -1410,8 +1446,7 @@ private: { // Just report it, we'll terminate below. fmt::print(stderr, - "Error while reconnecting to the server: Code: {}: {}\n", - getCurrentExceptionCode(), + "Error while reconnecting to the server: {}\n", getCurrentExceptionMessage(true)); assert(!connection->isConnected()); @@ -2529,6 +2564,7 @@ public: ("opentelemetry-traceparent", po::value(), "OpenTelemetry traceparent header as described by W3C Trace Context recommendation") ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ("history_file", po::value(), "path to history file") + ("no-warnings", "disable warnings when client connects to server") ; Settings cmd_settings; @@ -2596,8 +2632,7 @@ public: } catch (const Exception & e) { - std::string text = e.displayText(); - std::cerr << "Code: " << e.code() << ". " << text << std::endl; + std::cerr << getExceptionMessage(e, false) << std::endl; std::cerr << "Table â„–" << i << std::endl << std::endl; /// Avoid the case when error exit code can possibly overflow to normal (zero). auto exit_code = e.code() % 256; @@ -2689,6 +2724,8 @@ public: config().setBool("highlight", options["highlight"].as()); if (options.count("history_file")) config().setString("history_file", options["history_file"].as()); + if (options.count("no-warnings")) + config().setBool("no-warnings", true); if ((query_fuzzer_runs = options["query-fuzzer-runs"].as())) { @@ -2740,8 +2777,7 @@ int mainEntryClickHouseClient(int argc, char ** argv) } catch (const DB::Exception & e) { - std::string text = e.displayText(); - std::cerr << "Code: " << e.code() << ". " << text << std::endl; + std::cerr << DB::getExceptionMessage(e, false) << std::endl; return 1; } catch (...) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 6be7ba1ad73..e256338a538 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -433,7 +433,7 @@ void LocalServer::processQueries() try { - executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}, finalize_progress); + executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}, {}, finalize_progress); } catch (...) 
{ diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 313523d19dc..d4f830e5a0c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -477,17 +477,6 @@ int Server::main(const std::vector & /*args*/) CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); - if (ThreadFuzzer::instance().isEffective()) - LOG_WARNING(log, "ThreadFuzzer is enabled. Application will run slowly and unstable."); - -#if !defined(NDEBUG) || !defined(__OPTIMIZE__) - LOG_WARNING(log, "Server was built in debug mode. It will work slowly."); -#endif - -#if defined(SANITIZER) - LOG_WARNING(log, "Server was built with sanitizer. It will work slowly."); -#endif - /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -497,6 +486,18 @@ int Server::main(const std::vector & /*args*/) global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::SERVER); +#if !defined(NDEBUG) || !defined(__OPTIMIZE__) + global_context->addWarningMessage("Server was built in debug mode. It will work slowly."); +#endif + +if (ThreadFuzzer::instance().isEffective()) + global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); + +#if defined(SANITIZER) + global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); +#endif + + // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -552,8 +553,10 @@ int Server::main(const std::vector & /*args*/) if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1) { /// Program is run under debugger. Modification of it's binary image is ok for breakpoints. - LOG_WARNING(log, "Server is run under debugger and its binary image is modified (most likely with breakpoints).", - calculated_binary_hash); + global_context->addWarningMessage( + fmt::format("Server is run under debugger and its binary image is modified (most likely with breakpoints).", + calculated_binary_hash) + ); } else { @@ -636,7 +639,7 @@ int Server::main(const std::vector & /*args*/) } else { - LOG_WARNING(log, message); + global_context->addWarningMessage(message); } } diff --git a/programs/server/play.html b/programs/server/play.html index 4165a2829bd..7b13807f2d9 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -9,7 +9,7 @@ Do not use any JavaScript or CSS frameworks or preprocessors. This HTML page should not require any build systems (node.js, npm, gulp, etc.) This HTML page should not be minified, instead it should be reasonably minimalistic by itself. - This HTML page should not load any external resources + This HTML page should not load any external resources on load. (CSS and JavaScript must be embedded directly to the page. No external fonts or images should be loaded). This UI should look as lightweight, clean and fast as possible. All UI elements must be aligned in pixel-perfect way. @@ -343,13 +343,18 @@ /// Save query in history only if it is different. let previous_query = ''; - /// Substitute the address of the server where the page is served. 
- if (location.protocol != 'file:') { + const current_url = new URL(window.location); + + const server_address = current_url.searchParams.get('url'); + if (server_address) { + document.getElementById('url').value = server_address; + } else if (location.protocol != 'file:') { + /// Substitute the address of the server where the page is served. document.getElementById('url').value = location.origin; } /// Substitute user name if it's specified in the query string - let user_from_url = (new URL(window.location)).searchParams.get('user'); + const user_from_url = current_url.searchParams.get('user'); if (user_from_url) { document.getElementById('user').value = user_from_url; } @@ -361,7 +366,9 @@ let user = document.getElementById('user').value; let password = document.getElementById('password').value; - let url = document.getElementById('url').value + + let server_address = document.getElementById('url').value; + + let url = server_address + /// Ask server to allow cross-domain requests. '?add_http_cors_header=1' + '&user=' + encodeURIComponent(user) + @@ -390,11 +397,18 @@ response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit. }; let title = "ClickHouse Query: " + query; - let url = window.location.pathname + '?user=' + encodeURIComponent(user) + '#' + window.btoa(query); + + let history_url = window.location.pathname + '?user=' + encodeURIComponent(user); + if (server_address != location.origin) { + /// Save server's address in URL if it's not identical to the address of the play UI. + history_url += '&url=' + encodeURIComponent(server_address); + } + history_url += '#' + window.btoa(query); + if (previous_query == '') { - history.replaceState(state, title, url); + history.replaceState(state, title, history_url); } else { - history.pushState(state, title, url); + history.pushState(state, title, history_url); } document.title = title; previous_query = query; @@ -599,10 +613,16 @@ } /// Huge JS libraries should be loaded only if needed. 
- function loadJS(src) { + function loadJS(src, integrity) { return new Promise((resolve, reject) => { const script = document.createElement('script'); script.src = src; + if (integrity) { + script.crossOrigin = 'anonymous'; + script.integrity = integrity; + } else { + console.warn('no integrity for', src) + } script.addEventListener('load', function() { resolve(true); }); document.head.appendChild(script); }); @@ -613,10 +633,14 @@ if (load_dagre_promise) { return load_dagre_promise; } load_dagre_promise = Promise.all([ - loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js'), - loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js'), - loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js'), - loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0'), + loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js', + 'sha384-2IH3T69EIKYC4c+RXZifZRvaH5SRUdacJW7j6HtE5rQbvLhKKdawxq6vpIzJ7j9M'), + loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js', + 'sha384-Q7oatU+b+y0oTkSoiRH9wTLH6sROySROCILZso/AbMMm9uKeq++r8ujD4l4f+CWj'), + loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js', + 'sha384-9N1ty7Yz7VKL3aJbOk+8ParYNW8G5W+MvxEfFL9G7CRYPmkHI9gJqyAfSI/8190W'), + loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0', + 'sha384-S+Kf0r6YzKIhKA8d1k2/xtYv+j0xYUU3E7+5YLrcPVab6hBh/r1J6cq90OXhw80u'), ]); return load_dagre_promise; diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 66023c1c0ea..7ae5eeb8288 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -64,7 +64,12 @@ public: std::lock_guard lock{mutex}; auto x = cache.get(params); if (x) - return *x; + { + if ((*x)->getUser()) + return *x; + /// No user, probably the user has been dropped while it was in the cache. 
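Editor's note on the AccessControlManager change just above: it is a cache hygiene fix — a cached ContextAccess whose user has been dropped must not be handed out again, so the cached entry is validated before reuse and evicted when stale. The following is a self-contained sketch of that validate-before-reuse pattern; Entry, the int key and the std::map are placeholders for illustration, not the real Params/ContextAccess types.

    #include <map>
    #include <memory>
    #include <mutex>

    struct Entry
    {
        bool valid = true;   /// stands in for "the user behind this entry still exists"
    };

    class ValidatingCache
    {
    public:
        std::shared_ptr<Entry> getOrCreate(int key)
        {
            std::lock_guard<std::mutex> lock{mutex};
            auto it = cache.find(key);
            if (it != cache.end())
            {
                if (it->second->valid)
                    return it->second;   /// cached object is still usable
                cache.erase(it);         /// stale (e.g. the user was dropped): rebuild below
            }
            auto fresh = std::make_shared<Entry>();
            cache.emplace(key, fresh);
            return fresh;
        }

    private:
        std::mutex mutex;
        std::map<int, std::shared_ptr<Entry>> cache;
    };

The important detail, both here and in the hunk above, is that eviction and re-insertion happen under the same lock as the lookup, so two concurrent callers cannot resurrect the stale entry.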
+ cache.remove(params); + } auto res = std::shared_ptr(new ContextAccess(manager, params)); cache.add(params, res); return res; diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index f9c1d23350d..d4b2dc8a252 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -655,7 +655,7 @@ private: for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags |= rhs.flags & lhs_child.getAllGrantableFlags(); + lhs_child.addGrantsRec(rhs.flags); } } } @@ -673,7 +673,7 @@ private: for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags &= rhs.flags; + lhs_child.removeGrantsRec(~rhs.flags); } } } @@ -1041,17 +1041,15 @@ void AccessRights::makeIntersection(const AccessRights & other) auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) { if (!root_node) + return; + if (!other_root_node) { - if (other_root_node) - root_node = std::make_unique(*other_root_node); + root_node = nullptr; return; } - if (other_root_node) - { - root_node->makeIntersection(*other_root_node); - if (!root_node->flags && !root_node->children) - root_node = nullptr; - } + root_node->makeIntersection(*other_root_node); + if (!root_node->flags && !root_node->children) + root_node = nullptr; }; helper(root, other.root); helper(root_with_grant_option, other.root_with_grant_option); diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index 47153b5ab63..02d7e4982f9 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -173,6 +173,7 @@ enum class AccessType M(MONGO, "", GLOBAL, SOURCES) \ M(MYSQL, "", GLOBAL, SOURCES) \ M(POSTGRES, "", GLOBAL, SOURCES) \ + M(SQLITE, "", GLOBAL, SOURCES) \ M(ODBC, "", GLOBAL, SOURCES) \ M(JDBC, "", GLOBAL, SOURCES) \ M(HDFS, "", GLOBAL, SOURCES) \ diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 90495a83dfc..697e1ce39f5 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -163,11 +163,10 @@ void ContextAccess::setUser(const UserPtr & user_) const if (!user) { /// User has been dropped. - auto nothing_granted = std::make_shared(); - access = nothing_granted; - access_with_implicit = nothing_granted; subscription_for_user_change = {}; subscription_for_roles_changes = {}; + access = nullptr; + access_with_implicit = nullptr; enabled_roles = nullptr; roles_info = nullptr; enabled_row_policies = nullptr; @@ -252,32 +251,45 @@ String ContextAccess::getUserName() const std::shared_ptr ContextAccess::getRolesInfo() const { std::lock_guard lock{mutex}; - return roles_info; + if (roles_info) + return roles_info; + static const auto no_roles = std::make_shared(); + return no_roles; } std::shared_ptr ContextAccess::getEnabledRowPolicies() const { std::lock_guard lock{mutex}; - return enabled_row_policies; + if (enabled_row_policies) + return enabled_row_policies; + static const auto no_row_policies = std::make_shared(); + return no_row_policies; } ASTPtr ContextAccess::getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition) const { std::lock_guard lock{mutex}; - return enabled_row_policies ? 
enabled_row_policies->getCondition(database, table_name, index, extra_condition) : nullptr; + if (enabled_row_policies) + return enabled_row_policies->getCondition(database, table_name, index, extra_condition); + return nullptr; } std::shared_ptr ContextAccess::getQuota() const { std::lock_guard lock{mutex}; - return enabled_quota; + if (enabled_quota) + return enabled_quota; + static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota(); + return unlimited_quota; } std::optional ContextAccess::getQuotaUsage() const { std::lock_guard lock{mutex}; - return enabled_quota ? enabled_quota->getUsage() : std::optional{}; + if (enabled_quota) + return enabled_quota->getUsage(); + return {}; } @@ -288,7 +300,7 @@ std::shared_ptr ContextAccess::getFullAccess() auto full_access = std::shared_ptr(new ContextAccess); full_access->is_full_access = true; full_access->access = std::make_shared(AccessRights::getFullAccess()); - full_access->enabled_quota = EnabledQuota::getUnlimitedQuota(); + full_access->access_with_implicit = std::make_shared(addImplicitAccessRights(*full_access->access)); return full_access; }(); return res; @@ -298,28 +310,40 @@ std::shared_ptr ContextAccess::getFullAccess() std::shared_ptr ContextAccess::getDefaultSettings() const { std::lock_guard lock{mutex}; - return enabled_settings ? enabled_settings->getSettings() : nullptr; + if (enabled_settings) + return enabled_settings->getSettings(); + static const auto everything_by_default = std::make_shared(); + return everything_by_default; } std::shared_ptr ContextAccess::getSettingsConstraints() const { std::lock_guard lock{mutex}; - return enabled_settings ? enabled_settings->getConstraints() : nullptr; + if (enabled_settings) + return enabled_settings->getConstraints(); + static const auto no_constraints = std::make_shared(); + return no_constraints; } std::shared_ptr ContextAccess::getAccessRights() const { std::lock_guard lock{mutex}; - return access; + if (access) + return access; + static const auto nothing_granted = std::make_shared(); + return nothing_granted; } std::shared_ptr ContextAccess::getAccessRightsWithImplicit() const { std::lock_guard lock{mutex}; - return access_with_implicit; + if (access_with_implicit) + return access_with_implicit; + static const auto nothing_granted = std::make_shared(); + return nothing_granted; } @@ -551,7 +575,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const for (auto it = std::begin(role_ids); it != std::end(role_ids); ++it, ++i) { const UUID & role_id = *it; - if (info && info->enabled_roles_with_admin_option.count(role_id)) + if (info->enabled_roles_with_admin_option.count(role_id)) continue; if (throw_if_denied) @@ -560,7 +584,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const if (!role_name) role_name = "ID {" + toString(role_id) + "}"; - if (info && info->enabled_roles.count(role_id)) + if (info->enabled_roles.count(role_id)) show_error("Not enough privileges. " "Role " + backQuote(*role_name) + " is granted, but without ADMIN option. " "To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option.", diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index a4373be4ff0..c7c4726c535 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -71,11 +71,9 @@ public: String getUserName() const; /// Returns information about current and enabled roles. - /// The function can return nullptr. 
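Editor's note on the ContextAccess changes above and the header comments being deleted right below: the getters no longer return nullptr. When the underlying state is absent (user dropped, nothing loaded yet) they fall back to a function-local static default — no roles, no constraints, an unlimited quota, or "nothing granted" — so callers need no null checks, which is exactly why the "can return nullptr" remarks are removed. A self-contained sketch of that null-object style, with RolesInfo standing in for the real EnabledRolesInfo:

    #include <memory>
    #include <mutex>

    struct RolesInfo {};   /// placeholder for the real roles-info type

    class Access
    {
    public:
        /// Never returns nullptr: if no state is loaded, hand out a shared,
        /// immutable "empty" default instead of a null pointer.
        std::shared_ptr<const RolesInfo> getRolesInfo() const
        {
            std::lock_guard<std::mutex> lock{mutex};
            if (roles_info)
                return roles_info;
            static const auto no_roles = std::make_shared<RolesInfo>();
            return no_roles;
        }

    private:
        mutable std::mutex mutex;
        std::shared_ptr<const RolesInfo> roles_info;
    };

Because the defaults are function-local statics, they are built once, shared by every caller and never freed, which keeps the getters cheap and thread-safe.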
std::shared_ptr getRolesInfo() const; /// Returns information about enabled row policies. - /// The function can return nullptr. std::shared_ptr getEnabledRowPolicies() const; /// Returns the row policy filter for a specified table. @@ -83,16 +81,13 @@ public: ASTPtr getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition = nullptr) const; /// Returns the quota to track resource consumption. - /// The function returns nullptr if no tracking or limitation is needed. std::shared_ptr getQuota() const; std::optional getQuotaUsage() const; /// Returns the default settings, i.e. the settings to apply on user's login. - /// The function returns nullptr if it's no need to apply settings. std::shared_ptr getDefaultSettings() const; /// Returns the settings' constraints. - /// The function returns nullptr if there are no constraints. std::shared_ptr getSettingsConstraints() const; /// Returns the current access rights. diff --git a/src/Access/EnabledRowPolicies.cpp b/src/Access/EnabledRowPolicies.cpp index efd5ed4ae10..674dab3e0f0 100644 --- a/src/Access/EnabledRowPolicies.cpp +++ b/src/Access/EnabledRowPolicies.cpp @@ -12,8 +12,11 @@ size_t EnabledRowPolicies::Hash::operator()(const MixedConditionKey & key) const } -EnabledRowPolicies::EnabledRowPolicies(const Params & params_) - : params(params_) +EnabledRowPolicies::EnabledRowPolicies() : params() +{ +} + +EnabledRowPolicies::EnabledRowPolicies(const Params & params_) : params(params_) { } diff --git a/src/Access/EnabledRowPolicies.h b/src/Access/EnabledRowPolicies.h index 0ca4f16fcf1..5e819733963 100644 --- a/src/Access/EnabledRowPolicies.h +++ b/src/Access/EnabledRowPolicies.h @@ -32,6 +32,7 @@ public: friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); } }; + EnabledRowPolicies(); ~EnabledRowPolicies(); using ConditionType = RowPolicy::ConditionType; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 316f869fc79..988900e57d2 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -18,6 +18,8 @@ namespace ErrorCodes } +SettingsConstraints::SettingsConstraints() = default; + SettingsConstraints::SettingsConstraints(const AccessControlManager & manager_) : manager(&manager_) { } @@ -199,10 +201,13 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh } }; - if (reaction == THROW_ON_VIOLATION) - manager->checkSettingNameIsAllowed(setting_name); - else if (!manager->isSettingNameAllowed(setting_name)) - return false; + if (manager) + { + if (reaction == THROW_ON_VIOLATION) + manager->checkSettingNameIsAllowed(setting_name); + else if (!manager->isSettingNameAllowed(setting_name)) + return false; + } Field current_value, new_value; if (current_settings.tryGet(setting_name, current_value)) diff --git a/src/Access/SettingsConstraints.h b/src/Access/SettingsConstraints.h index 4259fe15e25..cdec2bb293c 100644 --- a/src/Access/SettingsConstraints.h +++ b/src/Access/SettingsConstraints.h @@ -51,6 +51,7 @@ class AccessControlManager; class SettingsConstraints { public: + SettingsConstraints(); SettingsConstraints(const AccessControlManager & manager_); SettingsConstraints(const SettingsConstraints & src); SettingsConstraints & operator =(const SettingsConstraints & src); diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp new file mode 100644 index 00000000000..3d7b396a6f2 --- /dev/null 
+++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -0,0 +1,94 @@ +#include +#include + +using namespace DB; + + +TEST(AccessRights, Union) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + rhs.grant(AccessType::SELECT, "db2"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs.clear(); + rhs.clear(); + rhs.grant(AccessType::SELECT, "db2"); + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1, col2, col3) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1) ON db1.tb1, GRANT SELECT(col2, col3) ON db1.tb1 WITH GRANT OPTION"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); +} + + +TEST(AccessRights, Intersection) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + rhs.grant(AccessType::SELECT, "db2"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs.clear(); + rhs.clear(); + lhs.grant(AccessType::SELECT, "db2"); + rhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON db1.*"); +} diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 4be2455d71e..3355cb0d6fc 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -101,6 +101,24 @@ struct 
AggregateFunctionSumData { const auto * end = ptr + count; + if constexpr ( + (is_integer_v && !is_big_int_v) + || (IsDecimalNumber && !std::is_same_v && !std::is_same_v)) + { + /// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null) + /// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I + T local_sum{}; + while (ptr < end) + { + T multiplier = !*null_map; + Impl::add(local_sum, *ptr * multiplier); + ++ptr; + ++null_map; + } + Impl::add(sum, local_sum); + return; + } + if constexpr (std::is_floating_point_v) { constexpr size_t unroll_count = 128 / sizeof(T); diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 03327f76e48..b103f42fcc5 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -459,6 +459,8 @@ public: explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {} bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + bool operator() (NegativeInfinity &) const { throw Exception("Cannot compare -Inf", ErrorCodes::LOGICAL_ERROR); } + bool operator() (PositiveInfinity &) const { throw Exception("Cannot compare +Inf", ErrorCodes::LOGICAL_ERROR); } bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array & x) const { return compareImpl(x); } @@ -494,6 +496,8 @@ public: explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {} bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + bool operator() (NegativeInfinity &) const { throw Exception("Cannot compare -Inf", ErrorCodes::LOGICAL_ERROR); } + bool operator() (PositiveInfinity &) const { throw Exception("Cannot compare +Inf", ErrorCodes::LOGICAL_ERROR); } bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array & x) const { return compareImpl(x); } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 272bea4f6d7..31286c740d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -76,6 +76,10 @@ add_headers_and_sources(clickhouse_common_io IO) add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) +if (USE_SQLITE) + add_headers_and_sources(dbms Databases/SQLite) +endif() + if(USE_RDKAFKA) add_headers_and_sources(dbms Storages/Kafka) endif() @@ -415,6 +419,11 @@ if (USE_AWS_S3) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${AWS_S3_INCLUDE_DIR}) endif() +if (USE_S2_GEOMETRY) + dbms_target_link_libraries (PUBLIC ${S2_GEOMETRY_LIBRARY}) + dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) +endif() + if (USE_BROTLI) target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) @@ -425,6 +434,10 @@ if (USE_AMQPCPP) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${AMQPCPP_INCLUDE_DIR}) endif() +if (USE_SQLITE) + dbms_target_link_libraries(PUBLIC sqlite) +endif() + if (USE_CASSANDRA) dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY}) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) diff --git a/src/Client/HedgedConnections.cpp 
b/src/Client/HedgedConnections.cpp index 8455ef3117e..0c461d2f399 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -353,6 +353,11 @@ bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLoc if (offset_states[location.offset].active_connection_count == 0 && !offset_states[location.offset].next_replica_in_process) throw NetException("Receive timeout expired", ErrorCodes::SOCKET_TIMEOUT); } + else if (std::holds_alternative(res)) + { + finishProcessReplica(replica_state, true); + std::rethrow_exception(std::move(std::get(res))); + } return false; } diff --git a/src/Client/PacketReceiver.h b/src/Client/PacketReceiver.h index 516491db994..ca0d62f0257 100644 --- a/src/Client/PacketReceiver.h +++ b/src/Client/PacketReceiver.h @@ -31,7 +31,7 @@ public: } /// Resume packet receiving. - std::variant resume() + std::variant resume() { /// If there is no pending data, check receive timeout. if (!connection->hasReadPendingData() && !checkReceiveTimeout()) @@ -43,7 +43,7 @@ public: /// Resume fiber. fiber = std::move(fiber).resume(); if (exception) - std::rethrow_exception(std::move(exception)); + return std::move(exception); if (is_read_in_process) return epoll.getFileDescriptor(); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 62524315354..dec93fc7a30 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -546,97 +546,54 @@ namespace { /// The following function implements a slightly more general version -/// of getExtremes() than the implementation from ColumnVector. +/// of getExtremes() than the implementation from Not-Null IColumns. /// It takes into account the possible presence of nullable values. -template -void getExtremesFromNullableContent(const ColumnVector & col, const NullMap & null_map, Field & min, Field & max) +void getExtremesWithNulls(const IColumn & nested_column, const NullMap & null_array, Field & min, Field & max, bool null_last = false) { - const auto & data = col.getData(); - size_t size = data.size(); - - if (size == 0) + size_t number_of_nulls = 0; + size_t n = null_array.size(); + NullMap not_null_array(n); + for (auto i = 0ul; i < n; ++i) { - min = Null(); - max = Null(); - return; - } - - bool has_not_null = false; - bool has_not_nan = false; - - T cur_min = 0; - T cur_max = 0; - - for (size_t i = 0; i < size; ++i) - { - const T x = data[i]; - - if (null_map[i]) - continue; - - if (!has_not_null) + if (null_array[i]) { - cur_min = x; - cur_max = x; - has_not_null = true; - has_not_nan = !isNaN(x); - continue; + ++number_of_nulls; + not_null_array[i] = 0; } - - if (isNaN(x)) - continue; - - if (!has_not_nan) + else { - cur_min = x; - cur_max = x; - has_not_nan = true; - continue; + not_null_array[i] = 1; } - - if (x < cur_min) - cur_min = x; - else if (x > cur_max) - cur_max = x; } - - if (has_not_null) + if (number_of_nulls == 0) { - min = cur_min; - max = cur_max; + nested_column.getExtremes(min, max); + } + else if (number_of_nulls == n) + { + min = PositiveInfinity(); + max = PositiveInfinity(); + } + else + { + auto filtered_column = nested_column.filter(not_null_array, -1); + filtered_column->getExtremes(min, max); + if (null_last) + max = PositiveInfinity(); } } - } void ColumnNullable::getExtremes(Field & min, Field & max) const { - min = Null(); - max = Null(); + getExtremesWithNulls(getNestedColumn(), getNullMapData(), min, max); +} - const auto & null_map_data = getNullMapData(); - if (const auto * col_i8 = 
typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_i8, null_map_data, min, max); - else if (const auto * col_i16 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_i16, null_map_data, min, max); - else if (const auto * col_i32 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_i32, null_map_data, min, max); - else if (const auto * col_i64 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_i64, null_map_data, min, max); - else if (const auto * col_u8 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_u8, null_map_data, min, max); - else if (const auto * col_u16 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_u16, null_map_data, min, max); - else if (const auto * col_u32 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_u32, null_map_data, min, max); - else if (const auto * col_u64 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_u64, null_map_data, min, max); - else if (const auto * col_f32 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_f32, null_map_data, min, max); - else if (const auto * col_f64 = typeid_cast(nested_column.get())) - getExtremesFromNullableContent(*col_f64, null_map_data, min, max); +void ColumnNullable::getExtremesNullLast(Field & min, Field & max) const +{ + getExtremesWithNulls(getNestedColumn(), getNullMapData(), min, max, true); } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 963b3e1e8fa..7b339893ff4 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -111,6 +111,8 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; void getExtremes(Field & min, Field & max) const override; + // Special function for nullable minmax index + void getExtremesNullLast(Field & min, Field & max) const; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override { diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 8b006bc550d..4fe0f0bb8c8 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -109,11 +109,23 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) /// It should not affect client address checking, since client cannot connect from IPv6 address /// if server has no IPv6 addresses. 
flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG; + + DNSResolver::IPAddresses addresses; + + try + { #if defined(ARCADIA_BUILD) - auto addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); + addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); #else - auto addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); + addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); #endif + } + catch (const Poco::Net::DNSException & e) + { + LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.message()); + addresses.clear(); + } + if (addresses.empty()) throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR); diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 8301ea656bf..7904d0ac61d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -558,6 +558,9 @@ M(588, DISTRIBUTED_BROKEN_BATCH_INFO) \ M(589, DISTRIBUTED_BROKEN_BATCH_FILES) \ M(590, CANNOT_SYSCONF) \ + M(591, SQLITE_ENGINE_ERROR) \ + M(592, DATA_ENCRYPTION_ERROR) \ + M(593, ZERO_COPY_REPLICATION_ERROR) \ \ M(998, POSTGRESQL_CONNECTION_FAILURE) \ M(999, KEEPER_EXCEPTION) \ diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index e98cd3c3046..641f8bbe0f0 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -313,7 +313,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded try { stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() - << ", e.displayText() = " << e.displayText() + << ", " << e.displayText() << (with_stacktrace ? ", Stack trace (when copying this message, always include the lines below):\n\n" + getExceptionStackTraceString(e) : "") << (with_extra_info ? getExtraExceptionInfo(e) : "") << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")"; @@ -433,7 +433,12 @@ std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool } } - stream << "Code: " << e.code() << ", e.displayText() = " << text; + stream << "Code: " << e.code() << ". 
" << text; + + if (!text.empty() && text.back() != '.') + stream << '.'; + + stream << " (" << ErrorCodes::getName(e.code()) << ")"; if (with_stacktrace && !has_embedded_stack_trace) stream << ", Stack trace (when copying this message, always include the lines below):\n\n" << e.getStackTraceString(); diff --git a/src/Common/FieldVisitorConvertToNumber.h b/src/Common/FieldVisitorConvertToNumber.h index 0f099c6215d..82a804691d7 100644 --- a/src/Common/FieldVisitorConvertToNumber.h +++ b/src/Common/FieldVisitorConvertToNumber.h @@ -26,6 +26,16 @@ public: throw Exception("Cannot convert NULL to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); } + T operator() (const NegativeInfinity &) const + { + throw Exception("Cannot convert -Inf to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); + } + + T operator() (const PositiveInfinity &) const + { + throw Exception("Cannot convert +Inf to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); + } + T operator() (const String &) const { throw Exception("Cannot convert String to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); diff --git a/src/Common/FieldVisitorDump.cpp b/src/Common/FieldVisitorDump.cpp index e6726a4502e..5e767cf30c1 100644 --- a/src/Common/FieldVisitorDump.cpp +++ b/src/Common/FieldVisitorDump.cpp @@ -25,6 +25,8 @@ static inline void writeQuoted(const DecimalField & x, WriteBuffer & buf) } String FieldVisitorDump::operator() (const Null &) const { return "NULL"; } +String FieldVisitorDump::operator() (const NegativeInfinity &) const { return "-Inf"; } +String FieldVisitorDump::operator() (const PositiveInfinity &) const { return "+Inf"; } String FieldVisitorDump::operator() (const UInt64 & x) const { return formatQuotedWithPrefix(x, "UInt64_"); } String FieldVisitorDump::operator() (const Int64 & x) const { return formatQuotedWithPrefix(x, "Int64_"); } String FieldVisitorDump::operator() (const Float64 & x) const { return formatQuotedWithPrefix(x, "Float64_"); } diff --git a/src/Common/FieldVisitorDump.h b/src/Common/FieldVisitorDump.h index 22e34d66ff7..bc82d35f0f1 100644 --- a/src/Common/FieldVisitorDump.h +++ b/src/Common/FieldVisitorDump.h @@ -10,6 +10,8 @@ class FieldVisitorDump : public StaticVisitor { public: String operator() (const Null & x) const; + String operator() (const NegativeInfinity & x) const; + String operator() (const PositiveInfinity & x) const; String operator() (const UInt64 & x) const; String operator() (const UInt128 & x) const; String operator() (const UInt256 & x) const; diff --git a/src/Common/FieldVisitorHash.cpp b/src/Common/FieldVisitorHash.cpp index 80d5f2daf65..259dd871d20 100644 --- a/src/Common/FieldVisitorHash.cpp +++ b/src/Common/FieldVisitorHash.cpp @@ -14,6 +14,18 @@ void FieldVisitorHash::operator() (const Null &) const hash.update(type); } +void FieldVisitorHash::operator() (const NegativeInfinity &) const +{ + UInt8 type = Field::Types::NegativeInfinity; + hash.update(type); +} + +void FieldVisitorHash::operator() (const PositiveInfinity &) const +{ + UInt8 type = Field::Types::PositiveInfinity; + hash.update(type); +} + void FieldVisitorHash::operator() (const UInt64 & x) const { UInt8 type = Field::Types::UInt64; diff --git a/src/Common/FieldVisitorHash.h b/src/Common/FieldVisitorHash.h index 6c786fda4ad..bf7c3d5004f 100644 --- a/src/Common/FieldVisitorHash.h +++ b/src/Common/FieldVisitorHash.h @@ -16,6 +16,8 @@ public: FieldVisitorHash(SipHash & hash_); void operator() (const Null & x) const; + void operator() (const 
NegativeInfinity & x) const; + void operator() (const PositiveInfinity & x) const; void operator() (const UInt64 & x) const; void operator() (const UInt128 & x) const; void operator() (const UInt256 & x) const; diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index 0064830c08a..e0ffca28341 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -22,6 +22,8 @@ bool FieldVisitorSum::operator() (UInt64 & x) const bool FieldVisitorSum::operator() (Float64 & x) const { x += get(rhs); return x != 0; } bool FieldVisitorSum::operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); } +bool FieldVisitorSum::operator() (NegativeInfinity &) const { throw Exception("Cannot sum -Inf", ErrorCodes::LOGICAL_ERROR); } +bool FieldVisitorSum::operator() (PositiveInfinity &) const { throw Exception("Cannot sum +Inf", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); } bool FieldVisitorSum::operator() (Tuple &) const { throw Exception("Cannot sum Tuples", ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Common/FieldVisitorSum.h b/src/Common/FieldVisitorSum.h index e208933043b..4c34fa86455 100644 --- a/src/Common/FieldVisitorSum.h +++ b/src/Common/FieldVisitorSum.h @@ -21,6 +21,8 @@ public: bool operator() (UInt64 & x) const; bool operator() (Float64 & x) const; bool operator() (Null &) const; + bool operator() (NegativeInfinity & x) const; + bool operator() (PositiveInfinity & x) const; bool operator() (String &) const; bool operator() (Array &) const; bool operator() (Tuple &) const; diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index 45bc54f2c2a..74dfc55e1db 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -53,6 +53,8 @@ static String formatFloat(const Float64 x) String FieldVisitorToString::operator() (const Null &) const { return "NULL"; } +String FieldVisitorToString::operator() (const NegativeInfinity &) const { return "-Inf"; } +String FieldVisitorToString::operator() (const PositiveInfinity &) const { return "+Inf"; } String FieldVisitorToString::operator() (const UInt64 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const Int64 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const Float64 & x) const { return formatFloat(x); } diff --git a/src/Common/FieldVisitorToString.h b/src/Common/FieldVisitorToString.h index 39709f1c272..139f011927f 100644 --- a/src/Common/FieldVisitorToString.h +++ b/src/Common/FieldVisitorToString.h @@ -10,6 +10,8 @@ class FieldVisitorToString : public StaticVisitor { public: String operator() (const Null & x) const; + String operator() (const NegativeInfinity & x) const; + String operator() (const PositiveInfinity & x) const; String operator() (const UInt64 & x) const; String operator() (const UInt128 & x) const; String operator() (const UInt256 & x) const; diff --git a/src/Common/FieldVisitorWriteBinary.cpp b/src/Common/FieldVisitorWriteBinary.cpp index 8e991ad13d3..56df9f1e43a 100644 --- a/src/Common/FieldVisitorWriteBinary.cpp +++ b/src/Common/FieldVisitorWriteBinary.cpp @@ -7,6 +7,8 @@ namespace DB { void FieldVisitorWriteBinary::operator() (const Null &, WriteBuffer &) const { } +void FieldVisitorWriteBinary::operator() 
(const NegativeInfinity &, WriteBuffer &) const { } +void FieldVisitorWriteBinary::operator() (const PositiveInfinity &, WriteBuffer &) const { } void FieldVisitorWriteBinary::operator() (const UInt64 & x, WriteBuffer & buf) const { writeVarUInt(x, buf); } void FieldVisitorWriteBinary::operator() (const Int64 & x, WriteBuffer & buf) const { writeVarInt(x, buf); } void FieldVisitorWriteBinary::operator() (const Float64 & x, WriteBuffer & buf) const { writeFloatBinary(x, buf); } diff --git a/src/Common/FieldVisitorWriteBinary.h b/src/Common/FieldVisitorWriteBinary.h index ae864ca74f3..5f7bf578e32 100644 --- a/src/Common/FieldVisitorWriteBinary.h +++ b/src/Common/FieldVisitorWriteBinary.h @@ -9,6 +9,8 @@ class FieldVisitorWriteBinary { public: void operator() (const Null & x, WriteBuffer & buf) const; + void operator() (const NegativeInfinity & x, WriteBuffer & buf) const; + void operator() (const PositiveInfinity & x, WriteBuffer & buf) const; void operator() (const UInt64 & x, WriteBuffer & buf) const; void operator() (const UInt128 & x, WriteBuffer & buf) const; void operator() (const UInt256 & x, WriteBuffer & buf) const; diff --git a/src/Common/FieldVisitorsAccurateComparison.h b/src/Common/FieldVisitorsAccurateComparison.h index ba3fabd1535..9e6a93cee3f 100644 --- a/src/Common/FieldVisitorsAccurateComparison.h +++ b/src/Common/FieldVisitorsAccurateComparison.h @@ -26,8 +26,12 @@ public: template bool operator() (const T & l, const U & r) const { - if constexpr (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v + || std::is_same_v || std::is_same_v + || std::is_same_v || std::is_same_v) + { return std::is_same_v; + } else { if constexpr (std::is_same_v) @@ -77,6 +81,10 @@ public: { if constexpr (std::is_same_v || std::is_same_v) return false; + else if constexpr (std::is_same_v || std::is_same_v) + return !std::is_same_v; + else if constexpr (std::is_same_v || std::is_same_v) + return false; else { if constexpr (std::is_same_v) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 915d14466b6..f4f47148d56 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -224,7 +224,7 @@ M(PerfLocalMemoryReferences, "Local NUMA node memory reads") \ M(PerfLocalMemoryMisses, "Local NUMA node memory read misses") \ \ - M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \ + M(CreatedHTTPConnections, "Total amount of created HTTP connections (counter increase every time connection is created).") \ \ M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \ M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \ @@ -248,6 +248,9 @@ M(S3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to S3 storage.") \ M(S3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to S3 storage.") \ M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ + \ + M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.") \ + M(SleepFunctionMicroseconds, "Time spent sleeping due to a sleep function call.") \ namespace ProfileEvents diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index fa78f052f37..8074fabfa2d 100644 --- a/src/Core/Block.cpp +++ 
b/src/Core/Block.cpp @@ -375,9 +375,13 @@ void Block::setColumn(size_t position, ColumnWithTypeAndName && column) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Block::setColumn(), max position {}", position, toString(data.size())); - data[position].name = std::move(column.name); - data[position].type = std::move(column.type); - data[position].column = std::move(column.column); + if (data[position].name != column.name) + { + index_by_name.erase(data[position].name); + index_by_name.emplace(column.name, position); + } + + data[position] = std::move(column); } @@ -436,7 +440,7 @@ Block Block::sortColumns() const Block sorted_block; /// std::unordered_map (index_by_name) cannot be used to guarantee the sort order - std::vector sorted_index_by_name(index_by_name.size()); + std::vector sorted_index_by_name(index_by_name.size()); { size_t i = 0; for (auto it = index_by_name.begin(); it != index_by_name.end(); ++it) diff --git a/src/Core/Block.h b/src/Core/Block.h index a21bd290571..fb94a205bf5 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -68,7 +68,7 @@ public: const_cast(this)->findByName(name)); } - const ColumnWithTypeAndName* findByName(const std::string & name) const; + const ColumnWithTypeAndName * findByName(const std::string & name) const; ColumnWithTypeAndName & getByName(const std::string & name) { diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index e625c92f826..b7b03951ac9 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -455,6 +455,16 @@ inline void writeText(const Null &, WriteBuffer & buf) writeText(std::string("NULL"), buf); } +inline void writeText(const NegativeInfinity &, WriteBuffer & buf) +{ + writeText(std::string("-Inf"), buf); +} + +inline void writeText(const PositiveInfinity &, WriteBuffer & buf) +{ + writeText(std::string("+Inf"), buf); +} + String toString(const Field & x) { return Field::dispatch( diff --git a/src/Core/Field.h b/src/Core/Field.h index 23569f5f9f1..744675d6e86 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -218,6 +218,8 @@ template <> struct NearestFieldTypeImpl { using Type = Tuple; }; template <> struct NearestFieldTypeImpl { using Type = Map; }; template <> struct NearestFieldTypeImpl { using Type = UInt64; }; template <> struct NearestFieldTypeImpl { using Type = Null; }; +template <> struct NearestFieldTypeImpl { using Type = NegativeInfinity; }; +template <> struct NearestFieldTypeImpl { using Type = PositiveInfinity; }; template <> struct NearestFieldTypeImpl { using Type = AggregateFunctionStateData; }; @@ -269,6 +271,10 @@ public: Int256 = 25, Map = 26, UUID = 27, + + // Special types for index analysis + NegativeInfinity = 254, + PositiveInfinity = 255, }; static const char * toString(Which which) @@ -276,6 +282,8 @@ public: switch (which) { case Null: return "Null"; + case NegativeInfinity: return "-Inf"; + case PositiveInfinity: return "+Inf"; case UInt64: return "UInt64"; case UInt128: return "UInt128"; case UInt256: return "UInt256"; @@ -404,7 +412,10 @@ public: Types::Which getType() const { return which; } const char * getTypeName() const { return Types::toString(which); } - bool isNull() const { return which == Types::Null; } + // Non-valued field are all denoted as Null + bool isNull() const { return which == Types::Null || which == Types::NegativeInfinity || which == Types::PositiveInfinity; } + bool isNegativeInfinity() const { return which == Types::NegativeInfinity; } + bool isPositiveInfinity() const { return which == Types::PositiveInfinity; } template @@ 
-459,7 +470,10 @@ public: switch (which) { - case Types::Null: return false; + case Types::Null: + case Types::NegativeInfinity: + case Types::PositiveInfinity: + return false; case Types::UInt64: return get() < rhs.get(); case Types::UInt128: return get() < rhs.get(); case Types::UInt256: return get() < rhs.get(); @@ -496,7 +510,10 @@ public: switch (which) { - case Types::Null: return true; + case Types::Null: + case Types::NegativeInfinity: + case Types::PositiveInfinity: + return true; case Types::UInt64: return get() <= rhs.get(); case Types::UInt128: return get() <= rhs.get(); case Types::UInt256: return get() <= rhs.get(); @@ -533,8 +550,11 @@ public: switch (which) { - case Types::Null: return true; - case Types::UInt64: return get() == rhs.get(); + case Types::Null: + case Types::NegativeInfinity: + case Types::PositiveInfinity: + return true; + case Types::UInt64: return get() == rhs.get(); case Types::Int64: return get() == rhs.get(); case Types::Float64: { @@ -573,6 +593,8 @@ public: switch (field.which) { case Types::Null: return f(field.template get()); + case Types::NegativeInfinity: return f(field.template get()); + case Types::PositiveInfinity: return f(field.template get()); // gcc 8.2.1 #if !defined(__clang__) #pragma GCC diagnostic push @@ -731,6 +753,8 @@ using Row = std::vector; template <> struct Field::TypeToEnum { static const Types::Which value = Types::Null; }; +template <> struct Field::TypeToEnum { static const Types::Which value = Types::NegativeInfinity; }; +template <> struct Field::TypeToEnum { static const Types::Which value = Types::PositiveInfinity; }; template <> struct Field::TypeToEnum { static const Types::Which value = Types::UInt64; }; template <> struct Field::TypeToEnum { static const Types::Which value = Types::UInt128; }; template <> struct Field::TypeToEnum { static const Types::Which value = Types::UInt256; }; @@ -751,6 +775,8 @@ template <> struct Field::TypeToEnum>{ static const Typ template <> struct Field::TypeToEnum{ static const Types::Which value = Types::AggregateFunctionState; }; template <> struct Field::EnumToType { using Type = Null; }; +template <> struct Field::EnumToType { using Type = NegativeInfinity; }; +template <> struct Field::EnumToType { using Type = PositiveInfinity; }; template <> struct Field::EnumToType { using Type = UInt64; }; template <> struct Field::EnumToType { using Type = UInt128; }; template <> struct Field::EnumToType { using Type = UInt256; }; diff --git a/src/Core/MySQL/MySQLClient.cpp b/src/Core/MySQL/MySQLClient.cpp index 3650818c543..26535f05be7 100644 --- a/src/Core/MySQL/MySQLClient.cpp +++ b/src/Core/MySQL/MySQLClient.cpp @@ -24,14 +24,14 @@ namespace ErrorCodes } MySQLClient::MySQLClient(const String & host_, UInt16 port_, const String & user_, const String & password_) - : host(host_), port(port_), user(user_), password(std::move(password_)) + : host(host_), port(port_), user(user_), password(std::move(password_)), + client_capabilities(CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION) { - client_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION; } MySQLClient::MySQLClient(MySQLClient && other) : host(std::move(other.host)), port(other.port), user(std::move(other.user)), password(std::move(other.password)) - , client_capability_flags(other.client_capability_flags) + , client_capabilities(other.client_capabilities) { } @@ -56,7 +56,8 @@ void MySQLClient::connect() in = std::make_shared(*socket); out = std::make_shared(*socket); - 
packet_endpoint = std::make_shared(*in, *out, seq); + packet_endpoint = MySQLProtocol::PacketEndpoint::create(*in, *out, sequence_id); + handshake(); } @@ -68,7 +69,7 @@ void MySQLClient::disconnect() socket->close(); socket = nullptr; connected = false; - seq = 0; + sequence_id = 0; } /// https://dev.mysql.com/doc/internals/en/connection-phase-packets.html @@ -87,10 +88,10 @@ void MySQLClient::handshake() String auth_plugin_data = native41.getAuthPluginData(); HandshakeResponse handshake_response( - client_capability_flags, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password); + client_capabilities, MAX_PACKET_LENGTH, charset_utf8, user, "", auth_plugin_data, mysql_native_password); packet_endpoint->sendPacket(handshake_response, true); - ResponsePacket packet_response(client_capability_flags, true); + ResponsePacket packet_response(client_capabilities, true); packet_endpoint->receivePacket(packet_response); packet_endpoint->resetSequenceId(); @@ -105,7 +106,7 @@ void MySQLClient::writeCommand(char command, String query) WriteCommand write_command(command, query); packet_endpoint->sendPacket(write_command, true); - ResponsePacket packet_response(client_capability_flags); + ResponsePacket packet_response(client_capabilities); packet_endpoint->receivePacket(packet_response); switch (packet_response.getType()) { @@ -124,7 +125,7 @@ void MySQLClient::registerSlaveOnMaster(UInt32 slave_id) RegisterSlave register_slave(slave_id); packet_endpoint->sendPacket(register_slave, true); - ResponsePacket packet_response(client_capability_flags); + ResponsePacket packet_response(client_capabilities); packet_endpoint->receivePacket(packet_response); packet_endpoint->resetSequenceId(); if (packet_response.getType() == PACKET_ERR) diff --git a/src/Core/MySQL/MySQLClient.h b/src/Core/MySQL/MySQLClient.h index e503c985584..2c93fc888a3 100644 --- a/src/Core/MySQL/MySQLClient.h +++ b/src/Core/MySQL/MySQLClient.h @@ -45,9 +45,9 @@ private: String password; bool connected = false; - UInt32 client_capability_flags = 0; + uint8_t sequence_id = 0; + uint32_t client_capabilities = 0; - uint8_t seq = 0; const UInt8 charset_utf8 = 33; const String mysql_native_password = "mysql_native_password"; diff --git a/src/Core/MySQL/PacketEndpoint.h b/src/Core/MySQL/PacketEndpoint.h index d027934eafb..df81f49fefb 100644 --- a/src/Core/MySQL/PacketEndpoint.h +++ b/src/Core/MySQL/PacketEndpoint.h @@ -5,6 +5,7 @@ #include "IMySQLReadPacket.h" #include "IMySQLWritePacket.h" #include "IO/MySQLPacketPayloadReadBuffer.h" +#include namespace DB { @@ -15,19 +16,13 @@ namespace MySQLProtocol /* Writes and reads packets, keeping sequence-id. * Throws ProtocolError, if packet with incorrect sequence-id was received. */ -class PacketEndpoint +class PacketEndpoint : public shared_ptr_helper { public: uint8_t & sequence_id; ReadBuffer * in; WriteBuffer * out; - /// For writing. - PacketEndpoint(WriteBuffer & out_, uint8_t & sequence_id_); - - /// For reading and writing. - PacketEndpoint(ReadBuffer & in_, WriteBuffer & out_, uint8_t & sequence_id_); - MySQLPacketPayloadReadBuffer getPayload(); void receivePacket(IMySQLReadPacket & packet); @@ -48,8 +43,19 @@ public: /// Converts packet to text. Is used for debug output. static String packetToText(const String & payload); + +protected: + /// For writing. + PacketEndpoint(WriteBuffer & out_, uint8_t & sequence_id_); + + /// For reading and writing. 
+ PacketEndpoint(ReadBuffer & in_, WriteBuffer & out_, uint8_t & sequence_id_); + + friend struct shared_ptr_helper; }; +using PacketEndpointPtr = std::shared_ptr; + } } diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index 57d29c96c53..91191c73fd0 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -161,18 +162,24 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const { - std::unordered_map self_columns; + /// NOTE: It's better to make a map in `IStorage` than to create it here every time again. +#if !defined(ARCADIA_BUILD) + google::dense_hash_map types; +#else + google::sparsehash::dense_hash_map types; +#endif + types.set_empty_key(StringRef()); for (const auto & column : *this) - self_columns[column.name] = &column; + types[column.name] = &column.type; NamesAndTypesList res; for (const String & name : names) { - auto it = self_columns.find(name); - if (it == self_columns.end()) + auto it = types.find(name); + if (it == types.end()) throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN); - res.emplace_back(*it->second); + res.emplace_back(name, *it->second); } return res; diff --git a/src/Core/PostgreSQL/Connection.cpp b/src/Core/PostgreSQL/Connection.cpp index c423d75981e..e5c61c19963 100644 --- a/src/Core/PostgreSQL/Connection.cpp +++ b/src/Core/PostgreSQL/Connection.cpp @@ -1,4 +1,7 @@ #include "Connection.h" + +#if USE_LIBPQXX + #include namespace postgres @@ -72,3 +75,5 @@ void Connection::connect() updateConnection(); } } + +#endif diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index e01de419c17..681681a38bf 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -1,5 +1,11 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + #include // Y_IGNORE #include #include @@ -45,3 +51,5 @@ private: Poco::Logger * log; }; } + +#endif diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 98ab7df182d..cbdde7062b5 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -1,5 +1,11 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + #include // Y_IGNORE #include #include @@ -35,3 +41,5 @@ private: using ConnectionHolderPtr = std::unique_ptr; } + +#endif diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 6bf756b8a12..b8b8e78396c 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -1,4 +1,7 @@ #include "PoolWithFailover.h" + +#if USE_LIBPQXX + #include "Utils.h" #include #include @@ -136,3 +139,5 @@ ConnectionHolderPtr PoolWithFailover::get() throw DB::Exception(DB::ErrorCodes::POSTGRESQL_CONNECTION_FAILURE, "Unable to connect to any of the replicas"); } } + +#endif diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index f4ae2c6cd1b..9150262e242 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -1,5 +1,12 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + + #include "ConnectionHolder.h" #include #include @@ -63,3 +70,5 @@ private: using PoolWithFailoverPtr = std::shared_ptr; } + +#endif diff 
--git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index 98e76da99d2..ebfdacd0fea 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -1,4 +1,7 @@ #include "Utils.h" + +#if USE_LIBPQXX + #include namespace postgres @@ -17,3 +20,5 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S } } + +#endif diff --git a/src/Core/PostgreSQL/Utils.h b/src/Core/PostgreSQL/Utils.h index 34d66fefb70..4a58fcffb9a 100644 --- a/src/Core/PostgreSQL/Utils.h +++ b/src/Core/PostgreSQL/Utils.h @@ -1,5 +1,11 @@ #pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_LIBPQXX + #include // Y_IGNORE #include #include "Connection.h" @@ -15,3 +21,5 @@ namespace postgres { ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password); } + +#endif diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index e606300fc37..19560cec9ea 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,16 @@ void insertPostgreSQLValue( assert_cast(column).insertValue(time); break; } - case ExternalResultDescription::ValueType::vtDateTime64:[[fallthrough]]; + case ExternalResultDescription::ValueType::vtDateTime64: + { + ReadBufferFromString in(value); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); + if (time < 0) + time = 0; + assert_cast &>(column).insertValue(time); + break; + } case ExternalResultDescription::ValueType::vtDecimal32: [[fallthrough]]; case ExternalResultDescription::ValueType::vtDecimal64: [[fallthrough]]; case ExternalResultDescription::ValueType::vtDecimal128: [[fallthrough]]; @@ -204,6 +214,18 @@ void preparePostgreSQLArrayInfo( ReadBufferFromString in(field); time_t time = 0; readDateTimeText(time, in, assert_cast(nested.get())->getTimeZone()); + if (time < 0) + time = 0; + return time; + }; + else if (which.isDateTime64()) + parser = [nested](std::string & field) -> Field + { + ReadBufferFromString in(field); + DateTime64 time = 0; + readDateTime64Text(time, 6, in, assert_cast(nested.get())->getTimeZone()); + if (time < 0) + time = 0; return time; }; else if (which.isDecimal32()) diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.h b/src/Core/PostgreSQL/insertPostgreSQLValue.h index 7acba4f09bd..4ed3eb95aac 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.h +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.h @@ -7,7 +7,6 @@ #if USE_LIBPQXX #include -#include #include #include diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8c733415dec..55566e2e7a4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -57,7 +57,7 @@ class IColumn; M(Seconds, tcp_keep_alive_timeout, 0, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \ M(Milliseconds, hedged_connection_timeout_ms, DBMS_DEFAULT_HEDGED_CONNECTION_TIMEOUT_MS, "Connection timeout for establishing connection with replica for Hedged requests", 0) \ M(Milliseconds, receive_data_timeout_ms, DBMS_DEFAULT_RECEIVE_DATA_TIMEOUT_MS, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \ - M(Bool, use_hedged_requests, false, "Use hedged requests for distributed queries", 0) \ + M(Bool, use_hedged_requests, true, "Use hedged requests for 
distributed queries", 0) \ M(Bool, allow_changing_replica_until_first_data_packet, false, "Allow HedgedConnections to change replica until receiving first data packet", 0) \ M(Milliseconds, queue_max_wait_ms, 0, "The wait time in the request queue, if the number of concurrent requests exceeds the maximum.", 0) \ M(Milliseconds, connection_pool_max_wait_ms, 0, "The wait time when the connection pool is full.", 0) \ @@ -482,6 +482,8 @@ class IColumn; M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \ M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ + M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ \ @@ -524,6 +526,7 @@ class IColumn; M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ + M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ \ diff --git a/src/Core/Types.h b/src/Core/Types.h index 5496f09f3d3..b5f3c1bff9f 100644 --- a/src/Core/Types.h +++ b/src/Core/Types.h @@ -14,6 +14,8 @@ namespace DB /// Data types for representing elementary values from a database in RAM. struct Null {}; +struct NegativeInfinity {}; +struct PositiveInfinity {}; /// Ignore strange gcc warning https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55776 #if !defined(__clang__) diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index b0420073998..d3348466369 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -73,6 +73,7 @@ bool callOnBasicType(TypeIndex number, F && f) switch (number) { case TypeIndex::Date: return f(TypePair()); + case TypeIndex::Date32: return f(TypePair()); case TypeIndex::DateTime: return f(TypePair()); case TypeIndex::DateTime64: return f(TypePair()); default: @@ -142,6 +143,7 @@ inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) switch (type_num1) { case TypeIndex::Date: return callOnBasicType(type_num2, std::forward(f)); + case TypeIndex::Date32: return callOnBasicType(type_num2, std::forward(f)); case TypeIndex::DateTime: return callOnBasicType(type_num2, std::forward(f)); case TypeIndex::DateTime64: return callOnBasicType(type_num2, std::forward(f)); default: @@ -154,6 +156,7 @@ inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) class DataTypeDate; +class DataTypeDate32; class DataTypeString; class DataTypeFixedString; class DataTypeUUID; @@ -192,7 +195,7 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
args) case TypeIndex::Decimal256: return f(TypePair, T>(), std::forward(args)...); case TypeIndex::Date: return f(TypePair(), std::forward(args)...); - case TypeIndex::Date32: return f(TypePair(), std::forward(args)...); + case TypeIndex::Date32: return f(TypePair(), std::forward(args)...); case TypeIndex::DateTime: return f(TypePair(), std::forward(args)...); case TypeIndex::DateTime64: return f(TypePair(), std::forward(args)...); diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index e250e013913..45cbc6efe19 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -13,5 +13,6 @@ #cmakedefine01 USE_LDAP #cmakedefine01 USE_ROCKSDB #cmakedefine01 USE_LIBPQXX +#cmakedefine01 USE_SQLITE #cmakedefine01 USE_NURAFT #cmakedefine01 USE_KRB5 diff --git a/src/Core/iostream_debug_helpers.cpp b/src/Core/iostream_debug_helpers.cpp index 8ec06af049e..38e61ac4fca 100644 --- a/src/Core/iostream_debug_helpers.cpp +++ b/src/Core/iostream_debug_helpers.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -28,12 +27,6 @@ std::ostream & operator<< (std::ostream & stream, const Field & what) return stream; } -std::ostream & operator<<(std::ostream & stream, const IBlockInputStream & what) -{ - stream << "IBlockInputStream(name = " << what.getName() << ")"; - return stream; -} - std::ostream & operator<<(std::ostream & stream, const NameAndTypePair & what) { stream << "NameAndTypePair(name = " << what.name << ", type = " << what.type << ")"; diff --git a/src/Core/iostream_debug_helpers.h b/src/Core/iostream_debug_helpers.h index 7568fa6e445..f57788b63d8 100644 --- a/src/Core/iostream_debug_helpers.h +++ b/src/Core/iostream_debug_helpers.h @@ -10,9 +10,6 @@ class Field; template >> std::ostream & operator<<(std::ostream & stream, const T & what); -class IBlockInputStream; -std::ostream & operator<<(std::ostream & stream, const IBlockInputStream & what); - struct NameAndTypePair; std::ostream & operator<<(std::ostream & stream, const NameAndTypePair & what); diff --git a/src/Core/ya.make b/src/Core/ya.make index d1e352ee846..6946d7a47bb 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -31,6 +31,10 @@ SRCS( MySQL/PacketsProtocolText.cpp MySQL/PacketsReplication.cpp NamesAndTypes.cpp + PostgreSQL/Connection.cpp + PostgreSQL/PoolWithFailover.cpp + PostgreSQL/Utils.cpp + PostgreSQL/insertPostgreSQLValue.cpp PostgreSQLProtocol.cpp QueryProcessingStage.cpp Settings.cpp diff --git a/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 81be24439a5..1539e814b04 100644 --- a/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/src/DataStreams/BlocksSource.h b/src/DataStreams/BlocksSource.h index 249f089f9af..a416a48e9d2 100644 --- a/src/DataStreams/BlocksSource.h +++ b/src/DataStreams/BlocksSource.h @@ -11,7 +11,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include #include #include diff --git a/src/DataStreams/CountingBlockOutputStream.h b/src/DataStreams/CountingBlockOutputStream.h index 5c36c40c1ad..c7247b39945 100644 --- a/src/DataStreams/CountingBlockOutputStream.h +++ b/src/DataStreams/CountingBlockOutputStream.h @@ -1,6 +1,6 @@ #pragma once + #include -#include #include diff --git a/src/DataStreams/IBlockInputStream.cpp b/src/DataStreams/IBlockInputStream.cpp index a6484c41b4f..c3071cdcf20 100644 --- a/src/DataStreams/IBlockInputStream.cpp +++ b/src/DataStreams/IBlockInputStream.cpp @@ -25,7 +25,6 @@ namespace ErrorCodes extern const int TOO_MANY_BYTES; extern const int TOO_MANY_ROWS_OR_BYTES; extern const int LOGICAL_ERROR; - extern const int TOO_DEEP_PIPELINE; } @@ -357,74 +356,4 @@ Block IBlockInputStream::getExtremes() return res; } - -String IBlockInputStream::getTreeID() const -{ - WriteBufferFromOwnString s; - s << getName(); - - if (!children.empty()) - { - s << "("; - for (BlockInputStreams::const_iterator it = children.begin(); it != children.end(); ++it) - { - if (it != children.begin()) - s << ", "; - s << (*it)->getTreeID(); - } - s << ")"; - } - - return s.str(); -} - - -size_t IBlockInputStream::checkDepthImpl(size_t max_depth, size_t level) const -{ - if (children.empty()) - return 0; - - if (level > max_depth) - throw Exception("Query pipeline is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_PIPELINE); - - size_t res = 0; - for (const auto & child : children) - { - size_t child_depth = child->checkDepth(level + 1); - if (child_depth > res) - res = child_depth; - } - - return res + 1; -} - - -void IBlockInputStream::dumpTree(WriteBuffer & ostr, size_t indent, size_t multiplier) const -{ - ostr << String(indent, ' ') << getName(); - if (multiplier > 1) - ostr << " × " << multiplier; - //ostr << ": " << getHeader().dumpStructure(); - ostr << '\n'; - ++indent; - - /// If the subtree is repeated several times, then we output it once with the multiplier. - using Multipliers = std::map; - Multipliers multipliers; - - for (const auto & child : children) - ++multipliers[child->getTreeID()]; - - for (const auto & child : children) - { - String id = child->getTreeID(); - size_t & subtree_multiplier = multipliers[id]; - if (subtree_multiplier != 0) /// Already printed subtrees are marked with zero in the array of multipliers. - { - child->dumpTree(ostr, indent, subtree_multiplier); - subtree_multiplier = 0; - } - } -} - } diff --git a/src/DataStreams/IBlockInputStream.h b/src/DataStreams/IBlockInputStream.h index 090ea394fd6..8b3e2512e47 100644 --- a/src/DataStreams/IBlockInputStream.h +++ b/src/DataStreams/IBlockInputStream.h @@ -23,15 +23,6 @@ namespace ErrorCodes class ProcessListElement; class EnabledQuota; class QueryStatus; -struct SortColumnDescription; -using SortDescription = std::vector; - -/** Callback to track the progress of the query. - * Used in IBlockInputStream and Context. - * The function takes the number of rows in the last block, the number of bytes in the last block. - * Note that the callback can be called from different threads. - */ -using ProgressCallback = std::function; /** The stream interface for reading data by blocks from the database. @@ -93,15 +84,6 @@ public: */ virtual void readSuffix(); - /// Must be called before `read()` and `readPrefix()`. - void dumpTree(WriteBuffer & ostr, size_t indent = 0, size_t multiplier = 1) const; - - /** Check the depth of the pipeline. - * If max_depth is specified and the `depth` is greater - throw an exception. 
- * Must be called before `read()` and `readPrefix()`. - */ - size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); } - /// Do not allow to change the table while the blocks stream and its children are alive. void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } @@ -269,9 +251,6 @@ private: size_t checkDepthImpl(size_t max_depth, size_t level) const; - /// Get text with names of this source and the entire subtree. - String getTreeID() const; - template void forEachChild(F && f) { diff --git a/src/DataStreams/LazyBlockInputStream.h b/src/DataStreams/LazyBlockInputStream.h deleted file mode 100644 index 37089c9bb5b..00000000000 --- a/src/DataStreams/LazyBlockInputStream.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/** Initialize another source on the first `read` call, and then use it. - * This is needed, for example, to read from a table that will be populated - * after creation of LazyBlockInputStream object, but before the first `read` call. - */ -class LazyBlockInputStream : public IBlockInputStream -{ -public: - using Generator = std::function; - - LazyBlockInputStream(const Block & header_, Generator generator_) - : header(header_), generator(std::move(generator_)) - { - } - - LazyBlockInputStream(const char * name_, const Block & header_, Generator generator_) - : name(name_), header(header_), generator(std::move(generator_)) - { - } - - String getName() const override { return name; } - - Block getHeader() const override - { - return header; - } - - /// We call readPrefix lazily. Suppress default behaviour. - void readPrefix() override {} - -protected: - Block readImpl() override - { - if (!input) - { - input = generator(); - - if (!input) - return Block(); - - auto * p_input = dynamic_cast(input.get()); - - if (p_input) - { - /// They could have been set before, but were not passed into the `input`. - if (progress_callback) - p_input->setProgressCallback(progress_callback); - if (process_list_elem) - p_input->setProcessListElement(process_list_elem); - } - - input->readPrefix(); - - { - addChild(input); - - if (isCancelled() && p_input) - p_input->cancel(is_killed); - } - } - - return input->read(); - } - -private: - const char * name = "Lazy"; - Block header; - Generator generator; - - BlockInputStreamPtr input; -}; - -} diff --git a/src/DataStreams/NullBlockInputStream.h b/src/DataStreams/NullBlockInputStream.h deleted file mode 100644 index 2e4f78899dc..00000000000 --- a/src/DataStreams/NullBlockInputStream.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/// Empty stream of blocks of specified structure. 
-class NullBlockInputStream : public IBlockInputStream -{ -public: - NullBlockInputStream(const Block & header_) : header(header_) {} - - Block getHeader() const override { return header; } - String getName() const override { return "Null"; } - -private: - Block header; - - Block readImpl() override { return {}; } -}; - -} diff --git a/src/DataStreams/ParallelInputsProcessor.h b/src/DataStreams/ParallelInputsProcessor.h index 07602954223..65c7e741ec2 100644 --- a/src/DataStreams/ParallelInputsProcessor.h +++ b/src/DataStreams/ParallelInputsProcessor.h @@ -8,7 +8,6 @@ #include -#include #include #include #include diff --git a/src/DataStreams/SQLiteBlockInputStream.cpp b/src/DataStreams/SQLiteBlockInputStream.cpp new file mode 100644 index 00000000000..da7645d968d --- /dev/null +++ b/src/DataStreams/SQLiteBlockInputStream.cpp @@ -0,0 +1,163 @@ +#include "SQLiteBlockInputStream.h" + +#if USE_SQLITE +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SQLITE_ENGINE_ERROR; +} + +SQLiteBlockInputStream::SQLiteBlockInputStream( + SQLitePtr sqlite_db_, + const String & query_str_, + const Block & sample_block, + const UInt64 max_block_size_) + : query_str(query_str_) + , max_block_size(max_block_size_) + , sqlite_db(std::move(sqlite_db_)) +{ + description.init(sample_block); +} + + +void SQLiteBlockInputStream::readPrefix() +{ + sqlite3_stmt * compiled_stmt = nullptr; + int status = sqlite3_prepare_v2(sqlite_db.get(), query_str.c_str(), query_str.size() + 1, &compiled_stmt, nullptr); + + if (status != SQLITE_OK) + throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, + "Cannot prepare sqlite statement. Status: {}. Message: {}", + status, sqlite3_errstr(status)); + + compiled_statement = std::unique_ptr(compiled_stmt, StatementDeleter()); +} + + +Block SQLiteBlockInputStream::readImpl() +{ + if (!compiled_statement) + return Block(); + + MutableColumns columns = description.sample_block.cloneEmptyColumns(); + size_t num_rows = 0; + + while (true) + { + int status = sqlite3_step(compiled_statement.get()); + + if (status == SQLITE_BUSY) + { + continue; + } + else if (status == SQLITE_DONE) + { + compiled_statement.reset(); + break; + } + else if (status != SQLITE_ROW) + { + throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, + "Expected SQLITE_ROW status, but got status {}.
Error: {}, Message: {}", + status, sqlite3_errstr(status), sqlite3_errmsg(sqlite_db.get())); + } + + int column_count = sqlite3_column_count(compiled_statement.get()); + for (const auto idx : collections::range(0, column_count)) + { + const auto & sample = description.sample_block.getByPosition(idx); + + if (sqlite3_column_type(compiled_statement.get(), idx) == SQLITE_NULL) + { + insertDefaultSQLiteValue(*columns[idx], *sample.column); + continue; + } + + if (description.types[idx].second) + { + ColumnNullable & column_nullable = assert_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), description.types[idx].first, idx); + column_nullable.getNullMapData().emplace_back(0); + } + else + { + insertValue(*columns[idx], description.types[idx].first, idx); + } + } + + if (++num_rows == max_block_size) + break; + } + + return description.sample_block.cloneWithColumns(std::move(columns)); +} + + +void SQLiteBlockInputStream::readSuffix() +{ + if (compiled_statement) + compiled_statement.reset(); +} + + +void SQLiteBlockInputStream::insertValue(IColumn & column, const ExternalResultDescription::ValueType type, size_t idx) +{ + switch (type) + { + case ValueType::vtUInt8: + assert_cast(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx)); + break; + case ValueType::vtUInt16: + assert_cast(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx)); + break; + case ValueType::vtUInt32: + assert_cast(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx)); + break; + case ValueType::vtUInt64: + /// There is no uint64 in sqlite3, only int and int64 + assert_cast(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx)); + break; + case ValueType::vtInt8: + assert_cast(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx)); + break; + case ValueType::vtInt16: + assert_cast(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx)); + break; + case ValueType::vtInt32: + assert_cast(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx)); + break; + case ValueType::vtInt64: + assert_cast(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx)); + break; + case ValueType::vtFloat32: + assert_cast(column).insertValue(sqlite3_column_double(compiled_statement.get(), idx)); + break; + case ValueType::vtFloat64: + assert_cast(column).insertValue(sqlite3_column_double(compiled_statement.get(), idx)); + break; + default: + const char * data = reinterpret_cast(sqlite3_column_text(compiled_statement.get(), idx)); + int len = sqlite3_column_bytes(compiled_statement.get(), idx); + assert_cast(column).insertData(data, len); + break; + } +} + +} + +#endif diff --git a/src/DataStreams/SQLiteBlockInputStream.h b/src/DataStreams/SQLiteBlockInputStream.h new file mode 100644 index 00000000000..35fc4801b4b --- /dev/null +++ b/src/DataStreams/SQLiteBlockInputStream.h @@ -0,0 +1,62 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_SQLITE +#include +#include + +#include // Y_IGNORE + + +namespace DB +{ +class SQLiteBlockInputStream : public IBlockInputStream +{ +using SQLitePtr = std::shared_ptr; + +public: + SQLiteBlockInputStream(SQLitePtr sqlite_db_, + const String & query_str_, + const Block & sample_block, + UInt64 max_block_size_); + + String getName() const override { return "SQLite"; } + + Block getHeader() const override { return description.sample_block.cloneEmpty(); } + +private: + void insertDefaultSQLiteValue(IColumn & column, const 
IColumn & sample_column) + { + column.insertFrom(sample_column, 0); + } + + using ValueType = ExternalResultDescription::ValueType; + + struct StatementDeleter + { + void operator()(sqlite3_stmt * stmt) { sqlite3_finalize(stmt); } + }; + + void readPrefix() override; + + Block readImpl() override; + + void readSuffix() override; + + void insertValue(IColumn & column, const ExternalResultDescription::ValueType type, size_t idx); + + String query_str; + UInt64 max_block_size; + + ExternalResultDescription description; + + SQLitePtr sqlite_db; + std::unique_ptr compiled_statement; +}; + +} + +#endif diff --git a/src/DataStreams/narrowBlockInputStreams.h b/src/DataStreams/narrowBlockInputStreams.h index 97e9c164ddc..c026f5fbedf 100644 --- a/src/DataStreams/narrowBlockInputStreams.h +++ b/src/DataStreams/narrowBlockInputStreams.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index 29e6eb3afc3..e6534ebc2f7 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -41,6 +41,7 @@ SRCS( RemoteBlockOutputStream.cpp RemoteQueryExecutor.cpp RemoteQueryExecutorReadContext.cpp + SQLiteBlockInputStream.cpp SizeLimits.cpp SquashingBlockInputStream.cpp SquashingBlockOutputStream.cpp diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 17f2f8b9924..e74e4553614 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -12,6 +13,11 @@ public: TypeIndex getTypeId() const override { return TypeIndex::Date32; } const char * getFamilyName() const override { return family_name; } + Field getDefault() const override + { + return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + } + bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index d03a8867e42..1e5e4f55ea7 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -42,11 +42,23 @@ public: return it; } + /// throws exception if value is not valid const StringRef & getNameForValue(const T & value) const { return findByValue(value)->second; } + /// returns false if value is not valid + bool getNameForValue(const T & value, StringRef & result) const + { + const auto it = value_to_name_map.find(value); + if (it == std::end(value_to_name_map)) + return false; + + result = it->second; + return true; + } + T getValue(StringRef field_name, bool try_treat_as_id = false) const; template diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index c1a8cacd5c2..3c3439593ed 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -19,6 +19,7 @@ namespace DB namespace ErrorCodes { extern const int EMPTY_DATA_PASSED; + extern const int LOGICAL_ERROR; } @@ -27,6 +28,16 @@ DataTypePtr FieldToDataType::operator() (const Null &) const return std::make_shared(std::make_shared()); } +DataTypePtr FieldToDataType::operator() (const NegativeInfinity &) const +{ + throw Exception("It's invalid to have -inf literals in SQL", ErrorCodes::LOGICAL_ERROR); +} + +DataTypePtr FieldToDataType::operator() (const PositiveInfinity &) const +{ + throw Exception("It's invalid to have +inf literals in SQL", ErrorCodes::LOGICAL_ERROR); +} + DataTypePtr FieldToDataType::operator() (const UInt64 & x) const { if (x <= std::numeric_limits::max()) return std::make_shared(); diff 
--git a/src/DataTypes/FieldToDataType.h b/src/DataTypes/FieldToDataType.h index ca83ce868fc..6d579b2bf65 100644 --- a/src/DataTypes/FieldToDataType.h +++ b/src/DataTypes/FieldToDataType.h @@ -21,6 +21,8 @@ class FieldToDataType : public StaticVisitor { public: DataTypePtr operator() (const Null & x) const; + DataTypePtr operator() (const NegativeInfinity & x) const; + DataTypePtr operator() (const PositiveInfinity & x) const; DataTypePtr operator() (const UInt64 & x) const; DataTypePtr operator() (const UInt128 & x) const; DataTypePtr operator() (const UInt256 & x) const; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 5eba65e39b9..c4f04282487 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -417,7 +417,7 @@ template inline bool isColumnedAsNumber(const T & data_type) { WhichDataType which(data_type); - return which.isInt() || which.isUInt() || which.isFloat() || which.isDate() || which.isDateTime() || which.isDateTime64() || which.isUUID(); + return which.isInt() || which.isUInt() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isUUID(); } template @@ -484,6 +484,7 @@ template class DataTypeNumber; class DataTypeDate; +class DataTypeDate32; class DataTypeDateTime; class DataTypeDateTime64; @@ -493,6 +494,7 @@ template <> inline constexpr bool IsDataTypeDecimal = true; template constexpr bool IsDataTypeNumber> = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; +template <> inline constexpr bool IsDataTypeDateOrDateTime = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; diff --git a/src/DataTypes/Native.h b/src/DataTypes/Native.h index 06ff32d95ab..970b70f9f0b 100644 --- a/src/DataTypes/Native.h +++ b/src/DataTypes/Native.h @@ -29,7 +29,7 @@ namespace ErrorCodes static inline bool typeIsSigned(const IDataType & type) { WhichDataType data_type(type); - return data_type.isNativeInt() || data_type.isFloat(); + return data_type.isNativeInt() || data_type.isFloat() || data_type.isEnum(); } static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type) @@ -270,7 +270,7 @@ static inline llvm::Constant * getColumnNativeValue(llvm::IRBuilderBase & builde { return llvm::ConstantInt::get(type, column.getUInt(index)); } - else if (column_data_type.isNativeInt()) + else if (column_data_type.isNativeInt() || column_data_type.isEnum()) { return llvm::ConstantInt::get(type, column.getInt(index)); } diff --git a/src/DataTypes/Serializations/SerializationIP.cpp b/src/DataTypes/Serializations/SerializationIP.cpp index ec49f960c77..14790c6b530 100644 --- a/src/DataTypes/Serializations/SerializationIP.cpp +++ b/src/DataTypes/Serializations/SerializationIP.cpp @@ -1,8 +1,11 @@ #include + #include +#include #include #include -#include +#include +#include namespace DB { diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 48b923c4756..6a1914bf046 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -1,17 +1,17 @@ #include #include -#include #include #include #include #include +#include +#include #include #include #include #include #include -#include #include #include @@ -40,6 +40,10 @@ #include #endif +#if USE_SQLITE +#include +#endif + namespace fs = std::filesystem; namespace DB @@ -100,7 +104,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String const UUID & uuid 
= create.uuid; bool engine_may_have_arguments = engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "Lazy" || - engine_name == "Replicated" || engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL"; + engine_name == "Replicated" || engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL" || engine_name == "SQLite"; if (engine_define->engine->arguments && !engine_may_have_arguments) throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS); @@ -299,6 +303,22 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String } +#endif + +#if USE_SQLITE + else if (engine_name == "SQLite") + { + const ASTFunction * engine = engine_define->engine; + + if (!engine->arguments || engine->arguments->children.size() != 1) + throw Exception("SQLite database requires 1 argument: database path", ErrorCodes::BAD_ARGUMENTS); + + const auto & arguments = engine->arguments->children; + + String database_path = safeGetLiteralValue(arguments[0], "SQLite"); + + return std::make_shared(context, engine_define, database_path); + } #endif throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE); diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 28f9372a61e..abcb8dbb974 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -305,12 +305,12 @@ void DatabaseLazy::clearExpiredTables() const DatabaseLazyIterator::DatabaseLazyIterator(DatabaseLazy & database_, Strings && table_names_) - : database(database_) + : IDatabaseTablesIterator(database_.database_name) + , database(database_) , table_names(std::move(table_names_)) , iterator(table_names.begin()) , current_storage(nullptr) { - database_name = database.database_name; } void DatabaseLazyIterator::next() diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index ba5fa974d5c..0c8382465f7 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -45,6 +45,9 @@ public: /// - it maintains a list of tables but tables are loaded lazily). virtual const StoragePtr & table() const = 0; + IDatabaseTablesIterator(const String & database_name_) : database_name(database_name_) { } + IDatabaseTablesIterator(String && database_name_) : database_name(std::move(database_name_)) { } + virtual ~IDatabaseTablesIterator() = default; virtual UUID uuid() const { return UUIDHelpers::Nil; } @@ -52,7 +55,7 @@ public: const String & databaseName() const { assert(!database_name.empty()); return database_name; } protected: - String database_name; + const String database_name; }; /// Copies list of tables and iterates through such snapshot. 
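(The IDatabase.h and iterator changes in this patch share one idea: IDatabaseTablesIterator now receives database_name through its constructor and keeps it as a const member, so every derived iterator forwards the name in its initializer list instead of assigning the field in the body. A condensed sketch of the pattern, with shortened names that are illustrative only:)

#include <string>
#include <utility>

class ITablesIterator
{
public:
    explicit ITablesIterator(std::string database_name_) : database_name(std::move(database_name_)) { }
    virtual ~ITablesIterator() = default;

protected:
    /// const: the name can only be set through the constructor, never reassigned later.
    const std::string database_name;
};

class SnapshotIterator : public ITablesIterator
{
public:
    explicit SnapshotIterator(std::string database_name_)
        : ITablesIterator(std::move(database_name_))  /// forward to the base instead of `database_name = ...` in the body
    {
    }
};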
@@ -64,26 +67,24 @@ private: protected: DatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && other) + : IDatabaseTablesIterator(std::move(other.database_name)) { size_t idx = std::distance(other.tables.begin(), other.it); std::swap(tables, other.tables); other.it = other.tables.end(); it = tables.begin(); std::advance(it, idx); - database_name = std::move(other.database_name); } public: DatabaseTablesSnapshotIterator(const Tables & tables_, const String & database_name_) - : tables(tables_), it(tables.begin()) + : IDatabaseTablesIterator(database_name_), tables(tables_), it(tables.begin()) { - database_name = database_name_; } DatabaseTablesSnapshotIterator(Tables && tables_, String && database_name_) - : tables(std::move(tables_)), it(tables.begin()) + : IDatabaseTablesIterator(std::move(database_name_)), tables(std::move(tables_)), it(tables.begin()) { - database_name = std::move(database_name_); } void next() override { ++it; } diff --git a/src/Databases/MySQL/DatabaseMaterializeMySQL.h b/src/Databases/MySQL/DatabaseMaterializeMySQL.h index 74a3c06e6f0..d07810e6416 100644 --- a/src/Databases/MySQL/DatabaseMaterializeMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializeMySQL.h @@ -66,6 +66,8 @@ public: void assertCalledFromSyncThreadOrDrop(const char * method) const; void shutdownSynchronizationThread(); + + friend class DatabaseMaterializeTablesIterator; }; diff --git a/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h b/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h index 54031de40a2..a3d49077baa 100644 --- a/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h +++ b/src/Databases/MySQL/DatabaseMaterializeTablesIterator.h @@ -30,7 +30,7 @@ public: UUID uuid() const override { return nested_iterator->uuid(); } DatabaseMaterializeTablesIterator(DatabaseTablesIteratorPtr nested_iterator_, const IDatabase * database_) - : nested_iterator(std::move(nested_iterator_)), database(database_) + : IDatabaseTablesIterator(database_->getDatabaseName()), nested_iterator(std::move(nested_iterator_)), database(database_) { } diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 64d47720af9..a5eccc817d0 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -71,7 +71,7 @@ static DataTypePtr convertPostgreSQLDataType(String & type, const std::function< else if (type == "bigserial") res = std::make_shared(); else if (type.starts_with("timestamp")) - res = std::make_shared(); + res = std::make_shared(6); else if (type == "date") res = std::make_shared(); else if (type.starts_with("numeric")) diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp new file mode 100644 index 00000000000..f8e31517f77 --- /dev/null +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -0,0 +1,215 @@ +#include "DatabaseSQLite.h" + +#if USE_SQLITE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SQLITE_ENGINE_ERROR; + extern const int UNKNOWN_TABLE; +} + +DatabaseSQLite::DatabaseSQLite( + ContextPtr context_, + const ASTStorage * database_engine_define_, + const String & database_path_) + : IDatabase("SQLite") + , WithContext(context_->getGlobalContext()) + , 
database_engine_define(database_engine_define_->clone()) + , log(&Poco::Logger::get("DatabaseSQLite")) +{ + sqlite_db = openSQLiteDB(database_path_, context_); +} + + +bool DatabaseSQLite::empty() const +{ + std::lock_guard lock(mutex); + return fetchTablesList().empty(); +} + + +DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &) +{ + std::lock_guard lock(mutex); + + Tables tables; + auto table_names = fetchTablesList(); + for (const auto & table_name : table_names) + tables[table_name] = fetchTable(table_name, local_context, true); + + return std::make_unique(tables, database_name); +} + + +std::unordered_set DatabaseSQLite::fetchTablesList() const +{ + std::unordered_set tables; + std::string query = "SELECT name FROM sqlite_master " + "WHERE type = 'table' AND name NOT LIKE 'sqlite_%'"; + + auto callback_get_data = [](void * res, int col_num, char ** data_by_col, char ** /* col_names */) -> int + { + for (int i = 0; i < col_num; ++i) + static_cast *>(res)->insert(data_by_col[i]); + return 0; + }; + + char * err_message = nullptr; + int status = sqlite3_exec(sqlite_db.get(), query.c_str(), callback_get_data, &tables, &err_message); + if (status != SQLITE_OK) + { + String err_msg(err_message); + sqlite3_free(err_message); + throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, + "Cannot fetch sqlite database tables. Error status: {}. Message: {}", + status, err_msg); + } + + return tables; +} + + +bool DatabaseSQLite::checkSQLiteTable(const String & table_name) const +{ + const String query = fmt::format("SELECT name FROM sqlite_master WHERE type='table' AND name='{table_name}';", table_name); + + auto callback_get_data = [](void * res, int, char **, char **) -> int + { + *(static_cast(res)) += 1; + return 0; + }; + + int count = 0; + char * err_message = nullptr; + int status = sqlite3_exec(sqlite_db.get(), query.c_str(), callback_get_data, &count, &err_message); + if (status != SQLITE_OK) + { + String err_msg(err_message); + sqlite3_free(err_message); + throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, + "Cannot check sqlite table. Error status: {}. 
Message: {}", + status, err_msg); + } + + return (count != 0); +} + + +bool DatabaseSQLite::isTableExist(const String & table_name, ContextPtr) const +{ + std::lock_guard lock(mutex); + return checkSQLiteTable(table_name); +} + + +StoragePtr DatabaseSQLite::tryGetTable(const String & table_name, ContextPtr local_context) const +{ + std::lock_guard lock(mutex); + return fetchTable(table_name, local_context, false); +} + + +StoragePtr DatabaseSQLite::fetchTable(const String & table_name, ContextPtr local_context, bool table_checked) const +{ + if (!table_checked && !checkSQLiteTable(table_name)) + return StoragePtr{}; + + auto columns = fetchSQLiteTableStructure(sqlite_db.get(), table_name); + + if (!columns) + return StoragePtr{}; + + auto storage = StorageSQLite::create( + StorageID(database_name, table_name), + sqlite_db, + table_name, + ColumnsDescription{*columns}, + ConstraintsDescription{}, + local_context); + + return storage; +} + + +ASTPtr DatabaseSQLite::getCreateDatabaseQuery() const +{ + const auto & create_query = std::make_shared(); + create_query->database = getDatabaseName(); + create_query->set(create_query->storage, database_engine_define); + return create_query; +} + + +ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, ContextPtr local_context, bool throw_on_error) const +{ + auto storage = fetchTable(table_name, local_context, false); + if (!storage) + { + if (throw_on_error) + throw Exception(ErrorCodes::UNKNOWN_TABLE, "SQLite table {}.{} does not exist", + database_name, table_name); + return nullptr; + } + + auto create_table_query = std::make_shared(); + auto table_storage_define = database_engine_define->clone(); + create_table_query->set(create_table_query->storage, table_storage_define); + + auto columns_declare_list = std::make_shared(); + auto columns_expression_list = std::make_shared(); + + columns_declare_list->set(columns_declare_list->columns, columns_expression_list); + create_table_query->set(create_table_query->columns_list, columns_declare_list); + + /// init create query. 
+ auto table_id = storage->getStorageID(); + create_table_query->table = table_id.table_name; + create_table_query->database = table_id.database_name; + + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); + for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary()) + { + const auto & column_declaration = std::make_shared(); + column_declaration->name = column_type_and_name.name; + column_declaration->type = getColumnDeclaration(column_type_and_name.type); + columns_expression_list->children.emplace_back(column_declaration); + } + + ASTStorage * ast_storage = table_storage_define->as(); + ASTs storage_children = ast_storage->children; + auto storage_engine_arguments = ast_storage->engine->arguments; + + /// Add table_name to engine arguments + storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 1, std::make_shared(table_id.table_name)); + + return create_table_query; +} + + +ASTPtr DatabaseSQLite::getColumnDeclaration(const DataTypePtr & data_type) const +{ + WhichDataType which(data_type); + + if (which.isNullable()) + return makeASTFunction("Nullable", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + + return std::make_shared(data_type->getName()); +} + +} + +#endif diff --git a/src/Databases/SQLite/DatabaseSQLite.h b/src/Databases/SQLite/DatabaseSQLite.h new file mode 100644 index 00000000000..35b1200f397 --- /dev/null +++ b/src/Databases/SQLite/DatabaseSQLite.h @@ -0,0 +1,65 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_SQLITE +#include +#include +#include + +#include // Y_IGNORE + + +namespace DB +{ +class DatabaseSQLite final : public IDatabase, protected WithContext +{ +public: + using SQLitePtr = std::shared_ptr; + + DatabaseSQLite(ContextPtr context_, const ASTStorage * database_engine_define_, const String & database_path_); + + String getEngineName() const override { return "SQLite"; } + + bool canContainMergeTreeTables() const override { return false; } + + bool canContainDistributedTables() const override { return false; } + + bool shouldBeEmptyOnDetach() const override { return false; } + + bool isTableExist(const String & name, ContextPtr context) const override; + + StoragePtr tryGetTable(const String & name, ContextPtr context) const override; + + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override; + + bool empty() const override; + + ASTPtr getCreateDatabaseQuery() const override; + + void shutdown() override {} + +protected: + ASTPtr getCreateTableQueryImpl(const String & table_name, ContextPtr context, bool throw_on_error) const override; + +private: + ASTPtr database_engine_define; + + SQLitePtr sqlite_db; + + Poco::Logger * log; + + bool checkSQLiteTable(const String & table_name) const; + + NameSet fetchTablesList() const; + + StoragePtr fetchTable(const String & table_name, ContextPtr context, bool table_checked) const; + + ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const; +}; + +} + +#endif diff --git a/src/Databases/SQLite/SQLiteUtils.cpp b/src/Databases/SQLite/SQLiteUtils.cpp new file mode 100644 index 00000000000..f3568673acb --- /dev/null +++ b/src/Databases/SQLite/SQLiteUtils.cpp @@ -0,0 +1,57 @@ +#include "SQLiteUtils.h" + +#if USE_SQLITE +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int PATH_ACCESS_DENIED; +} + + +String validateSQLiteDatabasePath(const String & path, 
const String & user_files_path) +{ + String canonical_user_files_path = fs::canonical(user_files_path); + + String canonical_path; + std::error_code err; + + if (fs::path(path).is_relative()) + canonical_path = fs::canonical(fs::path(user_files_path) / path, err); + else + canonical_path = fs::canonical(path, err); + + if (err) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "SQLite database path '{}' is invalid. Error: {}", path, err.message()); + + if (!canonical_path.starts_with(canonical_user_files_path)) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, + "SQLite database file path '{}' must be inside 'user_files' directory", path); + + return canonical_path; +} + + +SQLitePtr openSQLiteDB(const String & database_path, ContextPtr context) +{ + auto validated_path = validateSQLiteDatabasePath(database_path, context->getUserFilesPath()); + + sqlite3 * tmp_sqlite_db = nullptr; + int status = sqlite3_open(validated_path.c_str(), &tmp_sqlite_db); + + if (status != SQLITE_OK) + throw Exception(ErrorCodes::PATH_ACCESS_DENIED, + "Cannot access sqlite database. Error status: {}. Message: {}", + status, sqlite3_errstr(status)); + + return std::shared_ptr(tmp_sqlite_db, sqlite3_close); +} + +} + +#endif diff --git a/src/Databases/SQLite/SQLiteUtils.h b/src/Databases/SQLite/SQLiteUtils.h new file mode 100644 index 00000000000..56ca1ca8cf3 --- /dev/null +++ b/src/Databases/SQLite/SQLiteUtils.h @@ -0,0 +1,22 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_SQLITE +#include +#include +#include // Y_IGNORE + + +namespace DB +{ + +using SQLitePtr = std::shared_ptr; + +SQLitePtr openSQLiteDB(const String & database_path, ContextPtr context); + +} + +#endif diff --git a/src/Databases/SQLite/fetchSQLiteTableStructure.cpp b/src/Databases/SQLite/fetchSQLiteTableStructure.cpp new file mode 100644 index 00000000000..c4acf5b3a3a --- /dev/null +++ b/src/Databases/SQLite/fetchSQLiteTableStructure.cpp @@ -0,0 +1,104 @@ +#include + +#if USE_SQLITE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SQLITE_ENGINE_ERROR; +} + +static DataTypePtr convertSQLiteDataType(String type) +{ + DataTypePtr res; + type = Poco::toLower(type); + + if (type == "tinyint") + res = std::make_shared(); + else if (type == "smallint") + res = std::make_shared(); + else if (type.starts_with("int") || type == "mediumint") + res = std::make_shared(); + else if (type == "bigint") + res = std::make_shared(); + else if (type == "float") + res = std::make_shared(); + else if (type.starts_with("double") || type == "real") + res = std::make_shared(); + else + res = std::make_shared(); // No decimal when fetching data through API + + return res; +} + + +std::shared_ptr fetchSQLiteTableStructure(sqlite3 * connection, const String & sqlite_table_name) +{ + auto columns = NamesAndTypesList(); + auto query = fmt::format("pragma table_info({});", quoteString(sqlite_table_name)); + + auto callback_get_data = [](void * res, int col_num, char ** data_by_col, char ** col_names) -> int + { + NameAndTypePair name_and_type; + bool is_nullable = false; + + for (int i = 0; i < col_num; ++i) + { + if (strcmp(col_names[i], "name") == 0) + { + name_and_type.name = data_by_col[i]; + } + else if (strcmp(col_names[i], "type") == 0) + { + name_and_type.type = convertSQLiteDataType(data_by_col[i]); + } + else if (strcmp(col_names[i], "notnull") == 0) + { + is_nullable = (data_by_col[i][0] == '0'); + } + } 
+ + if (is_nullable) + name_and_type.type = std::make_shared(name_and_type.type); + + static_cast(res)->push_back(name_and_type); + + return 0; + }; + + char * err_message = nullptr; + int status = sqlite3_exec(connection, query.c_str(), callback_get_data, &columns, &err_message); + + if (status != SQLITE_OK) + { + String err_msg(err_message); + sqlite3_free(err_message); + + throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, + "Failed to fetch SQLite data. Status: {}. Message: {}", + status, err_msg); + } + + if (columns.empty()) + return nullptr; + + return std::make_shared(columns); +} + +} + +#endif diff --git a/src/Databases/SQLite/fetchSQLiteTableStructure.h b/src/Databases/SQLite/fetchSQLiteTableStructure.h new file mode 100644 index 00000000000..80f50173e5e --- /dev/null +++ b/src/Databases/SQLite/fetchSQLiteTableStructure.h @@ -0,0 +1,19 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_SQLITE + +#include +#include // Y_IGNORE + + +namespace DB +{ +std::shared_ptr fetchSQLiteTableStructure(sqlite3 * connection, + const String & sqlite_table_name); +} + +#endif diff --git a/src/Databases/ya.make b/src/Databases/ya.make index 15c14ac5fc2..7c5e310f7b4 100644 --- a/src/Databases/ya.make +++ b/src/Databases/ya.make @@ -27,6 +27,9 @@ SRCS( MySQL/MaterializeMetadata.cpp MySQL/MaterializeMySQLSettings.cpp MySQL/MaterializeMySQLSyncThread.cpp + SQLite/DatabaseSQLite.cpp + SQLite/SQLiteUtils.cpp + SQLite/fetchSQLiteTableStructure.cpp ) diff --git a/src/Dictionaries/DictionaryBlockInputStream.h b/src/Dictionaries/DictionaryBlockInputStream.h index de1acd294f7..7692c910b94 100644 --- a/src/Dictionaries/DictionaryBlockInputStream.h +++ b/src/Dictionaries/DictionaryBlockInputStream.h @@ -6,13 +6,13 @@ #include #include #include -#include #include #include #include "DictionaryBlockInputStreamBase.h" #include "DictionaryStructure.h" #include "IDictionary.h" + namespace DB { diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 1478518dee4..ed124ce1e0a 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -9,13 +9,14 @@ #include #include #include -#include #include #include #include #include #include #include +#include + namespace DB { diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 0508a0d70ad..c9b38acfbb5 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -2,13 +2,13 @@ #include #include -#include #include #include #include #include + namespace DB { namespace ErrorCodes diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 5247c8038cd..7f4a3a5b45f 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -266,7 +266,7 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) /// Executable dictionaries may execute arbitrary commands. /// It's OK for dictionaries created by administrator from xml-file, but /// maybe dangerous for dictionaries created from DDL-queries. 
- if (created_from_ddl) + if (created_from_ddl && context->getApplicationType() != Context::ApplicationType::LOCAL) throw Exception(ErrorCodes::DICTIONARY_ACCESS_DENIED, "Dictionaries with executable dictionary source are not allowed to be created from DDL query"); auto context_local_copy = copyContextAndApplySettings(config_prefix, context, config); diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index fe6b19b8253..5ece5af0f0b 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -283,7 +283,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory) /// Executable dictionaries may execute arbitrary commands. /// It's OK for dictionaries created by administrator from xml-file, but /// maybe dangerous for dictionaries created from DDL-queries. - if (created_from_ddl) + if (created_from_ddl && context->getApplicationType() != Context::ApplicationType::LOCAL) throw Exception(ErrorCodes::DICTIONARY_ACCESS_DENIED, "Dictionaries with executable pool dictionary source are not allowed to be created from DDL query"); auto context_local_copy = copyContextAndApplySettings(config_prefix, context, config); diff --git a/src/Dictionaries/RangeDictionaryBlockInputStream.h b/src/Dictionaries/RangeDictionaryBlockInputStream.h index bef28e71d57..7d40531cfa5 100644 --- a/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -11,6 +10,7 @@ #include "IDictionary.h" #include "RangeHashedDictionary.h" + namespace DB { /* diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 7237a249bcb..58059dbe355 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -206,9 +206,9 @@ void DiskDecorator::startup() delegate->startup(); } -void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context) +void DiskDecorator::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) { - delegate->applyNewSettings(config, context); + delegate->applyNewSettings(config, context, config_prefix, map); } } diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index 0910f4c28cd..6586675d1de 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -65,11 +65,12 @@ public: String getUniqueId(const String & path) const override { return delegate->getUniqueId(path); } bool checkUniqueId(const String & id) const override { return delegate->checkUniqueId(id); } DiskType::Type getType() const override { return delegate->getType(); } + bool supportZeroCopyReplication() const override { return delegate->supportZeroCopyReplication(); } void onFreeze(const String & path) override; SyncGuardPtr getDirectorySyncGuard(const String & path) const override; void shutdown() override; void startup() override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context) override; + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; protected: Executor & getExecutor() override; diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp new file mode 100644 index 00000000000..030f7399d74 --- /dev/null +++ 
b/src/Disks/DiskEncrypted.cpp @@ -0,0 +1,265 @@ +#include + +#if USE_SSL +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int INCORRECT_DISK_INDEX; +} + +namespace +{ + using DiskEncryptedPtr = std::shared_ptr; + using namespace FileEncryption; + + constexpr Algorithm DEFAULT_ENCRYPTION_ALGORITHM = Algorithm::AES_128_CTR; + + String unhexKey(const String & hex) + { + try + { + return boost::algorithm::unhex(hex); + } + catch (const std::exception &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length"); + } + } + + struct DiskEncryptedSettings + { + Algorithm encryption_algorithm; + String key; + DiskPtr wrapped_disk; + String path_on_wrapped_disk; + + DiskEncryptedSettings( + const String & disk_name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const DisksMap & map) + { + try + { + encryption_algorithm = DEFAULT_ENCRYPTION_ALGORITHM; + if (config.has(config_prefix + ".algorithm")) + parseFromString(encryption_algorithm, config.getString(config_prefix + ".algorithm")); + + key = config.getString(config_prefix + ".key", ""); + String key_hex = config.getString(config_prefix + ".key_hex", ""); + if (!key.empty() && !key_hex.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Both 'key' and 'key_hex' are specified. There should be only one"); + + if (!key_hex.empty()) + { + assert(key.empty()); + key = unhexKey(key_hex); + } + + FileEncryption::checkKeySize(encryption_algorithm, key.size()); + + String wrapped_disk_name = config.getString(config_prefix + ".disk", ""); + if (wrapped_disk_name.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Name of the wrapped disk must not be empty. An encrypted disk is a wrapper over another disk"); + + auto wrapped_disk_it = map.find(wrapped_disk_name); + if (wrapped_disk_it == map.end()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The wrapped disk must have been announced earlier. 
No disk with name {}", + wrapped_disk_name); + wrapped_disk = wrapped_disk_it->second; + + path_on_wrapped_disk = config.getString(config_prefix + ".path", ""); + } + catch (Exception & e) + { + e.addMessage("Disk " + disk_name); + throw; + } + } + }; + + bool inline isSameDiskType(const IDisk & one, const IDisk & another) + { + return typeid(one) == typeid(another); + } +} + +class DiskEncryptedReservation : public IReservation +{ +public: + DiskEncryptedReservation(DiskEncryptedPtr disk_, std::unique_ptr reservation_) + : disk(std::move(disk_)), reservation(std::move(reservation_)) + { + } + + UInt64 getSize() const override { return reservation->getSize(); } + + DiskPtr getDisk(size_t i) const override + { + if (i != 0) + throw Exception("Can't use i != 0 with single disk reservation", ErrorCodes::INCORRECT_DISK_INDEX); + return disk; + } + + Disks getDisks() const override { return {disk}; } + + void update(UInt64 new_size) override { reservation->update(new_size); } + +private: + DiskEncryptedPtr disk; + std::unique_ptr reservation; +}; + +ReservationPtr DiskEncrypted::reserve(UInt64 bytes) +{ + auto reservation = delegate->reserve(bytes); + if (!reservation) + return {}; + return std::make_unique(std::static_pointer_cast(shared_from_this()), std::move(reservation)); +} + +DiskEncrypted::DiskEncrypted( + const String & name_, + DiskPtr wrapped_disk_, + const String & path_on_wrapped_disk_, + FileEncryption::Algorithm encryption_algorithm_, + const String & key_) + : DiskDecorator(wrapped_disk_), name(name_), disk_path(path_on_wrapped_disk_), encryption_algorithm(encryption_algorithm_), key(key_) +{ + initialize(); +} + +void DiskEncrypted::initialize() +{ + disk_absolute_path = delegate->getPath() + disk_path; + + // use wrapped_disk as an EncryptedDisk store + if (disk_path.empty()) + return; + + if (disk_path.back() != '/') + throw Exception("Disk path must end with '/', but '" + disk_path + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); + + delegate->createDirectories(disk_path); +} + +void DiskEncrypted::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) +{ + /// Check if we can copy the file without deciphering. + if (isSameDiskType(*this, *to_disk)) + { + /// Disk type is the same, check if the key is the same too. + if (auto * to_encrypted_disk = typeid_cast(to_disk.get())) + { + if ((encryption_algorithm == to_encrypted_disk->encryption_algorithm) && (key == to_encrypted_disk->key)) + { + /// Key is the same so we can simply copy the encrypted file. + delegate->copy(wrappedPath(from_path), to_encrypted_disk->delegate, to_encrypted_disk->wrappedPath(to_path)); + return; + } + } + } + + /// Copy the file through buffers with deciphering.
+ copyThroughBuffers(from_path, to_disk, to_path); +} + +std::unique_ptr DiskEncrypted::readFile( + const String & path, + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold, + MMappedFileCache * mmap_cache) const +{ + auto wrapped_path = wrappedPath(path); + auto buffer = delegate->readFile(wrapped_path, buf_size, estimated_size, aio_threshold, mmap_threshold, mmap_cache); + + InitVector iv; + iv.read(*buffer); + return std::make_unique(buf_size, std::move(buffer), encryption_algorithm, key, iv); +} + +std::unique_ptr DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode) +{ + InitVector iv; + UInt64 old_file_size = 0; + auto wrapped_path = wrappedPath(path); + + if (mode == WriteMode::Append && exists(path) && getFileSize(path)) + { + auto read_buffer = delegate->readFile(wrapped_path, InitVector::kSize); + iv.read(*read_buffer); + old_file_size = getFileSize(path); + } + else + iv = InitVector::random(); + + auto buffer = delegate->writeFile(wrapped_path, buf_size, mode); + return std::make_unique(buf_size, std::move(buffer), encryption_algorithm, key, iv, old_file_size); +} + + +size_t DiskEncrypted::getFileSize(const String & path) const +{ + auto wrapped_path = wrappedPath(path); + size_t size = delegate->getFileSize(wrapped_path); + return size > InitVector::kSize ? (size - InitVector::kSize) : 0; +} + +void DiskEncrypted::truncateFile(const String & path, size_t size) +{ + auto wrapped_path = wrappedPath(path); + delegate->truncateFile(wrapped_path, size ? (size + InitVector::kSize) : 0); +} + +SyncGuardPtr DiskEncrypted::getDirectorySyncGuard(const String & path) const +{ + auto wrapped_path = wrappedPath(path); + return delegate->getDirectorySyncGuard(wrapped_path); +} + +void DiskEncrypted::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + ContextPtr /*context*/, + const String & config_prefix, + const DisksMap & map) +{ + DiskEncryptedSettings settings{name, config, config_prefix, map}; + delegate = settings.wrapped_disk; + disk_path = settings.path_on_wrapped_disk; + encryption_algorithm = settings.encryption_algorithm; + key = settings.key; + initialize(); +} + +void registerDiskEncrypted(DiskFactory & factory) +{ + auto creator = [](const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr /*context*/, + const DisksMap & map) -> DiskPtr + { + DiskEncryptedSettings settings{name, config, config_prefix, map}; + return std::make_shared( + name, settings.wrapped_disk, settings.path_on_wrapped_disk, settings.encryption_algorithm, settings.key); + }; + factory.registerDiskType("encrypted", creator); +} + +} + + +#endif diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h new file mode 100644 index 00000000000..e7466edc2ae --- /dev/null +++ b/src/Disks/DiskEncrypted.h @@ -0,0 +1,236 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + + +namespace DB +{ +class ReadBufferFromFileBase; +class WriteBufferFromFileBase; +namespace FileEncryption { enum class Algorithm; } + +/// Encrypted disk ciphers all written files on the fly and writes the encrypted files to an underlying (normal) disk. +/// And when we read files from an encrypted disk it deciphers them automatically, +/// so we can work with a encrypted disk like it's a normal disk. 
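For readers skimming the hunk, the comment above describes a plain decorator: every filesystem call is forwarded to the wrapped disk under a prefixed path, and only the file contents are transformed on the way in and out. A minimal, self-contained sketch of that shape, using toy types and a toy XOR "cipher" in place of the real IDisk/FileEncryption interfaces:

    #include <iostream>
    #include <map>
    #include <string>

    // Toy stand-in for the wrapped ("normal") disk.
    struct PlainStore
    {
        std::map<std::string, std::string> files;
        void write(const std::string & path, const std::string & data) { files[path] = data; }
        std::string read(const std::string & path) const { return files.at(path); }
    };

    // Toy stand-in for the encrypted disk: prefixes paths and ciphers contents, nothing else.
    class EncryptedStore
    {
    public:
        EncryptedStore(PlainStore & wrapped_, std::string prefix_, char key_)
            : wrapped(wrapped_), prefix(std::move(prefix_)), key(key_) {}

        void write(const std::string & path, const std::string & data)
        {
            wrapped.write(prefix + path, xorCipher(data));    // ciphertext lands on the wrapped store
        }

        std::string read(const std::string & path) const
        {
            return xorCipher(wrapped.read(prefix + path));    // deciphered transparently
        }

    private:
        std::string xorCipher(std::string s) const            // toy substitute for a real cipher
        {
            for (char & c : s)
                c ^= key;
            return s;
        }

        PlainStore & wrapped;
        std::string prefix;
        char key;
    };

    int main()
    {
        PlainStore disk_local;
        EncryptedStore disk_encrypted(disk_local, "encrypted/", 0x5A);

        disk_encrypted.write("hello.txt", "hello");
        std::cout << disk_local.read("encrypted/hello.txt") << '\n';    // scrambled bytes
        std::cout << disk_encrypted.read("hello.txt") << '\n';          // "hello"
    }

The class that follows does the same for the full IDisk interface, with AES-CTR (AES_128_CTR by default in this patch) instead of the toy cipher, and with a random init vector stored at the beginning of each encrypted file.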
+class DiskEncrypted : public DiskDecorator +{ +public: + DiskEncrypted( + const String & name_, + DiskPtr wrapped_disk_, + const String & path_on_wrapped_disk_, + FileEncryption::Algorithm encryption_algorithm_, + const String & key_); + + const String & getName() const override { return name; } + const String & getPath() const override { return disk_absolute_path; } + + ReservationPtr reserve(UInt64 bytes) override; + + bool exists(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->exists(wrapped_path); + } + + bool isFile(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->isFile(wrapped_path); + } + + bool isDirectory(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->isDirectory(wrapped_path); + } + + size_t getFileSize(const String & path) const override; + + void createDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->createDirectory(wrapped_path); + } + + void createDirectories(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->createDirectories(wrapped_path); + } + + + void clearDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->clearDirectory(wrapped_path); + } + + void moveDirectory(const String & from_path, const String & to_path) override + { + auto wrapped_from_path = wrappedPath(from_path); + auto wrapped_to_path = wrappedPath(to_path); + delegate->moveDirectory(wrapped_from_path, wrapped_to_path); + } + + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + return delegate->iterateDirectory(wrapped_path); + } + + void createFile(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->createFile(wrapped_path); + } + + void moveFile(const String & from_path, const String & to_path) override + { + auto wrapped_from_path = wrappedPath(from_path); + auto wrapped_to_path = wrappedPath(to_path); + delegate->moveFile(wrapped_from_path, wrapped_to_path); + } + + void replaceFile(const String & from_path, const String & to_path) override + { + auto wrapped_from_path = wrappedPath(from_path); + auto wrapped_to_path = wrappedPath(to_path); + delegate->replaceFile(wrapped_from_path, wrapped_to_path); + } + + void listFiles(const String & path, std::vector & file_names) override + { + auto wrapped_path = wrappedPath(path); + delegate->listFiles(wrapped_path, file_names); + } + + void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; + + std::unique_ptr readFile( + const String & path, + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold, + MMappedFileCache * mmap_cache) const override; + + std::unique_ptr writeFile( + const String & path, + size_t buf_size, + WriteMode mode) override; + + void removeFile(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeFile(wrapped_path); + } + + void removeFileIfExists(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeFileIfExists(wrapped_path); + } + + void removeDirectory(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeDirectory(wrapped_path); + } + + void removeRecursive(const String & path) override + { + auto wrapped_path = wrappedPath(path); + 
delegate->removeRecursive(wrapped_path); + } + + void removeSharedFile(const String & path, bool flag) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeSharedFile(wrapped_path, flag); + } + + void removeSharedRecursive(const String & path, bool flag) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeSharedRecursive(wrapped_path, flag); + } + + void removeSharedFileIfExists(const String & path, bool flag) override + { + auto wrapped_path = wrappedPath(path); + delegate->removeSharedFileIfExists(wrapped_path, flag); + } + + void setLastModified(const String & path, const Poco::Timestamp & timestamp) override + { + auto wrapped_path = wrappedPath(path); + delegate->setLastModified(wrapped_path, timestamp); + } + + Poco::Timestamp getLastModified(const String & path) override + { + auto wrapped_path = wrappedPath(path); + return delegate->getLastModified(wrapped_path); + } + + void setReadOnly(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->setReadOnly(wrapped_path); + } + + void createHardLink(const String & src_path, const String & dst_path) override + { + auto wrapped_src_path = wrappedPath(src_path); + auto wrapped_dst_path = wrappedPath(dst_path); + delegate->createHardLink(wrapped_src_path, wrapped_dst_path); + } + + void truncateFile(const String & path, size_t size) override; + + String getUniqueId(const String & path) const override + { + auto wrapped_path = wrappedPath(path); + return delegate->getUniqueId(wrapped_path); + } + + void onFreeze(const String & path) override + { + auto wrapped_path = wrappedPath(path); + delegate->onFreeze(wrapped_path); + } + + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; + + DiskType::Type getType() const override { return DiskType::Type::Encrypted; } + + SyncGuardPtr getDirectorySyncGuard(const String & path) const override; + +private: + void initialize(); + + String wrappedPath(const String & path) const + { + // if path starts_with disk_path -> got already wrapped path + if (!disk_path.empty() && path.starts_with(disk_path)) + return path; + return disk_path + path; + } + + String name; + String disk_path; + String disk_absolute_path; + FileEncryption::Algorithm encryption_algorithm; + String key; +}; + +} + +#endif diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index b0fb0bd7ca7..94175c92de6 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -24,7 +24,8 @@ DiskPtr DiskFactory::create( const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) const + ContextPtr context, + const DisksMap & map) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -33,7 +34,7 @@ DiskPtr DiskFactory::create( throw Exception{"DiskFactory: the disk '" + name + "' has unknown disk type: " + disk_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG}; const auto & disk_creator = found->second; - return disk_creator(name, config, config_prefix, context); + return disk_creator(name, config, config_prefix, context, map); } } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index 1c05c8d0335..7fcac8928c8 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h @@ -8,12 +8,14 @@ #include #include +#include #include namespace DB { +using DisksMap = std::map; /** * Disk factory. Responsible for creating new disk objects. 
*/ @@ -24,7 +26,8 @@ public: const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context)>; + ContextPtr context, + const DisksMap & map)>; static DiskFactory & instance(); @@ -34,7 +37,8 @@ public: const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) const; + ContextPtr context, + const DisksMap & map) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 4ceb76ab059..2897b08706d 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -309,7 +309,7 @@ void DiskLocal::copy(const String & from_path, const std::shared_ptr & to fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. } else - IDisk::copy(from_path, to_disk, to_path); /// Copy files through buffers. + copyThroughBuffers(from_path, to_disk, to_path); /// Base implementation. } SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const @@ -367,7 +367,8 @@ void registerDiskLocal(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) -> DiskPtr { + ContextPtr context, + const DisksMap & /*map*/) -> DiskPtr { String path = config.getString(config_prefix + ".path", ""); if (name == "default") { diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 63a6fe59bea..3aa243b103b 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -100,6 +100,8 @@ public: DiskType::Type getType() const override { return DiskType::Type::Local; } + bool supportZeroCopyReplication() const override { return false; } + SyncGuardPtr getDirectorySyncGuard(const String & path) const override; private: diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index 77926b4e375..337b9784080 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -450,7 +450,8 @@ void registerDiskMemory(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & /*config*/, const String & /*config_prefix*/, - ContextPtr /*context*/) -> DiskPtr { return std::make_shared(name); }; + ContextPtr /*context*/, + const DisksMap & /*map*/) -> DiskPtr { return std::make_shared(name); }; factory.registerDiskType("memory", creator); } diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index 40fd2b2a9f9..d168bc26ff3 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -92,6 +92,8 @@ public: DiskType::Type getType() const override { return DiskType::Type::RAM; } + bool supportZeroCopyReplication() const override { return false; } + private: void createDirectoriesImpl(const String & path); void replaceFileImpl(const String & from_path, const String & to_path); diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 0d36cadc349..bc7810479c5 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -37,7 +37,7 @@ DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, con auto disk_config_prefix = config_prefix + "." 
+ disk_name; - disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context)); + disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); } if (!has_default_disk) disks.emplace(default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0)); @@ -62,16 +62,16 @@ DiskSelectorPtr DiskSelector::updateFromConfig( if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) throw Exception("Disk name can contain only alphanumeric and '_' (" + disk_name + ")", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + auto disk_config_prefix = config_prefix + "." + disk_name; if (result->getDisksMap().count(disk_name) == 0) { - auto disk_config_prefix = config_prefix + "." + disk_name; - result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context)); + result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); } else { auto disk = old_disks_minus_new_disks[disk_name]; - disk->applyNewSettings(config, context); + disk->applyNewSettings(config, context, disk_config_prefix, result->getDisksMap()); old_disks_minus_new_disks.erase(disk_name); } diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 4652cc40ea3..88cc6ee5197 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -12,7 +12,6 @@ namespace DB class DiskSelector; using DiskSelectorPtr = std::shared_ptr; -using DisksMap = std::map; /// Parse .xml configuration and store information about disks /// Mostly used for introspection. diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index a5c23fe2c2c..5eeeaaeb2e3 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -12,7 +12,8 @@ struct DiskType Local, RAM, S3, - HDFS + HDFS, + Encrypted }; static String toString(Type disk_type) { @@ -26,6 +27,8 @@ struct DiskType return "s3"; case Type::HDFS: return "hdfs"; + case Type::Encrypted: + return "encrypted"; } __builtin_unreachable(); } diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index dafd507ba1e..4eb43eaf7b5 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -115,7 +115,7 @@ std::unique_ptr DiskHDFS::writeFile(const String & path auto hdfs_path = remote_fs_root_path + file_name; LOG_DEBUG(log, "{} to file by path: {}. HDFS path: {}", mode == WriteMode::Rewrite ? 
"Write" : "Append", - backQuote(metadata_path + path), remote_fs_root_path + hdfs_path); + backQuote(metadata_path + path), hdfs_path); /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique(hdfs_path, @@ -153,6 +153,14 @@ void DiskHDFS::removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) }); } +bool DiskHDFS::checkUniqueId(const String & hdfs_uri) const +{ + if (!boost::algorithm::starts_with(hdfs_uri, remote_fs_root_path)) + return false; + const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); + const String remote_fs_object_path = hdfs_uri.substr(begin_of_path); + return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); +} namespace { @@ -170,7 +178,8 @@ void registerDiskHDFS(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context_) -> DiskPtr + ContextPtr context_, + const DisksMap & /*map*/) -> DiskPtr { fs::path disk = fs::path(context_->getPath()) / "disks" / name; fs::create_directories(disk); diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 1f93192fd57..068572daa62 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -44,6 +44,8 @@ public: DiskType::Type getType() const override { return DiskType::Type::HDFS; } + bool supportZeroCopyReplication() const override { return true; } + std::unique_ptr readFile( const String & path, size_t buf_size, @@ -58,6 +60,11 @@ public: RemoteFSPathKeeperPtr createFSPathKeeper() const override; + /// Check file exists and ClickHouse has an access to it + /// Overrode in remote disk + /// Required for remote disk to ensure that replica has access to data written by other node + bool checkUniqueId(const String & hdfs_uri) const override; + private: String getRandomName() { return toString(UUIDHelpers::generateV4()); } diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 82705b5dcc8..df0f921389f 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -58,7 +58,7 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p } } -void IDisk::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) +void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) { auto & exec = to_disk->getExecutor(); ResultsCollector results; @@ -71,6 +71,11 @@ void IDisk::copy(const String & from_path, const std::shared_ptr & to_dis result.get(); } +void IDisk::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) +{ + copyThroughBuffers(from_path, to_disk, to_path); +} + void IDisk::truncateFile(const String &, size_t) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate operation is not implemented for disk of type {}", getType()); diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index f9e7624f4ab..717c72420cc 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -211,19 +212,23 @@ public: /// Return disk type - "local", "s3", etc. virtual DiskType::Type getType() const = 0; + /// Whether this disk support zero-copy replication. + /// Overrode in remote fs disks. + virtual bool supportZeroCopyReplication() const = 0; + /// Invoked when Global Context is shutdown. virtual void shutdown() {} /// Performs action on disk startup. 
virtual void startup() {} - /// Return some uniq string for file, overrode for S3 - /// Required for distinguish different copies of the same part on S3 + /// Return some uniq string for file, overrode for IDiskRemote + /// Required for distinguish different copies of the same part on remote disk virtual String getUniqueId(const String & path) const { return path; } /// Check file exists and ClickHouse has an access to it - /// Overrode in DiskS3 - /// Required for S3 to ensure that replica has access to data written by other node + /// Overrode in remote FS disks (s3/hdfs) + /// Required for remote disk to ensure that replica has access to data written by other node virtual bool checkUniqueId(const String & id) const { return exists(id); } /// Invoked on partitions freeze query. @@ -233,7 +238,7 @@ public: virtual SyncGuardPtr getDirectorySyncGuard(const String & path) const; /// Applies new settings for disk in runtime. - virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr) {} + virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr, const String &, const DisksMap &) { } protected: friend class DiskDecorator; @@ -241,6 +246,11 @@ protected: /// Returns executor to perform asynchronous operations. virtual Executor & getExecutor() { return *executor; } + /// Base implementation of the function copy(). + /// It just opens two files, reads data by portions from the first file, and writes it to the second one. + /// A derived class may override copy() to provide a faster implementation. + void copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path); + private: std::unique_ptr executor; }; diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index a4dcc8037bc..398e617196d 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -344,17 +344,6 @@ void IDiskRemote::replaceFile(const String & from_path, const String & to_path) } -void IDiskRemote::removeFileIfExists(const String & path) -{ - RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); - if (fs::exists(fs::path(metadata_path) / path)) - { - removeMeta(path, fs_paths_keeper); - removeFromRemoteFS(fs_paths_keeper); - } -} - - void IDiskRemote::removeSharedFile(const String & path, bool keep_in_remote_fs) { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); @@ -364,6 +353,18 @@ void IDiskRemote::removeSharedFile(const String & path, bool keep_in_remote_fs) } +void IDiskRemote::removeSharedFileIfExists(const String & path, bool keep_in_remote_fs) +{ + RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); + if (fs::exists(fs::path(metadata_path) / path)) + { + removeMeta(path, fs_paths_keeper); + if (!keep_in_remote_fs) + removeFromRemoteFS(fs_paths_keeper); + } +} + + void IDiskRemote::removeSharedRecursive(const String & path, bool keep_in_remote_fs) { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); @@ -488,4 +489,13 @@ bool IDiskRemote::tryReserve(UInt64 bytes) return false; } +String IDiskRemote::getUniqueId(const String & path) const +{ + Metadata metadata(remote_fs_root_path, metadata_path, path); + String id; + if (!metadata.remote_fs_objects.empty()) + id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].first; + return id; +} + } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 360d4e2de33..80b01c3c949 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -83,12 +83,14 @@ public: void removeFile(const String & 
path) override { removeSharedFile(path, false); } - void removeFileIfExists(const String & path) override; + void removeFileIfExists(const String & path) override { removeSharedFileIfExists(path, false); } void removeRecursive(const String & path) override { removeSharedRecursive(path, false); } void removeSharedFile(const String & path, bool keep_in_remote_fs) override; + void removeSharedFileIfExists(const String & path, bool keep_in_remote_fs) override; + void removeSharedRecursive(const String & path, bool keep_in_remote_fs) override; void listFiles(const String & path, std::vector & file_names) override; @@ -117,6 +119,10 @@ public: ReservationPtr reserve(UInt64 bytes) override; + String getUniqueId(const String & path) const override; + + bool checkUniqueId(const String & id) const override = 0; + virtual void removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) = 0; virtual RemoteFSPathKeeperPtr createFSPathKeeper() const = 0; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index e52a19de99a..1f1c73c32c3 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -158,15 +158,6 @@ DiskS3::DiskS3( { } -String DiskS3::getUniqueId(const String & path) const -{ - Metadata metadata(remote_fs_root_path, metadata_path, path); - String id; - if (!metadata.remote_fs_objects.empty()) - id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].first; - return id; -} - RemoteFSPathKeeperPtr DiskS3::createFSPathKeeper() const { auto settings = current_settings.get(); @@ -930,7 +921,7 @@ void DiskS3::onFreeze(const String & path) revision_file_buf.finalize(); } -void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context) +void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) { auto new_settings = settings_getter(config, "storage_configuration.disks." + name, context); diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index fc7c832e45d..133488ad31f 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -98,22 +98,21 @@ public: DiskType::Type getType() const override { return DiskType::Type::S3; } + bool supportZeroCopyReplication() const override { return true; } + void shutdown() override; void startup() override; - /// Return some uniq string for file - /// Required for distinguish different copies of the same part on S3 - String getUniqueId(const String & path) const override; - /// Check file exists and ClickHouse has an access to it - /// Required for S3 to ensure that replica has access to data wroten by other node + /// Overrode in remote disk + /// Required for remote disk to ensure that replica has access to data written by other node bool checkUniqueId(const String & id) const override; /// Dumps current revision counter into file 'revision.txt' at given path. 
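    /// Aside on the two declarations above (illustrative summary): getUniqueId()/checkUniqueId() form
    /// the access handshake behind zero-copy replication. On a remote disk, getUniqueId(path) resolves
    /// the local metadata file to its first remote object (IDiskRemote::getUniqueId returns
    /// remote_fs_root_path plus the object name), and another replica calls checkUniqueId(id) to
    /// confirm it can actually see that object (hdfsExists() on HDFS, an analogous object check on S3)
    /// before reusing the already-written data instead of fetching the part again.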
void onFreeze(const String & path) override; - void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context) override; + void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) override; private: void createFileOperationObject(const String & operation_name, UInt64 revision, const ObjectMetadata & metadata); diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 1e40f45b098..49a11b1dbb9 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -167,7 +167,8 @@ void registerDiskS3(DiskFactory & factory) auto creator = [](const String & name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context) -> DiskPtr { + ContextPtr context, + const DisksMap & /*map*/) -> DiskPtr { S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.back() != '/') throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index 8f4901e49e5..bf2f09853fe 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -16,6 +16,10 @@ void registerDiskMemory(DiskFactory & factory); void registerDiskS3(DiskFactory & factory); #endif +#if USE_SSL +void registerDiskEncrypted(DiskFactory & factory); +#endif + #if USE_HDFS void registerDiskHDFS(DiskFactory & factory); #endif @@ -32,6 +36,10 @@ void registerDisks() registerDiskS3(factory); #endif +#if USE_SSL + registerDiskEncrypted(factory); +#endif + #if USE_HDFS registerDiskHDFS(factory); #endif diff --git a/src/Disks/ya.make b/src/Disks/ya.make index 2312dc96241..925dfd2a0ce 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -10,6 +10,7 @@ PEERDIR( SRCS( DiskCacheWrapper.cpp DiskDecorator.cpp + DiskEncrypted.cpp DiskFactory.cpp DiskLocal.cpp DiskMemory.cpp diff --git a/src/Formats/CMakeLists.txt b/src/Formats/CMakeLists.txt index 0a342917073..12def0fb1d0 100644 --- a/src/Formats/CMakeLists.txt +++ b/src/Formats/CMakeLists.txt @@ -1,5 +1 @@ configure_file(config_formats.h.in ${ConfigIncludePath}/config_formats.h) - -if (ENABLE_EXAMPLES) - add_subdirectory(examples) -endif() diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8b7cf9635b4..d2d6d92dea3 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT; extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT; + extern const int UNSUPPORTED_METHOD; } const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const @@ -59,6 +59,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.output_codec = settings.output_format_avro_codec; format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval; format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString(); + format_settings.avro.string_column_pattern = settings.output_format_avro_string_column_pattern.toString(); format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.csv.crlf_end_of_line = 
settings.output_format_csv_crlf_end_of_line; @@ -309,7 +310,7 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( { const auto & output_getter = getCreators(name).output_processor_creator; if (!output_getter) - throw Exception("Format " + name + " is not suitable for output (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); + throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT, "Format {} is not suitable for output (with processors)", name); auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); @@ -344,7 +345,7 @@ OutputFormatPtr FormatFactory::getOutputFormat( { const auto & output_getter = getCreators(name).output_processor_creator; if (!output_getter) - throw Exception("Format " + name + " is not suitable for output (with processors)", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); + throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT, "Format {} is not suitable for output (with processors)", name); if (context->hasQueryContext() && context->getSettingsRef().log_queries) context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Format, name); @@ -352,8 +353,11 @@ OutputFormatPtr FormatFactory::getOutputFormat( RowOutputFormatParams params; params.callback = std::move(callback); - auto format_settings = _format_settings - ? *_format_settings : getFormatSettings(context); + auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); + + /// If we're handling MySQL protocol connection right now then MySQLWire is only allowed output format. + if (format_settings.mysql_wire.sequence_id && (name != "MySQLWire")) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats"); /** TODO: Materialization is needed, because formats can use the functions `IDataType`, * which only work with full columns. diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 1773f2cc2c6..69df095bca8 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -61,6 +61,7 @@ struct FormatSettings String output_codec; UInt64 output_sync_interval = 16 * 1024; bool allow_missing_fields = false; + String string_column_pattern; } avro; struct CSV @@ -131,6 +132,13 @@ struct FormatSettings bool allow_multiple_rows_without_delimiter = false; } protobuf; + struct + { + uint32_t client_capabilities = 0; + size_t max_packet_size = 0; + uint8_t * sequence_id = nullptr; /// Not null if it's MySQLWire output format used to handle MySQL protocol connections. + } mysql_wire; + struct { std::string regexp; @@ -169,4 +177,3 @@ struct FormatSettings }; } - diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index 28ba625d9fb..d06f507f044 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -29,10 +29,12 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D if (quotes) { pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); + if (pos > in.buffer().end()) throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); else if (pos == in.buffer().end()) continue; + if (*pos == '\\') { ++pos; @@ -48,10 +50,12 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D else { pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end()); + if (pos > in.buffer().end()) throw Exception("Position in buffer is out of bounds. 
There must be a bug.", ErrorCodes::LOGICAL_ERROR); else if (pos == in.buffer().end()) continue; + else if (*pos == '{') { ++balance; diff --git a/src/Formats/examples/CMakeLists.txt b/src/Formats/examples/CMakeLists.txt deleted file mode 100644 index e1cb7604fab..00000000000 --- a/src/Formats/examples/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -set(SRCS ) - -add_executable (tab_separated_streams tab_separated_streams.cpp ${SRCS}) -target_link_libraries (tab_separated_streams PRIVATE clickhouse_aggregate_functions dbms) diff --git a/src/Formats/examples/tab_separated_streams.cpp b/src/Formats/examples/tab_separated_streams.cpp deleted file mode 100644 index bd733e4b9aa..00000000000 --- a/src/Formats/examples/tab_separated_streams.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include - -#include - -#include -#include - -#include -#include - -#include - -#include -#include -#include -#include - - -using namespace DB; - -int main(int, char **) -try -{ - Block sample; - { - ColumnWithTypeAndName col; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - ColumnWithTypeAndName col; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - - ReadBufferFromFile in_buf("test_in"); - WriteBufferFromFile out_buf("test_out"); - - FormatSettings format_settings; - - RowInputFormatParams in_params{DEFAULT_INSERT_BLOCK_SIZE, 0, 0}; - RowOutputFormatParams out_params{[](const Columns & /* columns */, size_t /* row */){}}; - - InputFormatPtr input_format = std::make_shared(sample, in_buf, in_params, false, false, format_settings); - BlockInputStreamPtr block_input = std::make_shared(std::move(input_format)); - - BlockOutputStreamPtr block_output = std::make_shared( - std::make_shared(out_buf, sample, false, false, out_params, format_settings)); - - copyData(*block_input, *block_output); - return 0; -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - return 1; -} diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index b20954c9652..04e5f80468b 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -124,3 +124,6 @@ endif() # Signed integer overflow on user-provided data inside boost::geometry - ignore. 
set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow) + +# target_link_libraries(clickhouse_functions PRIVATE ${S2_LIBRARY}) +target_include_directories(clickhouse_functions SYSTEM PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index d12bc1701ad..1891410a18e 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -74,6 +74,30 @@ struct ToDateImpl using FactorTransform = ZeroTransform; }; +struct ToDate32Impl +{ + static constexpr auto name = "toDate32"; + + static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone) + { + return Int32(time_zone.toDayNum(t)); + } + static inline Int32 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return Int32(time_zone.toDayNum(t)); + } + static inline Int32 execute(Int32 d, const DateLUTImpl &) + { + return d; + } + static inline Int32 execute(UInt16 d, const DateLUTImpl &) + { + return d; + } + + using FactorTransform = ZeroTransform; +}; + struct ToStartOfDayImpl { static constexpr auto name = "toStartOfDay"; diff --git a/src/Functions/FunctionChar.cpp b/src/Functions/FunctionChar.cpp new file mode 100644 index 00000000000..1cbb60b7760 --- /dev/null +++ b/src/Functions/FunctionChar.cpp @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +class FunctionChar : public IFunction +{ +public: + static constexpr auto name = "char"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception("Number of arguments for function " + getName() + " can't be " + toString(arguments.size()) + + ", should be at least 1", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + for (const auto & arg : arguments) + { + WhichDataType which(arg); + if (!(which.isInt() || which.isUInt() || which.isFloat())) + throw Exception("Illegal type " + arg->getName() + " of argument of function " + getName() + + ", must be Int, UInt or Float number", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const auto size_per_row = arguments.size() + 1; + out_vec.resize(size_per_row * input_rows_count); + out_offsets.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + out_offsets[row] = size_per_row + out_offsets[row - 1]; + out_vec[row * size_per_row + size_per_row - 1] = '\0'; + } + + Columns columns_holder(arguments.size()); + for (size_t idx = 0; idx < arguments.size(); ++idx) + { + //partial const column + columns_holder[idx] = arguments[idx].column->convertToFullColumnIfConst(); + const IColumn * column = 
columns_holder[idx].get(); + + if (!(executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) + || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row))) + { + throw Exception{"Illegal column " + arguments[idx].column->getName() + + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + } + } + + return col_str; + } + +private: + template + bool executeNumber(const IColumn & src_data, ColumnString::Chars & out_vec, const size_t & column_idx, const size_t & rows, const size_t & size_per_row) const + { + const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data); + + if (!src_data_concrete) + { + return false; + } + + for (size_t row = 0; row < rows; ++row) + { + out_vec[row * size_per_row + column_idx] = static_cast(src_data_concrete->getInt(row)); + } + return true; + } +}; + +void registerFunctionChar(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 8bd5218261e..abf7f967653 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -39,7 +39,7 @@ public: { if (arguments.size() == 1) { - if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDateOrDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". Should be a date or a date with time", @@ -47,7 +47,7 @@ public: } else if (arguments.size() == 2) { - if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDateOrDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". 
Should be a date or a date with time", @@ -165,4 +165,3 @@ public: }; } - diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index dcdd0e521eb..eac1a7ad1a1 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -49,47 +49,48 @@ Columns convertConstTupleToConstantElements(const ColumnConst & column) return res; } +ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col) +{ + if (col.type->isNullable()) + { + const DataTypePtr & nested_type = static_cast(*col.type).getNestedType(); + + if (!col.column) + { + return ColumnWithTypeAndName{nullptr, nested_type, col.name}; + } + else if (const auto * nullable = checkAndGetColumn(*col.column)) + { + const auto & nested_col = nullable->getNestedColumnPtr(); + return ColumnWithTypeAndName{nested_col, nested_type, col.name}; + } + else if (const auto * const_column = checkAndGetColumn(*col.column)) + { + const auto * nullable_column = checkAndGetColumn(const_column->getDataColumn()); + + ColumnPtr nullable_res; + if (nullable_column) + { + const auto & nested_col = nullable_column->getNestedColumnPtr(); + nullable_res = ColumnConst::create(nested_col, col.column->size()); + } + else + { + nullable_res = makeNullable(col.column); + } + return ColumnWithTypeAndName{ nullable_res, nested_type, col.name }; + } + else + throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); + } + return col; +} ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns) { ColumnsWithTypeAndName res; for (const auto & col : columns) - { - if (col.type->isNullable()) - { - const DataTypePtr & nested_type = static_cast(*col.type).getNestedType(); - - if (!col.column) - { - res.emplace_back(ColumnWithTypeAndName{nullptr, nested_type, col.name}); - } - else if (const auto * nullable = checkAndGetColumn(*col.column)) - { - const auto & nested_col = nullable->getNestedColumnPtr(); - res.emplace_back(ColumnWithTypeAndName{nested_col, nested_type, col.name}); - } - else if (const auto * const_column = checkAndGetColumn(*col.column)) - { - const auto * nullable_column = checkAndGetColumn(const_column->getDataColumn()); - - ColumnPtr nullable_res; - if (nullable_column) - { - const auto & nested_col = nullable_column->getNestedColumnPtr(); - nullable_res = ColumnConst::create(nested_col, col.column->size()); - } - else - { - nullable_res = makeNullable(col.column); - } - res.emplace_back(ColumnWithTypeAndName{ nullable_res, nested_type, col.name }); - } - else - throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); - } - else - res.emplace_back(col); - } + res.emplace_back(columnGetNested(col)); return res; } diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 7ab008b8bea..5abe24f4e50 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -81,6 +81,8 @@ inline std::enable_if_t, Field> toField(const T & x, UInt32 s Columns convertConstTupleToConstantElements(const ColumnConst & column); +/// Returns nested column with corrected type if nullable +ColumnWithTypeAndName columnGetNested(const ColumnWithTypeAndName & col); /// Returns the copy of a given columns in which each column is replaced with its respective nested /// column if it is nullable. 
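The FunctionHelpers change above is a pure extraction: the Nullable-unwrapping logic that used to live inside createBlockWithNestedColumns() is now available on its own as columnGetNested(). A rough usage sketch, assuming the ClickHouse source tree (the function and type names are real, the example itself is illustrative):

    #include <Core/ColumnWithTypeAndName.h>
    #include <Core/Field.h>
    #include <DataTypes/DataTypeNullable.h>
    #include <DataTypes/DataTypesNumber.h>
    #include <Functions/FunctionHelpers.h>

    DB::ColumnWithTypeAndName unwrapExample()
    {
        using namespace DB;

        auto nullable_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>());
        /// A Const(Nullable(UInt64)) column with three rows, all NULL.
        ColumnWithTypeAndName col{nullable_type->createColumnConst(3, Field()), nullable_type, "x"};

        /// columnGetNested() peels off the Nullable layer: the returned type is UInt64, and the column
        /// is the nested data re-wrapped in Const (or re-wrapped as Nullable when the inner column of
        /// a Const turns out not to be a ColumnNullable).
        return columnGetNested(col);
    }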
diff --git a/src/Functions/FunctionsBinaryRepr.cpp b/src/Functions/FunctionsBinaryRepr.cpp new file mode 100644 index 00000000000..08d74b30166 --- /dev/null +++ b/src/Functions/FunctionsBinaryRepr.cpp @@ -0,0 +1,562 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; +} + +/* + * hex(x) - Returns hexadecimal representation; capital letters; there are no prefixes 0x or suffixes h. + * For numbers, returns a variable-length string - hex in the "human" (big endian) format, with the leading zeros being cut, + * but only by whole bytes. For dates and datetimes - the same as for numbers. + * For example, hex(257) = '0101'. + * + * unhex(string) - Returns a string, hex of which is equal to `string` with regard of case and discarding one leading zero. + * If such a string does not exist, could return arbitrary implementation specific value. + * + * bin(x) - Returns binary representation. + * + * unbin(x) - Returns a string, opposite to `bin`. + * + */ + +struct HexImpl +{ + static constexpr auto name = "hex"; + static constexpr size_t word_size = 2; + + template + static void executeOneUInt(T x, char *& out) + { + bool was_nonzero = false; + for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) + { + UInt8 byte = x >> offset; + + /// Skip leading zeros + if (byte == 0 && !was_nonzero && offset) + continue; + + was_nonzero = true; + writeHexByteUppercase(byte, out); + out += word_size; + } + *out = '\0'; + ++out; + } + + static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) + { + while (pos < end) + { + writeHexByteUppercase(*pos, out); + ++pos; + out += word_size; + } + *out = '\0'; + ++out; + } + + template + static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) + { + const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte. 
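    /// Worked example for the hex() encoder above (illustrative): executeOneUInt() walks the value
    /// from the most significant byte down and skips leading zero bytes, but always keeps the last
    /// one. So hex(UInt16(257)) sees the bytes 0x01 0x01 and produces "0101", hex(UInt32(257)) skips
    /// two zero bytes and also produces "0101", and hex(0) still yields "00".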
+ auto col_str = ColumnString::create(); + + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * hex_length); + + size_t pos = 0; + char * out = reinterpret_cast(&out_vec[0]); + for (size_t i = 0; i < size; ++i) + { + const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); + executeOneString(in_pos, in_pos + type_size_in_bytes, out); + + pos += hex_length; + out_offsets[i] = pos; + } + col_res = std::move(col_str); + } +}; + +struct UnhexImpl +{ + static constexpr auto name = "unhex"; + static constexpr size_t word_size = 2; + + static void decode(const char * pos, const char * end, char *& out) + { + if ((end - pos) & 1) + { + *out = unhex(*pos); + ++out; + ++pos; + } + while (pos < end) + { + *out = unhex2(pos); + pos += word_size; + ++out; + } + *out = '\0'; + ++out; + } +}; + +struct BinImpl +{ + static constexpr auto name = "bin"; + static constexpr size_t word_size = 8; + + template + static void executeOneUInt(T x, char *& out) + { + bool was_nonzero = false; + for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) + { + UInt8 byte = x >> offset; + + /// Skip leading zeros + if (byte == 0 && !was_nonzero && offset) + continue; + + was_nonzero = true; + writeBinByte(byte, out); + out += word_size; + } + *out = '\0'; + ++out; + } + + template + static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) + { + const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte. + auto col_str = ColumnString::create(); + + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * hex_length); + + size_t pos = 0; + char * out = reinterpret_cast(out_vec.data()); + for (size_t i = 0; i < size; ++i) + { + const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); + executeOneString(in_pos, in_pos + type_size_in_bytes, out); + + pos += hex_length; + out_offsets[i] = pos; + } + col_res = std::move(col_str); + } + + static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) + { + while (pos < end) + { + writeBinByte(*pos, out); + ++pos; + out += word_size; + } + *out = '\0'; + ++out; + } +}; + +struct UnbinImpl +{ + static constexpr auto name = "unbin"; + static constexpr size_t word_size = 8; + + static void decode(const char * pos, const char * end, char *& out) + { + if (pos == end) + { + *out = '\0'; + ++out; + return; + } + + UInt8 left = 0; + + /// end - pos is the length of input. + /// (length & 7) to make remain bits length mod 8 is zero to split. + /// e.g. the length is 9 and the input is "101000001", + /// first left_cnt is 1, left is 0, right shift, pos is 1, left = 1 + /// then, left_cnt is 0, remain input is '01000001'. 
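    /// Continuing the example above (illustrative): for the 9-character input "101000001" the loop
    /// immediately below consumes the single leading bit, which is emitted as the byte 0x01; the main
    /// loop then consumes the remaining 8 characters "01000001" and emits 0x41 ('A'), so the decoded
    /// string is "\x01\x41". The `end - pos == 0` check keeps inputs shorter than 8 bits, e.g.
    /// unbin('0'), from decoding to an empty string.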
+ for (UInt8 left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt) + { + left = left << 1; + if (*pos != '0') + left += 1; + ++pos; + } + + if (left != 0 || end - pos == 0) + { + *out = left; + ++out; + } + + assert((end - pos) % 8 == 0); + + while (end - pos != 0) + { + UInt8 c = 0; + for (UInt8 i = 0; i < 8; ++i) + { + c = c << 1; + if (*pos != '0') + c += 1; + ++pos; + } + *out = c; + ++out; + } + + *out = '\0'; + ++out; + } +}; + +/// Encode number or string to string with binary or hexadecimal representation +template +class EncodeToBinaryRepr : public IFunction +{ +public: + static constexpr auto name = Impl::name; + static constexpr size_t word_size = Impl::word_size; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + WhichDataType which(arguments[0]); + + if (!which.isStringOrFixedString() && + !which.isDate() && + !which.isDateTime() && + !which.isDateTime64() && + !which.isUInt() && + !which.isFloat() && + !which.isDecimal() && + !which.isAggregateFunction()) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const IColumn * column = arguments[0].column.get(); + ColumnPtr res_column; + + WhichDataType which(column->getDataType()); + if (which.isAggregateFunction()) + { + const ColumnPtr to_string = castColumn(arguments[0], std::make_shared()); + const auto * str_column = checkAndGetColumn(to_string.get()); + tryExecuteString(str_column, res_column); + return res_column; + } + + if (tryExecuteUInt(column, res_column) || + tryExecuteUInt(column, res_column) || + tryExecuteUInt(column, res_column) || + tryExecuteUInt(column, res_column) || + tryExecuteString(column, res_column) || + tryExecuteFixedString(column, res_column) || + tryExecuteFloat(column, res_column) || + tryExecuteFloat(column, res_column) || + tryExecuteDecimal(column, res_column) || + tryExecuteDecimal(column, res_column) || + tryExecuteDecimal(column, res_column)) + return res_column; + + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + template + bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnVector * col_vec = checkAndGetColumn>(col); + + static constexpr size_t MAX_LENGTH = sizeof(T) * word_size + 1; /// Including trailing zero byte. + + if (col_vec) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const typename ColumnVector::Container & in_vec = col_vec->getData(); + + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. 
+ + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + { + /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). + if (pos + MAX_LENGTH > out_vec.size()) + out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); + + char * begin = reinterpret_cast(&out_vec[pos]); + char * end = begin; + Impl::executeOneUInt(in_vec[i], end); + + pos += end - begin; + out_offsets[i] = pos; + } + out_vec.resize(pos); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + bool tryExecuteString(const IColumn *col, ColumnPtr &col_res) const + { + const ColumnString * col_str_in = checkAndGetColumn(col); + + if (col_str_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_str_in->getChars(); + const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); + + size_t size = in_offsets.size(); + + out_offsets.resize(size); + /// reserve `word_size` bytes for each non trailing zero byte from input + `size` bytes for trailing zeros + out_vec.resize((in_vec.size() - size) * word_size + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + size_t prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = in_offsets[i]; + + Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos); + + out_offsets[i] = pos - begin; + + prev_offset = new_offset; + } + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + template + bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnDecimal * col_dec = checkAndGetColumn>(col); + if (col_dec) + { + const typename ColumnDecimal::Container & in_vec = col_dec->getData(); + Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); + return true; + } + else + { + return false; + } + } + + static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) + { + const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); + + if (col_fstr_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_fstr_in->getChars(); + + size_t size = col_fstr_in->size(); + + out_offsets.resize(size); + out_vec.resize(in_vec.size() * word_size + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + size_t n = col_fstr_in->getN(); + + size_t prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = prev_offset + n; + + Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos); + + out_offsets[i] = pos - begin; + prev_offset = new_offset; + } + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + template + bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnVector * col_vec = checkAndGetColumn>(col); + if (col_vec) + { + const typename ColumnVector::Container & in_vec = 
col_vec->getData(); + Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); + return true; + } + else + { + return false; + } + } +}; + +/// Decode number or string from string with binary or hexadecimal representation +template +class DecodeFromBinaryRepr : public IFunction +{ +public: + static constexpr auto name = Impl::name; + static constexpr size_t word_size = Impl::word_size; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnString::create(); + + ColumnString::Chars & out_vec = col_res->getChars(); + ColumnString::Offsets & out_offsets = col_res->getOffsets(); + + const ColumnString::Chars & in_vec = col->getChars(); + const ColumnString::Offsets & in_offsets = col->getOffsets(); + + size_t size = in_offsets.size(); + out_offsets.resize(size); + out_vec.resize(in_vec.size() / word_size + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + size_t prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + size_t new_offset = in_offsets[i]; + + Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); + + out_offsets[i] = pos - begin; + + prev_offset = new_offset; + } + + out_vec.resize(pos - begin); + + return col_res; + } + else + { + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + } +}; + +void registerFunctionsBinaryRepr(FunctionFactory & factory) +{ + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp new file mode 100644 index 00000000000..32c45823e0f --- /dev/null +++ b/src/Functions/FunctionsBitToArray.cpp @@ -0,0 +1,337 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + + +/** Functions for an unusual conversion to a string or array: + * + * bitmaskToList - takes an integer - a bitmask, returns a string of degrees of 2 separated by a comma. + * for example, bitmaskToList(50) = '2,16,32' + * + * bitmaskToArray(x) - Returns an array of powers of two in the binary form of x. For example, bitmaskToArray(50) = [2, 16, 32]. 
+ * + */ + +namespace +{ + +class FunctionBitmaskToList : public IFunction +{ +public: + static constexpr auto name = "bitmaskToList"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const DataTypePtr & type = arguments[0]; + + if (!isInteger(type)) + throw Exception("Cannot format " + type->getName() + " as bitmask string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + ColumnPtr res; + if (!((res = executeType(arguments)) + || (res = executeType(arguments)) + || (res = executeType(arguments)) + || (res = executeType(arguments)) + || (res = executeType(arguments)) + || (res = executeType(arguments)) + || (res = executeType(arguments)) + || (res = executeType(arguments)))) + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + return res; + } + +private: + template + inline static void writeBitmask(T x, WriteBuffer & out) + { + using UnsignedT = make_unsigned_t; + UnsignedT u_x = x; + + bool first = true; + while (u_x) + { + UnsignedT y = u_x & (u_x - 1); + UnsignedT bit = u_x ^ y; + u_x = y; + if (!first) + writeChar(',', out); + first = false; + writeIntText(T(bit), out); + } + } + + template + ColumnPtr executeType(const ColumnsWithTypeAndName & columns) const + { + if (const ColumnVector * col_from = checkAndGetColumn>(columns[0].column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColumnVector::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + data_to.resize(size * 2); + offsets_to.resize(size); + + WriteBufferFromVector buf_to(data_to); + + for (size_t i = 0; i < size; ++i) + { + writeBitmask(vec_from[i], buf_to); + writeChar(0, buf_to); + offsets_to[i] = buf_to.count(); + } + + buf_to.finalize(); + return col_to; + } + + return nullptr; + } +}; + + +class FunctionBitmaskToArray : public IFunction +{ +public: + static constexpr auto name = "bitmaskToArray"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isInteger(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(arguments[0]); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + template + bool tryExecute(const IColumn * column, ColumnPtr & out_column) const + { + using UnsignedT = make_unsigned_t; + + if (const ColumnVector * col_from = checkAndGetColumn>(column)) + { + auto col_values = ColumnVector::create(); + auto col_offsets = 
ColumnArray::ColumnOffsets::create(); + + typename ColumnVector::Container & res_values = col_values->getData(); + ColumnArray::Offsets & res_offsets = col_offsets->getData(); + + const typename ColumnVector::Container & vec_from = col_from->getData(); + size_t size = vec_from.size(); + res_offsets.resize(size); + res_values.reserve(size * 2); + + for (size_t row = 0; row < size; ++row) + { + UnsignedT x = vec_from[row]; + while (x) + { + UnsignedT y = x & (x - 1); + UnsignedT bit = x ^ y; + x = y; + res_values.push_back(bit); + } + res_offsets[row] = res_values.size(); + } + + out_column = ColumnArray::create(std::move(col_values), std::move(col_offsets)); + return true; + } + else + { + return false; + } + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const IColumn * in_column = arguments[0].column.get(); + ColumnPtr out_column; + + if (tryExecute(in_column, out_column) || + tryExecute(in_column, out_column) || + tryExecute(in_column, out_column) || + tryExecute(in_column, out_column) || + tryExecute(in_column, out_column) || + tryExecute(in_column, out_column) || + tryExecute(in_column, out_column) || + tryExecute(in_column, out_column)) + return out_column; + + throw Exception("Illegal column " + arguments[0].column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + +class FunctionBitPositionsToArray : public IFunction +{ +public: + static constexpr auto name = "bitPositionsToArray"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isInteger(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + getName(), + arguments[0]->getName()); + + return std::make_shared(std::make_shared()); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + template + ColumnPtr executeType(const IColumn * column) const + { + const ColumnVector * col_from = checkAndGetColumn>(column); + if (!col_from) + return nullptr; + + auto result_array_values = ColumnVector::create(); + auto result_array_offsets = ColumnArray::ColumnOffsets::create(); + + auto & result_array_values_data = result_array_values->getData(); + auto & result_array_offsets_data = result_array_offsets->getData(); + + auto & vec_from = col_from->getData(); + size_t size = vec_from.size(); + result_array_offsets_data.resize(size); + result_array_values_data.reserve(size * 2); + + using UnsignedType = make_unsigned_t; + + for (size_t row = 0; row < size; ++row) + { + UnsignedType x = static_cast(vec_from[row]); + + if constexpr (is_big_int_v) + { + size_t position = 0; + + while (x) + { + if (x & 1) + result_array_values_data.push_back(position); + + x >>= 1; + ++position; + } + } + else + { + while (x) + { + result_array_values_data.push_back(getTrailingZeroBitsUnsafe(x)); + x &= (x - 1); + } + } + + result_array_offsets_data[row] = result_array_values_data.size(); + } + + auto result_column = ColumnArray::create(std::move(result_array_values), std::move(result_array_offsets)); + + return result_column; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, 
size_t /*input_rows_count*/) const override + { + const IColumn * in_column = arguments[0].column.get(); + ColumnPtr result_column; + + if (!((result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)) + || (result_column = executeType(in_column)))) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), + getName()); + } + + return result_column; + } +}; + +} + +void registerFunctionsBitToArray(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); +} + +} + diff --git a/src/Functions/FunctionsCoding.cpp b/src/Functions/FunctionsCoding.cpp deleted file mode 100644 index f1bbeb5c43f..00000000000 --- a/src/Functions/FunctionsCoding.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include - - -namespace DB -{ - -struct NameFunctionIPv4NumToString { static constexpr auto name = "IPv4NumToString"; }; -struct NameFunctionIPv4NumToStringClassC { static constexpr auto name = "IPv4NumToStringClassC"; }; - - -void registerFunctionsCoding(FunctionFactory & factory) -{ - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(FunctionFactory::CaseInsensitive); - factory.registerFunction(FunctionFactory::CaseInsensitive); - factory.registerFunction(FunctionFactory::CaseInsensitive); - factory.registerFunction(FunctionFactory::CaseInsensitive); - factory.registerFunction(FunctionFactory::CaseInsensitive); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - - factory.registerFunction>(); - factory.registerFunction>(); - /// MysQL compatibility alias. - factory.registerFunction>("INET_NTOA", FunctionFactory::CaseInsensitive); - - factory.registerFunction(); - /// MysQL compatibility alias. - factory.registerFunction("INET_ATON", FunctionFactory::CaseInsensitive); - - factory.registerFunction(); - /// MysQL compatibility alias. - factory.registerFunction("INET6_NTOA", FunctionFactory::CaseInsensitive); - - factory.registerFunction(); - /// MysQL compatibility alias. 
- factory.registerFunction("INET6_ATON", FunctionFactory::CaseInsensitive); -} - -} diff --git a/src/Functions/FunctionsCoding.h b/src/Functions/FunctionsCoding.h deleted file mode 100644 index 00b09acea1f..00000000000 --- a/src/Functions/FunctionsCoding.h +++ /dev/null @@ -1,2218 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int LOGICAL_ERROR; - extern const int ILLEGAL_COLUMN; -} - - -/** TODO This file contains ridiculous amount of copy-paste. - */ - -/** Encoding functions: - * - * IPv4NumToString (num) - See below. - * IPv4StringToNum(string) - Convert, for example, '192.168.0.1' to 3232235521 and vice versa. - * - * hex(x) - Returns hex; capital letters; there are no prefixes 0x or suffixes h. - * For numbers, returns a variable-length string - hex in the "human" (big endian) format, with the leading zeros being cut, - * but only by whole bytes. For dates and datetimes - the same as for numbers. - * For example, hex(257) = '0101'. - * unhex(string) - Returns a string, hex of which is equal to `string` with regard of case and discarding one leading zero. - * If such a string does not exist, could return arbitrary implementation specific value. - * - * bitmaskToArray(x) - Returns an array of powers of two in the binary form of x. For example, bitmaskToArray(50) = [2, 16, 32]. - */ - - -constexpr size_t uuid_bytes_length = 16; -constexpr size_t uuid_text_length = 36; - -class FunctionIPv6NumToString : public IFunction -{ -public: - static constexpr auto name = "IPv6NumToString"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const auto * ptr = checkAndGetDataType(arguments[0].get()); - if (!ptr || ptr->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const auto & col_type_name = arguments[0]; - const ColumnPtr & column = col_type_name.column; - - if (const auto * col_in = checkAndGetColumn(column.get())) - { - if (col_in->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + col_type_name.type->getName() + - " of column " + col_in->getName() + - " argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto size = col_in->size(); - const auto & vec_in = col_in->getChars(); - - auto col_res = ColumnString::create(); - - ColumnString::Chars & vec_res = col_res->getChars(); - ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(size * 
(IPV6_MAX_TEXT_LENGTH + 1)); - offsets_res.resize(size); - - auto * begin = reinterpret_cast(vec_res.data()); - auto * pos = begin; - - for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += IPV6_BINARY_LENGTH, ++i) - { - formatIPv6(reinterpret_cast(&vec_in[offset]), pos); - offsets_res[i] = pos - begin; - } - - vec_res.resize(pos - begin); - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -class FunctionCutIPv6 : public IFunction -{ -public: - static constexpr auto name = "cutIPv6"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 3; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const auto * ptr = checkAndGetDataType(arguments[0].get()); - if (!ptr || ptr->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument 1 of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!WhichDataType(arguments[1]).isUInt8()) - throw Exception("Illegal type " + arguments[1]->getName() + - " of argument 2 of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!WhichDataType(arguments[2]).isUInt8()) - throw Exception("Illegal type " + arguments[2]->getName() + - " of argument 3 of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const auto & col_type_name = arguments[0]; - const ColumnPtr & column = col_type_name.column; - - const auto & col_ipv6_zeroed_tail_bytes_type = arguments[1]; - const auto & col_ipv6_zeroed_tail_bytes = col_ipv6_zeroed_tail_bytes_type.column; - const auto & col_ipv4_zeroed_tail_bytes_type = arguments[2]; - const auto & col_ipv4_zeroed_tail_bytes = col_ipv4_zeroed_tail_bytes_type.column; - - if (const auto * col_in = checkAndGetColumn(column.get())) - { - if (col_in->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + col_type_name.type->getName() + - " of column " + col_in->getName() + - " argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto * ipv6_zeroed_tail_bytes = checkAndGetColumnConst>(col_ipv6_zeroed_tail_bytes.get()); - if (!ipv6_zeroed_tail_bytes) - throw Exception("Illegal type " + col_ipv6_zeroed_tail_bytes_type.type->getName() + - " of argument 2 of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - UInt8 ipv6_zeroed_tail_bytes_count = ipv6_zeroed_tail_bytes->getValue(); - if (ipv6_zeroed_tail_bytes_count > IPV6_BINARY_LENGTH) - throw Exception("Illegal value for argument 2 " + col_ipv6_zeroed_tail_bytes_type.type->getName() + - " of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto * ipv4_zeroed_tail_bytes = checkAndGetColumnConst>(col_ipv4_zeroed_tail_bytes.get()); - if (!ipv4_zeroed_tail_bytes) - throw Exception("Illegal type " + col_ipv4_zeroed_tail_bytes_type.type->getName() + - " 
of argument 3 of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - UInt8 ipv4_zeroed_tail_bytes_count = ipv4_zeroed_tail_bytes->getValue(); - if (ipv4_zeroed_tail_bytes_count > IPV6_BINARY_LENGTH) - throw Exception("Illegal value for argument 3 " + col_ipv4_zeroed_tail_bytes_type.type->getName() + - " of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto size = col_in->size(); - const auto & vec_in = col_in->getChars(); - - auto col_res = ColumnString::create(); - - ColumnString::Chars & vec_res = col_res->getChars(); - ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1)); - offsets_res.resize(size); - - auto * begin = reinterpret_cast(vec_res.data()); - auto * pos = begin; - - for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += IPV6_BINARY_LENGTH, ++i) - { - const auto * address = &vec_in[offset]; - UInt8 zeroed_tail_bytes_count = isIPv4Mapped(address) ? ipv4_zeroed_tail_bytes_count : ipv6_zeroed_tail_bytes_count; - cutAddress(reinterpret_cast(address), pos, zeroed_tail_bytes_count); - offsets_res[i] = pos - begin; - } - - vec_res.resize(pos - begin); - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - -private: - static bool isIPv4Mapped(const UInt8 * address) - { - return (unalignedLoad(address) == 0) && - ((unalignedLoad(address + 8) & 0x00000000FFFFFFFFull) == 0x00000000FFFF0000ull); - } - - static void cutAddress(const unsigned char * address, char *& dst, UInt8 zeroed_tail_bytes_count) - { - formatIPv6(address, dst, zeroed_tail_bytes_count); - } -}; - - -class FunctionIPv6StringToNum : public IFunction -{ -public: - static constexpr auto name = "IPv6StringToNum"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - static inline bool tryParseIPv4(const char * pos) - { - UInt32 result = 0; - return DB::parseIPv4(pos, reinterpret_cast(&result)); - } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(IPV6_BINARY_LENGTH); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const auto * col_in = checkAndGetColumn(column.get())) - { - auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); - - auto & vec_res = col_res->getChars(); - vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH); - - const ColumnString::Chars & vec_src = col_in->getChars(); - const ColumnString::Offsets & offsets_src = col_in->getOffsets(); - size_t src_offset = 0; - char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:"; - - for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i) - { - /// For both cases below: In case of failure, the function parseIPv6 fills vec_res with zero bytes. - - /// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address. 
- /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address. - if (tryParseIPv4(reinterpret_cast(&vec_src[src_offset]))) - { - std::memcpy( - src_ipv4_buf + std::strlen("::ffff:"), - reinterpret_cast(&vec_src[src_offset]), - std::min(offsets_src[i] - src_offset, IPV4_MAX_TEXT_LENGTH + 1)); - parseIPv6(src_ipv4_buf, reinterpret_cast(&vec_res[out_offset])); - } - else - { - parseIPv6( - reinterpret_cast(&vec_src[src_offset]), reinterpret_cast(&vec_res[out_offset])); - } - src_offset = offsets_src[i]; - } - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -/** If mask_tail_octets > 0, the last specified number of octets will be filled with "xxx". - */ -template -class FunctionIPv4NumToString : public IFunction -{ -public: - static constexpr auto name = Name::name; - static FunctionPtr create(ContextPtr) { return std::make_shared>(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return mask_tail_octets == 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!WhichDataType(arguments[0]).isUInt32()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected UInt32", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const ColumnUInt32 * col = typeid_cast(column.get())) - { - const ColumnUInt32::Container & vec_in = col->getData(); - - auto col_res = ColumnString::create(); - - ColumnString::Chars & vec_res = col_res->getChars(); - ColumnString::Offsets & offsets_res = col_res->getOffsets(); - - vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 - offsets_res.resize(vec_in.size()); - char * begin = reinterpret_cast(vec_res.data()); - char * pos = begin; - - for (size_t i = 0; i < vec_in.size(); ++i) - { - DB::formatIPv4(reinterpret_cast(&vec_in[i]), pos, mask_tail_octets, "xxx"); - offsets_res[i] = pos - begin; - } - - vec_res.resize(pos - begin); - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -class FunctionIPv4StringToNum : public IFunction -{ -public: - static constexpr auto name = "IPv4StringToNum"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - static inline UInt32 parseIPv4(const char * pos) - { - UInt32 result = 0; - DB::parseIPv4(pos, reinterpret_cast(&result)); - - return result; - } - - bool useDefaultImplementationForConstants() const override 
{ return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const ColumnString * col = checkAndGetColumn(column.get())) - { - auto col_res = ColumnUInt32::create(); - - ColumnUInt32::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); - - const ColumnString::Chars & vec_src = col->getChars(); - const ColumnString::Offsets & offsets_src = col->getOffsets(); - size_t prev_offset = 0; - - for (size_t i = 0; i < vec_res.size(); ++i) - { - vec_res[i] = parseIPv4(reinterpret_cast(&vec_src[prev_offset])); - prev_offset = offsets_src[i]; - } - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -class FunctionIPv4ToIPv6 : public IFunction -{ -public: - static constexpr auto name = "IPv4ToIPv6"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!checkAndGetDataType(arguments[0].get())) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(16); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const auto & col_type_name = arguments[0]; - const ColumnPtr & column = col_type_name.column; - - if (const auto * col_in = typeid_cast(column.get())) - { - auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); - - auto & vec_res = col_res->getChars(); - vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH); - - const auto & vec_in = col_in->getData(); - - for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i) - mapIPv4ToIPv6(vec_in[i], &vec_res[out_offset]); - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - -private: - static void mapIPv4ToIPv6(UInt32 in, UInt8 * buf) - { - unalignedStore(buf, 0); - unalignedStore(buf + 8, 0x00000000FFFF0000ull | (static_cast(ntohl(in)) << 32)); - } -}; - -class FunctionToIPv4 : public FunctionIPv4StringToNum -{ -public: - static constexpr auto name = "toIPv4"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return DataTypeFactory::instance().get("IPv4"); - } -}; - -class FunctionToIPv6 : public FunctionIPv6StringToNum -{ -public: - static constexpr auto name = "toIPv6"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - 
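Editorial aside, not part of the patch: both the `::ffff:` textual prefix used by IPv6StringToNum and the bit arithmetic in mapIPv4ToIPv6 produce the same IPv4-mapped layout, ten zero bytes, two 0xFF bytes, then the IPv4 address in network order. A small sketch using only the standard library (the address 192.168.0.1 is just an example):

#include <arpa/inet.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    // 192.168.0.1 as IPv4StringToNum would yield it: a host-order UInt32.
    uint32_t ipv4 = (192u << 24) | (168u << 16) | (0u << 8) | 1u;

    // IPv4-mapped IPv6: bytes 0..9 zero, bytes 10..11 are 0xFF, bytes 12..15 carry the IPv4.
    unsigned char buf[16] = {0};
    buf[10] = 0xFF;
    buf[11] = 0xFF;
    uint32_t be = htonl(ipv4);
    std::memcpy(buf + 12, &be, 4);

    // Parsing the textual form gives the identical 16 bytes.
    unsigned char from_text[16];
    if (inet_pton(AF_INET6, "::ffff:192.168.0.1", from_text) == 1)
        std::printf("identical: %d\n", std::memcmp(buf, from_text, 16) == 0); // prints 1
    return 0;
}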
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return DataTypeFactory::instance().get("IPv6"); - } -}; - -class FunctionMACNumToString : public IFunction -{ -public: - static constexpr auto name = "MACNumToString"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!WhichDataType(arguments[0]).isUInt64()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - static void formatMAC(UInt64 mac, UInt8 * out) - { - /// MAC address is represented in UInt64 in natural order (so, MAC addresses are compared in same order as UInt64). - /// Higher two bytes in UInt64 are just ignored. - - writeHexByteUppercase(mac >> 40, &out[0]); - out[2] = ':'; - writeHexByteUppercase(mac >> 32, &out[3]); - out[5] = ':'; - writeHexByteUppercase(mac >> 24, &out[6]); - out[8] = ':'; - writeHexByteUppercase(mac >> 16, &out[9]); - out[11] = ':'; - writeHexByteUppercase(mac >> 8, &out[12]); - out[14] = ':'; - writeHexByteUppercase(mac, &out[15]); - out[17] = '\0'; - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const ColumnUInt64 * col = typeid_cast(column.get())) - { - const ColumnUInt64::Container & vec_in = col->getData(); - - auto col_res = ColumnString::create(); - - ColumnString::Chars & vec_res = col_res->getChars(); - ColumnString::Offsets & offsets_res = col_res->getOffsets(); - - vec_res.resize(vec_in.size() * 18); /// the value is: xx:xx:xx:xx:xx:xx\0 - offsets_res.resize(vec_in.size()); - - size_t current_offset = 0; - for (size_t i = 0; i < vec_in.size(); ++i) - { - formatMAC(vec_in[i], &vec_res[current_offset]); - current_offset += 18; - offsets_res[i] = current_offset; - } - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -struct ParseMACImpl -{ - static constexpr size_t min_string_size = 17; - static constexpr size_t max_string_size = 17; - - /** Example: 01:02:03:04:05:06. - * There could be any separators instead of : and them are just ignored. - * The order of resulting integers are correspond to the order of MAC address. - * If there are any chars other than valid hex digits for bytes, the behaviour is implementation specific. 
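Editorial aside, not part of the patch: the indexing in `parse` below reads two hex digits per byte at positions 0-1, 3-4, 6-7, 9-10, 12-13, 15-16 and ignores whatever sits between them, so "01:02:03:04:05:06" becomes 0x010203040506. A standalone sketch of the same scheme (hexDigit is a local helper, not a ClickHouse function):

#include <cstdint>
#include <cstdio>

static unsigned hexDigit(char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return 0;
}

int main()
{
    const char * mac = "01:02:03:04:05:06";
    uint64_t result = 0;
    for (int byte = 0; byte < 6; ++byte)
    {
        int pos = byte * 3; // two hex digits plus one separator per byte
        result = (result << 8) | (hexDigit(mac[pos]) << 4 | hexDigit(mac[pos + 1]));
    }
    std::printf("0x%012llx\n", static_cast<unsigned long long>(result)); // 0x010203040506
    return 0;
}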
- */ - static UInt64 parse(const char * pos) - { - return (UInt64(unhex(pos[0])) << 44) - | (UInt64(unhex(pos[1])) << 40) - | (UInt64(unhex(pos[3])) << 36) - | (UInt64(unhex(pos[4])) << 32) - | (UInt64(unhex(pos[6])) << 28) - | (UInt64(unhex(pos[7])) << 24) - | (UInt64(unhex(pos[9])) << 20) - | (UInt64(unhex(pos[10])) << 16) - | (UInt64(unhex(pos[12])) << 12) - | (UInt64(unhex(pos[13])) << 8) - | (UInt64(unhex(pos[15])) << 4) - | (UInt64(unhex(pos[16]))); - } - - static constexpr auto name = "MACStringToNum"; -}; - -struct ParseOUIImpl -{ - static constexpr size_t min_string_size = 8; - static constexpr size_t max_string_size = 17; - - /** OUI is the first three bytes of MAC address. - * Example: 01:02:03. - */ - static UInt64 parse(const char * pos) - { - return (UInt64(unhex(pos[0])) << 20) - | (UInt64(unhex(pos[1])) << 16) - | (UInt64(unhex(pos[3])) << 12) - | (UInt64(unhex(pos[4])) << 8) - | (UInt64(unhex(pos[6])) << 4) - | (UInt64(unhex(pos[7]))); - } - - static constexpr auto name = "MACStringToOUI"; -}; - - -template -class FunctionMACStringTo : public IFunction -{ -public: - static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared>(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const ColumnString * col = checkAndGetColumn(column.get())) - { - auto col_res = ColumnUInt64::create(); - - ColumnUInt64::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); - - const ColumnString::Chars & vec_src = col->getChars(); - const ColumnString::Offsets & offsets_src = col->getOffsets(); - size_t prev_offset = 0; - - for (size_t i = 0; i < vec_res.size(); ++i) - { - size_t current_offset = offsets_src[i]; - size_t string_size = current_offset - prev_offset - 1; /// mind the terminating zero byte - - if (string_size >= Impl::min_string_size && string_size <= Impl::max_string_size) - vec_res[i] = Impl::parse(reinterpret_cast(&vec_src[prev_offset])); - else - vec_res[i] = 0; - - prev_offset = current_offset; - } - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -class FunctionUUIDNumToString : public IFunction -{ - -public: - static constexpr auto name = "UUIDNumToString"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const auto * ptr = checkAndGetDataType(arguments[0].get()); - if (!ptr || ptr->getN() != uuid_bytes_length) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName() + - ", expected 
FixedString(" + toString(uuid_bytes_length) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnWithTypeAndName & col_type_name = arguments[0]; - const ColumnPtr & column = col_type_name.column; - - if (const auto * col_in = checkAndGetColumn(column.get())) - { - if (col_in->getN() != uuid_bytes_length) - throw Exception("Illegal type " + col_type_name.type->getName() + - " of column " + col_in->getName() + - " argument of function " + getName() + - ", expected FixedString(" + toString(uuid_bytes_length) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto size = col_in->size(); - const auto & vec_in = col_in->getChars(); - - auto col_res = ColumnString::create(); - - ColumnString::Chars & vec_res = col_res->getChars(); - ColumnString::Offsets & offsets_res = col_res->getOffsets(); - vec_res.resize(size * (uuid_text_length + 1)); - offsets_res.resize(size); - - size_t src_offset = 0; - size_t dst_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - formatUUID(&vec_in[src_offset], &vec_res[dst_offset]); - src_offset += uuid_bytes_length; - dst_offset += uuid_text_length; - vec_res[dst_offset] = 0; - ++dst_offset; - offsets_res[i] = dst_offset; - } - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -class FunctionUUIDStringToNum : public IFunction -{ -private: - static void parseHex(const UInt8 * __restrict src, UInt8 * __restrict dst, const size_t num_bytes) - { - size_t src_pos = 0; - size_t dst_pos = 0; - for (; dst_pos < num_bytes; ++dst_pos) - { - dst[dst_pos] = unhex2(reinterpret_cast(&src[src_pos])); - src_pos += 2; - } - } - - static void parseUUID(const UInt8 * src36, UInt8 * dst16) - { - /// If string is not like UUID - implementation specific behaviour. 
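Editorial aside, not part of the patch: the hard-coded offsets in `parseUUID` below map the five dash-separated groups of the 36-character 8-4-4-4-12 form onto the 16 output bytes: hex digits start at text offsets 0, 9, 14, 19, 24 and fill 4, 2, 2, 2, 6 bytes. A small sketch of the same split (the UUID literal is an arbitrary example, hexPair a local helper):

#include <cstdint>
#include <cstdio>

static uint8_t hexPair(const char * p)
{
    auto nibble = [](char c) -> uint8_t { return (c <= '9') ? c - '0' : (c | 0x20) - 'a' + 10; };
    return (nibble(p[0]) << 4) | nibble(p[1]);
}

int main()
{
    const char * text = "0123e456-7890-4abc-8def-001122334455"; // 36 chars
    uint8_t bytes[16];

    // (text offset, byte offset, byte count) for the five groups.
    const int groups[5][3] = {{0, 0, 4}, {9, 4, 2}, {14, 6, 2}, {19, 8, 2}, {24, 10, 6}};
    for (const auto & g : groups)
        for (int i = 0; i < g[2]; ++i)
            bytes[g[1] + i] = hexPair(text + g[0] + 2 * i);

    for (uint8_t b : bytes)
        std::printf("%02x", b); // 0123e45678904abc8def001122334455
    std::printf("\n");
    return 0;
}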
- - parseHex(&src36[0], &dst16[0], 4); - parseHex(&src36[9], &dst16[4], 2); - parseHex(&src36[14], &dst16[6], 2); - parseHex(&src36[19], &dst16[8], 2); - parseHex(&src36[24], &dst16[10], 6); - } - -public: - static constexpr auto name = "UUIDStringToNum"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - /// String or FixedString(36) - if (!isString(arguments[0])) - { - const auto * ptr = checkAndGetDataType(arguments[0].get()); - if (!ptr || ptr->getN() != uuid_text_length) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName() + - ", expected FixedString(" + toString(uuid_text_length) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - return std::make_shared(uuid_bytes_length); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnWithTypeAndName & col_type_name = arguments[0]; - const ColumnPtr & column = col_type_name.column; - - if (const auto * col_in = checkAndGetColumn(column.get())) - { - const auto & vec_in = col_in->getChars(); - const auto & offsets_in = col_in->getOffsets(); - const size_t size = offsets_in.size(); - - auto col_res = ColumnFixedString::create(uuid_bytes_length); - - ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); - - size_t src_offset = 0; - size_t dst_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - /// If string has incorrect length - then return zero UUID. - /// If string has correct length but contains something not like UUID - implementation specific behaviour. 
- - size_t string_size = offsets_in[i] - src_offset; - if (string_size == uuid_text_length + 1) - parseUUID(&vec_in[src_offset], &vec_res[dst_offset]); - else - memset(&vec_res[dst_offset], 0, uuid_bytes_length); - - dst_offset += uuid_bytes_length; - src_offset += string_size; - } - - return col_res; - } - else if (const auto * col_in_fixed = checkAndGetColumn(column.get())) - { - if (col_in_fixed->getN() != uuid_text_length) - throw Exception("Illegal type " + col_type_name.type->getName() + - " of column " + col_in_fixed->getName() + - " argument of function " + getName() + - ", expected FixedString(" + toString(uuid_text_length) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto size = col_in_fixed->size(); - const auto & vec_in = col_in_fixed->getChars(); - - auto col_res = ColumnFixedString::create(uuid_bytes_length); - - ColumnString::Chars & vec_res = col_res->getChars(); - vec_res.resize(size * uuid_bytes_length); - - size_t src_offset = 0; - size_t dst_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - parseUUID(&vec_in[src_offset], &vec_res[dst_offset]); - src_offset += uuid_text_length; - dst_offset += uuid_bytes_length; - } - - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - } -}; - -/// Encode number or string to string with binary or hexadecimal representation -template -class EncodeToBinaryRepr : public IFunction -{ -public: - static constexpr auto name = Impl::name; - static constexpr size_t word_size = Impl::word_size; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - - bool useDefaultImplementationForConstants() const override { return true; } - - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - WhichDataType which(arguments[0]); - - if (!which.isStringOrFixedString() && - !which.isDate() && - !which.isDateTime() && - !which.isDateTime64() && - !which.isUInt() && - !which.isFloat() && - !which.isDecimal() && - !which.isAggregateFunction()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * column = arguments[0].column.get(); - ColumnPtr res_column; - - WhichDataType which(column->getDataType()); - if (which.isAggregateFunction()) - { - const ColumnPtr to_string = castColumn(arguments[0], std::make_shared()); - const auto * str_column = checkAndGetColumn(to_string.get()); - tryExecuteString(str_column, res_column); - return res_column; - } - - if (tryExecuteUInt(column, res_column) || - tryExecuteUInt(column, res_column) || - tryExecuteUInt(column, res_column) || - tryExecuteUInt(column, res_column) || - tryExecuteString(column, res_column) || - tryExecuteFixedString(column, res_column) || - tryExecuteFloat(column, res_column) || - tryExecuteFloat(column, res_column) || - tryExecuteDecimal(column, res_column) || - tryExecuteDecimal(column, res_column) || - tryExecuteDecimal(column, res_column)) - return res_column; - - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument 
of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - - template - bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnVector * col_vec = checkAndGetColumn>(col); - - static constexpr size_t MAX_LENGTH = sizeof(T) * word_size + 1; /// Including trailing zero byte. - - if (col_vec) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const typename ColumnVector::Container & in_vec = col_vec->getData(); - - size_t size = in_vec.size(); - out_offsets.resize(size); - out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. - - size_t pos = 0; - for (size_t i = 0; i < size; ++i) - { - /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). - if (pos + MAX_LENGTH > out_vec.size()) - out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); - - char * begin = reinterpret_cast(&out_vec[pos]); - char * end = begin; - Impl::executeOneUInt(in_vec[i], end); - - pos += end - begin; - out_offsets[i] = pos; - } - out_vec.resize(pos); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - bool tryExecuteString(const IColumn *col, ColumnPtr &col_res) const - { - const ColumnString * col_str_in = checkAndGetColumn(col); - - if (col_str_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_str_in->getChars(); - const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); - - size_t size = in_offsets.size(); - - out_offsets.resize(size); - /// reserve `word_size` bytes for each non trailing zero byte from input + `size` bytes for trailing zeros - out_vec.resize((in_vec.size() - size) * word_size + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - size_t prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = in_offsets[i]; - - Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos); - - out_offsets[i] = pos - begin; - - prev_offset = new_offset; - } - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - template - bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnDecimal * col_dec = checkAndGetColumn>(col); - if (col_dec) - { - const typename ColumnDecimal::Container & in_vec = col_dec->getData(); - Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); - return true; - } - else - { - return false; - } - } - - static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) - { - const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); - - if (col_fstr_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_fstr_in->getChars(); - - size_t size = col_fstr_in->size(); - - out_offsets.resize(size); - out_vec.resize(in_vec.size() * word_size + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - size_t n = 
col_fstr_in->getN(); - - size_t prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = prev_offset + n; - - Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos); - - out_offsets[i] = pos - begin; - prev_offset = new_offset; - } - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - template - bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const - { - const ColumnVector * col_vec = checkAndGetColumn>(col); - if (col_vec) - { - const typename ColumnVector::Container & in_vec = col_vec->getData(); - Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); - return true; - } - else - { - return false; - } - } -}; - -/// Decode number or string from string with binary or hexadecimal representation -template -class DecodeFromBinaryRepr : public IFunction -{ -public: - static constexpr auto name = Impl::name; - static constexpr size_t word_size = Impl::word_size; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const ColumnString * col = checkAndGetColumn(column.get())) - { - auto col_res = ColumnString::create(); - - ColumnString::Chars & out_vec = col_res->getChars(); - ColumnString::Offsets & out_offsets = col_res->getOffsets(); - - const ColumnString::Chars & in_vec = col->getChars(); - const ColumnString::Offsets & in_offsets = col->getOffsets(); - - size_t size = in_offsets.size(); - out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - size_t prev_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - size_t new_offset = in_offsets[i]; - - Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); - - out_offsets[i] = pos - begin; - - prev_offset = new_offset; - } - - out_vec.resize(pos - begin); - - return col_res; - } - else - { - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - } -}; - -struct HexImpl -{ - static constexpr auto name = "hex"; - static constexpr size_t word_size = 2; - - template - static void executeOneUInt(T x, char *& out) - { - bool was_nonzero = false; - for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) - { - UInt8 byte = x >> offset; - - /// Skip leading zeros - if (byte == 0 && !was_nonzero && offset) - continue; - - was_nonzero = true; - writeHexByteUppercase(byte, out); - out += word_size; - } - *out = '\0'; - ++out; - } - - static void executeOneString(const UInt8 * pos, const 
UInt8 * end, char *& out) - { - while (pos < end) - { - writeHexByteUppercase(*pos, out); - ++pos; - out += word_size; - } - *out = '\0'; - ++out; - } - - template - static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) - { - const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte. - auto col_str = ColumnString::create(); - - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - size_t size = in_vec.size(); - out_offsets.resize(size); - out_vec.resize(size * hex_length); - - size_t pos = 0; - char * out = reinterpret_cast(&out_vec[0]); - for (size_t i = 0; i < size; ++i) - { - const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); - executeOneString(in_pos, in_pos + type_size_in_bytes, out); - - pos += hex_length; - out_offsets[i] = pos; - } - col_res = std::move(col_str); - } -}; - -struct UnhexImpl -{ - static constexpr auto name = "unhex"; - static constexpr size_t word_size = 2; - - static void decode(const char * pos, const char * end, char *& out) - { - if ((end - pos) & 1) - { - *out = unhex(*pos); - ++out; - ++pos; - } - while (pos < end) - { - *out = unhex2(pos); - pos += word_size; - ++out; - } - *out = '\0'; - ++out; - } -}; - -struct BinImpl -{ - static constexpr auto name = "bin"; - static constexpr size_t word_size = 8; - - template - static void executeOneUInt(T x, char *& out) - { - bool was_nonzero = false; - for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) - { - UInt8 byte = x >> offset; - - /// Skip leading zeros - if (byte == 0 && !was_nonzero && offset) - continue; - - was_nonzero = true; - writeBinByte(byte, out); - out += word_size; - } - *out = '\0'; - ++out; - } - - template - static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) - { - const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte. - auto col_str = ColumnString::create(); - - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - size_t size = in_vec.size(); - out_offsets.resize(size); - out_vec.resize(size * hex_length); - - size_t pos = 0; - char * out = reinterpret_cast(out_vec.data()); - for (size_t i = 0; i < size; ++i) - { - const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); - executeOneString(in_pos, in_pos + type_size_in_bytes, out); - - pos += hex_length; - out_offsets[i] = pos; - } - col_res = std::move(col_str); - } - - static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out) - { - while (pos < end) - { - writeBinByte(*pos, out); - ++pos; - out += word_size; - } - *out = '\0'; - ++out; - } -}; - -struct UnbinImpl -{ - static constexpr auto name = "unbin"; - static constexpr size_t word_size = 8; - - static void decode(const char * pos, const char * end, char *& out) - { - if (pos == end) - { - *out = '\0'; - ++out; - return; - } - - UInt8 left = 0; - - /// end - pos is the length of input. - /// (length & 7) to make remain bits length mod 8 is zero to split. - /// e.g. the length is 9 and the input is "101000001", - /// first left_cnt is 1, left is 0, right shift, pos is 1, left = 1 - /// then, left_cnt is 0, remain input is '01000001'. 
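Editorial aside, not part of the patch: a worked version of the split described above. For the 9-character input "101000001", the first 9 & 7 = 1 character yields the byte 0x01, and the remaining 8 characters yield 0b01000001 = 0x41:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

int main()
{
    // Prefix of (length mod 8) bits becomes the first byte; each further 8 chars become one byte.
    const char * in = "101000001";
    const char * pos = in;
    const char * end = in + std::strlen(in);
    std::vector<uint8_t> out;

    uint8_t left = 0;
    for (int left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt)
        left = (left << 1) | (*pos++ != '0');
    if (left != 0 || pos == end)
        out.push_back(left);

    while (pos != end)
    {
        uint8_t c = 0;
        for (int i = 0; i < 8; ++i)
            c = (c << 1) | (*pos++ != '0');
        out.push_back(c);
    }

    for (uint8_t b : out)
        std::printf("%02x ", b); // prints: 01 41
    std::printf("\n");
    return 0;
}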
- for (UInt8 left_cnt = (end - pos) & 7; left_cnt > 0; --left_cnt) - { - left = left << 1; - if (*pos != '0') - left += 1; - ++pos; - } - - if (left != 0 || end - pos == 0) - { - *out = left; - ++out; - } - - assert((end - pos) % 8 == 0); - - while (end - pos != 0) - { - UInt8 c = 0; - for (UInt8 i = 0; i < 8; ++i) - { - c = c << 1; - if (*pos != '0') - c += 1; - ++pos; - } - *out = c; - ++out; - } - - *out = '\0'; - ++out; - } -}; - -using FunctionHex = EncodeToBinaryRepr; -using FunctionUnhex = DecodeFromBinaryRepr; -using FunctionBin = EncodeToBinaryRepr; -using FunctionUnbin = DecodeFromBinaryRepr; - -class FunctionChar : public IFunction -{ -public: - static constexpr auto name = "char"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.empty()) - throw Exception("Number of arguments for function " + getName() + " can't be " + toString(arguments.size()) - + ", should be at least 1", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - for (const auto & arg : arguments) - { - WhichDataType which(arg); - if (!(which.isInt() || which.isUInt() || which.isFloat())) - throw Exception("Illegal type " + arg->getName() + " of argument of function " + getName() - + ", must be Int, UInt or Float number", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const auto size_per_row = arguments.size() + 1; - out_vec.resize(size_per_row * input_rows_count); - out_offsets.resize(input_rows_count); - - for (size_t row = 0; row < input_rows_count; ++row) - { - out_offsets[row] = size_per_row + out_offsets[row - 1]; - out_vec[row * size_per_row + size_per_row - 1] = '\0'; - } - - Columns columns_holder(arguments.size()); - for (size_t idx = 0; idx < arguments.size(); ++idx) - { - //partial const column - columns_holder[idx] = arguments[idx].column->convertToFullColumnIfConst(); - const IColumn * column = columns_holder[idx].get(); - - if (!(executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row) - || executeNumber(*column, out_vec, idx, input_rows_count, size_per_row))) - { - throw Exception{"Illegal column " + arguments[idx].column->getName() - + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; - } - } - - return col_str; - } - 
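Editorial aside, not part of the patch: `char` lays its output out row-major, every row occupying `arguments.size() + 1` bytes (one byte per argument plus a terminating zero), and each argument column fills its own byte offset across all rows. A compact sketch of that layout for two rows of char(72, 101, 108, 108, 111), i.e. 'Hello' (an illustrative input, not from the patch):

#include <cstdio>
#include <vector>

int main()
{
    // Five per-argument byte columns, two rows each.
    const std::vector<std::vector<int>> columns = {{72, 72}, {101, 101}, {108, 108}, {108, 108}, {111, 111}};
    const size_t rows = 2;
    const size_t size_per_row = columns.size() + 1; // one byte per argument + trailing '\0'

    std::vector<char> out(rows * size_per_row, '\0');
    for (size_t idx = 0; idx < columns.size(); ++idx)   // column by column, as executeNumber does
        for (size_t row = 0; row < rows; ++row)
            out[row * size_per_row + idx] = static_cast<char>(columns[idx][row]);

    for (size_t row = 0; row < rows; ++row)
        std::printf("%s\n", &out[row * size_per_row]);  // prints "Hello" twice
    return 0;
}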
-private: - template - bool executeNumber(const IColumn & src_data, ColumnString::Chars & out_vec, const size_t & column_idx, const size_t & rows, const size_t & size_per_row) const - { - const ColumnVector * src_data_concrete = checkAndGetColumn>(&src_data); - - if (!src_data_concrete) - { - return false; - } - - for (size_t row = 0; row < rows; ++row) - { - out_vec[row * size_per_row + column_idx] = static_cast(src_data_concrete->getInt(row)); - } - return true; - } -}; - -class FunctionBitmaskToArray : public IFunction -{ -public: - static constexpr auto name = "bitmaskToArray"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isInteger(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(arguments[0]); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - template - bool tryExecute(const IColumn * column, ColumnPtr & out_column) const - { - using UnsignedT = make_unsigned_t; - - if (const ColumnVector * col_from = checkAndGetColumn>(column)) - { - auto col_values = ColumnVector::create(); - auto col_offsets = ColumnArray::ColumnOffsets::create(); - - typename ColumnVector::Container & res_values = col_values->getData(); - ColumnArray::Offsets & res_offsets = col_offsets->getData(); - - const typename ColumnVector::Container & vec_from = col_from->getData(); - size_t size = vec_from.size(); - res_offsets.resize(size); - res_values.reserve(size * 2); - - for (size_t row = 0; row < size; ++row) - { - UnsignedT x = vec_from[row]; - while (x) - { - UnsignedT y = x & (x - 1); - UnsignedT bit = x ^ y; - x = y; - res_values.push_back(bit); - } - res_offsets[row] = res_values.size(); - } - - out_column = ColumnArray::create(std::move(col_values), std::move(col_offsets)); - return true; - } - else - { - return false; - } - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * in_column = arguments[0].column.get(); - ColumnPtr out_column; - - if (tryExecute(in_column, out_column) || - tryExecute(in_column, out_column) || - tryExecute(in_column, out_column) || - tryExecute(in_column, out_column) || - tryExecute(in_column, out_column) || - tryExecute(in_column, out_column) || - tryExecute(in_column, out_column) || - tryExecute(in_column, out_column)) - return out_column; - - throw Exception("Illegal column " + arguments[0].column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - -class FunctionBitPositionsToArray : public IFunction -{ -public: - static constexpr auto name = "bitPositionsToArray"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isInteger(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument of 
function {}", - getName(), - arguments[0]->getName()); - - return std::make_shared(std::make_shared()); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - template - ColumnPtr executeType(const IColumn * column) const - { - const ColumnVector * col_from = checkAndGetColumn>(column); - if (!col_from) - return nullptr; - - auto result_array_values = ColumnVector::create(); - auto result_array_offsets = ColumnArray::ColumnOffsets::create(); - - auto & result_array_values_data = result_array_values->getData(); - auto & result_array_offsets_data = result_array_offsets->getData(); - - auto & vec_from = col_from->getData(); - size_t size = vec_from.size(); - result_array_offsets_data.resize(size); - result_array_values_data.reserve(size * 2); - - using UnsignedType = make_unsigned_t; - - for (size_t row = 0; row < size; ++row) - { - UnsignedType x = static_cast(vec_from[row]); - - if constexpr (is_big_int_v) - { - size_t position = 0; - - while (x) - { - if (x & 1) - result_array_values_data.push_back(position); - - x >>= 1; - ++position; - } - } - else - { - while (x) - { - result_array_values_data.push_back(getTrailingZeroBitsUnsafe(x)); - x &= (x - 1); - } - } - - result_array_offsets_data[row] = result_array_values_data.size(); - } - - auto result_column = ColumnArray::create(std::move(result_array_values), std::move(result_array_offsets)); - - return result_column; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * in_column = arguments[0].column.get(); - ColumnPtr result_column; - - if (!((result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)) - || (result_column = executeType(in_column)))) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), - getName()); - } - - return result_column; - } -}; - -class FunctionToStringCutToZero : public IFunction -{ -public: - static constexpr auto name = "toStringCutToZero"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isStringOrFixedString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res) - { - const ColumnString * col_str_in = checkAndGetColumn(col); - - if (col_str_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_str_in->getChars(); - const 
ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); - - size_t size = in_offsets.size(); - out_offsets.resize(size); - out_vec.resize(in_vec.size()); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - ColumnString::Offset current_in_offset = 0; - - for (size_t i = 0; i < size; ++i) - { - const char * pos_in = reinterpret_cast(&in_vec[current_in_offset]); - size_t current_size = strlen(pos_in); - memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size); - pos += current_size; - *pos = '\0'; - ++pos; - out_offsets[i] = pos - begin; - current_in_offset = in_offsets[i]; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) - { - const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); - - if (col_fstr_in) - { - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const ColumnString::Chars & in_vec = col_fstr_in->getChars(); - - size_t size = col_fstr_in->size(); - - out_offsets.resize(size); - out_vec.resize(in_vec.size() + size); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - const char * pos_in = reinterpret_cast(in_vec.data()); - - size_t n = col_fstr_in->getN(); - - for (size_t i = 0; i < size; ++i) - { - size_t current_size = strnlen(pos_in, n); - memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size); - pos += current_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - pos_in += n; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - col_res = std::move(col_str); - return true; - } - else - { - return false; - } - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * column = arguments[0].column.get(); - ColumnPtr res_column; - - if (tryExecuteFixedString(column, res_column) || tryExecuteString(column, res_column)) - return res_column; - - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - - -class FunctionIPv6CIDRToRange : public IFunction -{ -private: - -#if defined(__SSE2__) - - #include - - static inline void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep) - { - __m128i mask = _mm_loadu_si128(reinterpret_cast(getCIDRMaskIPv6(bits_to_keep).data())); - __m128i lower = _mm_and_si128(_mm_loadu_si128(reinterpret_cast(src)), mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst_lower), lower); - - __m128i inv_mask = _mm_xor_si128(mask, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - __m128i upper = _mm_or_si128(lower, inv_mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst_upper), upper); - } - -#else - - /// NOTE IPv6 is stored in memory in big endian format that makes some difficulties. 
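Both applyCIDRMask variants (the SSE2 one above and the scalar one that follows) derive a CIDR block's bounds from a mask whose first bits_to_keep bits are set: the lower bound keeps only the network bits of the address, the upper bound additionally sets every host bit. Below is a standalone scalar sketch of that idea; makeCIDRMaskIPv6 is an illustrative stand-in for the getCIDRMaskIPv6 helper used by the real code, and the bytes are treated in plain network order.

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    /// Stand-in for getCIDRMaskIPv6(): 16 bytes with the first `bits_to_keep` bits set.
    std::array<uint8_t, 16> makeCIDRMaskIPv6(uint8_t bits_to_keep)
    {
        std::array<uint8_t, 16> mask{};
        for (size_t i = 0; i < 16; ++i)
        {
            if (bits_to_keep >= 8)
            {
                mask[i] = 0xFF;
                bits_to_keep -= 8;
            }
            else
            {
                mask[i] = static_cast<uint8_t>(0xFF << (8 - bits_to_keep));
                bits_to_keep = 0;
            }
        }
        return mask;
    }

    /// Same byte-wise logic as the scalar applyCIDRMask: keep the network bits for the
    /// lower bound, set all host bits for the upper bound.
    void cidrRangeIPv6(const uint8_t * src, uint8_t * dst_lower, uint8_t * dst_upper, uint8_t bits_to_keep)
    {
        const auto mask = makeCIDRMaskIPv6(bits_to_keep);
        for (size_t i = 0; i < 16; ++i)
        {
            dst_lower[i] = src[i] & mask[i];
            dst_upper[i] = dst_lower[i] | static_cast<uint8_t>(~mask[i]);
        }
    }

    int main()
    {
        /// 2001:db8::1 with prefix /32: lower bound is 2001:db8::, upper bound is 2001:db8:ffff:...:ffff.
        uint8_t addr[16] = {0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
        uint8_t lower[16], upper[16];
        cidrRangeIPv6(addr, lower, upper, 32);
        assert(lower[3] == 0xb8 && lower[15] == 0x00);
        assert(upper[4] == 0xff && upper[15] == 0xff);
    }

The SSE2 variant computes the same two results for all 16 bytes at once, using the all-ones register produced by _mm_cmpeq_epi32 to invert the mask.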
- static void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep) - { - const auto & mask = getCIDRMaskIPv6(bits_to_keep); - - for (size_t i = 0; i < 16; ++i) - { - dst_lower[i] = src[i] & mask[i]; - dst_upper[i] = dst_lower[i] | ~mask[i]; - } - } - -#endif - -public: - static constexpr auto name = "IPv6CIDRToRange"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const auto * first_argument = checkAndGetDataType(arguments[0].get()); - if (!first_argument || first_argument->getN() != IPV6_BINARY_LENGTH) - throw Exception("Illegal type " + arguments[0]->getName() + - " of first argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const DataTypePtr & second_argument = arguments[1]; - if (!isUInt8(second_argument)) - throw Exception{"Illegal type " + second_argument->getName() - + " of second argument of function " + getName() - + ", expected UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - DataTypePtr element = DataTypeFactory::instance().get("IPv6"); - return std::make_shared(DataTypes{element, element}); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const auto & col_type_name_ip = arguments[0]; - const ColumnPtr & column_ip = col_type_name_ip.column; - - const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); - const auto * col_ip_in = checkAndGetColumn(column_ip.get()); - - if (!col_ip_in && !col_const_ip_in) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - if ((col_const_ip_in && col_const_ip_in->getValue().size() != IPV6_BINARY_LENGTH) || - (col_ip_in && col_ip_in->getN() != IPV6_BINARY_LENGTH)) - throw Exception("Illegal type " + col_type_name_ip.type->getName() + - " of column " + column_ip->getName() + - " argument of function " + getName() + - ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto & col_type_name_cidr = arguments[1]; - const ColumnPtr & column_cidr = col_type_name_cidr.column; - - const auto * col_const_cidr_in = checkAndGetColumnConst(column_cidr.get()); - const auto * col_cidr_in = checkAndGetColumn(column_cidr.get()); - - if (!col_const_cidr_in && !col_cidr_in) - throw Exception("Illegal column " + arguments[1].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - auto col_res_lower_range = ColumnFixedString::create(IPV6_BINARY_LENGTH); - auto col_res_upper_range = ColumnFixedString::create(IPV6_BINARY_LENGTH); - - ColumnString::Chars & vec_res_lower_range = col_res_lower_range->getChars(); - vec_res_lower_range.resize(input_rows_count * IPV6_BINARY_LENGTH); - - ColumnString::Chars & vec_res_upper_range = col_res_upper_range->getChars(); - vec_res_upper_range.resize(input_rows_count * IPV6_BINARY_LENGTH); - - static constexpr UInt8 max_cidr_mask = IPV6_BINARY_LENGTH * 8; - - const String col_const_ip_str = col_const_ip_in ? 
col_const_ip_in->getValue() : ""; - const UInt8 * col_const_ip_value = col_const_ip_in ? reinterpret_cast(col_const_ip_str.c_str()) : nullptr; - - for (size_t offset = 0; offset < input_rows_count; ++offset) - { - const size_t offset_ipv6 = offset * IPV6_BINARY_LENGTH; - - const UInt8 * ip = col_const_ip_in - ? col_const_ip_value - : &col_ip_in->getChars()[offset_ipv6]; - - UInt8 cidr = col_const_cidr_in - ? col_const_cidr_in->getValue() - : col_cidr_in->getData()[offset]; - - cidr = std::min(cidr, max_cidr_mask); - - applyCIDRMask(ip, &vec_res_lower_range[offset_ipv6], &vec_res_upper_range[offset_ipv6], cidr); - } - - return ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)}); - } -}; - - -class FunctionIPv4CIDRToRange : public IFunction -{ -private: - static inline std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) - { - if (bits_to_keep >= 8 * sizeof(UInt32)) - return { src, src }; - if (bits_to_keep == 0) - return { UInt32(0), UInt32(-1) }; - - UInt32 mask = UInt32(-1) << (8 * sizeof(UInt32) - bits_to_keep); - UInt32 lower = src & mask; - UInt32 upper = lower | ~mask; - - return { lower, upper }; - } - -public: - static constexpr auto name = "IPv4CIDRToRange"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!WhichDataType(arguments[0]).isUInt32()) - throw Exception("Illegal type " + arguments[0]->getName() + - " of first argument of function " + getName() + - ", expected UInt32", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - - const DataTypePtr & second_argument = arguments[1]; - if (!isUInt8(second_argument)) - throw Exception{"Illegal type " + second_argument->getName() - + " of second argument of function " + getName() - + ", expected UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - DataTypePtr element = DataTypeFactory::instance().get("IPv4"); - return std::make_shared(DataTypes{element, element}); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const auto & col_type_name_ip = arguments[0]; - const ColumnPtr & column_ip = col_type_name_ip.column; - - const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); - const auto * col_ip_in = checkAndGetColumn(column_ip.get()); - if (!col_const_ip_in && !col_ip_in) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - const auto & col_type_name_cidr = arguments[1]; - const ColumnPtr & column_cidr = col_type_name_cidr.column; - - const auto * col_const_cidr_in = checkAndGetColumnConst(column_cidr.get()); - const auto * col_cidr_in = checkAndGetColumn(column_cidr.get()); - - if (!col_const_cidr_in && !col_cidr_in) - throw Exception("Illegal column " + arguments[1].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - auto col_res_lower_range = ColumnUInt32::create(); - auto col_res_upper_range = ColumnUInt32::create(); - - auto & vec_res_lower_range = col_res_lower_range->getData(); - vec_res_lower_range.resize(input_rows_count); - - auto & vec_res_upper_range = col_res_upper_range->getData(); - vec_res_upper_range.resize(input_rows_count); - - for (size_t 
i = 0; i < input_rows_count; ++i) - { - UInt32 ip = col_const_ip_in - ? col_const_ip_in->getValue() - : col_ip_in->getData()[i]; - - UInt8 cidr = col_const_cidr_in - ? col_const_cidr_in->getValue() - : col_cidr_in->getData()[i]; - - std::tie(vec_res_lower_range[i], vec_res_upper_range[i]) = applyCIDRMask(ip, cidr); - } - - return ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)}); - } -}; - -class FunctionIsIPv4String : public FunctionIPv4StringToNum -{ -public: - static constexpr auto name = "isIPv4String"; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - if (const ColumnString * col = checkAndGetColumn(column.get())) - { - auto col_res = ColumnUInt8::create(); - - ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); - - const ColumnString::Chars & vec_src = col->getChars(); - const ColumnString::Offsets & offsets_src = col->getOffsets(); - size_t prev_offset = 0; - UInt32 result = 0; - - for (size_t i = 0; i < vec_res.size(); ++i) - { - vec_res[i] = DB::parseIPv4(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&result)); - prev_offset = offsets_src[i]; - } - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - -class FunctionIsIPv6String : public FunctionIPv6StringToNum -{ -public: - static constexpr auto name = "isIPv6String"; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const ColumnPtr & column = arguments[0].column; - - if (const ColumnString * col = checkAndGetColumn(column.get())) - { - auto col_res = ColumnUInt8::create(); - - ColumnUInt8::Container & vec_res = col_res->getData(); - vec_res.resize(col->size()); - - const ColumnString::Chars & vec_src = col->getChars(); - const ColumnString::Offsets & offsets_src = col->getOffsets(); - size_t prev_offset = 0; - char v[IPV6_BINARY_LENGTH]; - - for (size_t i = 0; i < vec_res.size(); ++i) - { - vec_res[i] = DB::parseIPv6(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(v)); - prev_offset = offsets_src[i]; - } - return col_res; - } - else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - -} diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp new file mode 100644 index 00000000000..20af7d41aca --- /dev/null +++ 
b/src/Functions/FunctionsCodingIP.cpp @@ -0,0 +1,1077 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + + +/** Encoding functions for network addresses: + * + * IPv4NumToString (num) - See below. + * IPv4StringToNum(string) - Convert, for example, '192.168.0.1' to 3232235521 and vice versa. + */ +class FunctionIPv6NumToString : public IFunction +{ +public: + static constexpr auto name = "IPv6NumToString"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * ptr = checkAndGetDataType(arguments[0].get()); + if (!ptr || ptr->getN() != IPV6_BINARY_LENGTH) + throw Exception("Illegal type " + arguments[0]->getName() + + " of argument of function " + getName() + + ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const auto & col_type_name = arguments[0]; + const ColumnPtr & column = col_type_name.column; + + if (const auto * col_in = checkAndGetColumn(column.get())) + { + if (col_in->getN() != IPV6_BINARY_LENGTH) + throw Exception("Illegal type " + col_type_name.type->getName() + + " of column " + col_in->getName() + + " argument of function " + getName() + + ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto size = col_in->size(); + const auto & vec_in = col_in->getChars(); + + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1)); + offsets_res.resize(size); + + auto * begin = reinterpret_cast(vec_res.data()); + auto * pos = begin; + + for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += IPV6_BINARY_LENGTH, ++i) + { + formatIPv6(reinterpret_cast(&vec_in[offset]), pos); + offsets_res[i] = pos - begin; + } + + vec_res.resize(pos - begin); + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +class FunctionCutIPv6 : public IFunction +{ +public: + static constexpr auto name = "cutIPv6"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * ptr = checkAndGetDataType(arguments[0].get()); + if (!ptr || ptr->getN() != IPV6_BINARY_LENGTH) + throw Exception("Illegal type " + arguments[0]->getName() + + " of argument 1 of function " + getName() + + ", expected FixedString(" + 
toString(IPV6_BINARY_LENGTH) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!WhichDataType(arguments[1]).isUInt8()) + throw Exception("Illegal type " + arguments[1]->getName() + + " of argument 2 of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!WhichDataType(arguments[2]).isUInt8()) + throw Exception("Illegal type " + arguments[2]->getName() + + " of argument 3 of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const auto & col_type_name = arguments[0]; + const ColumnPtr & column = col_type_name.column; + + const auto & col_ipv6_zeroed_tail_bytes_type = arguments[1]; + const auto & col_ipv6_zeroed_tail_bytes = col_ipv6_zeroed_tail_bytes_type.column; + const auto & col_ipv4_zeroed_tail_bytes_type = arguments[2]; + const auto & col_ipv4_zeroed_tail_bytes = col_ipv4_zeroed_tail_bytes_type.column; + + if (const auto * col_in = checkAndGetColumn(column.get())) + { + if (col_in->getN() != IPV6_BINARY_LENGTH) + throw Exception("Illegal type " + col_type_name.type->getName() + + " of column " + col_in->getName() + + " argument of function " + getName() + + ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto * ipv6_zeroed_tail_bytes = checkAndGetColumnConst>(col_ipv6_zeroed_tail_bytes.get()); + if (!ipv6_zeroed_tail_bytes) + throw Exception("Illegal type " + col_ipv6_zeroed_tail_bytes_type.type->getName() + + " of argument 2 of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + UInt8 ipv6_zeroed_tail_bytes_count = ipv6_zeroed_tail_bytes->getValue(); + if (ipv6_zeroed_tail_bytes_count > IPV6_BINARY_LENGTH) + throw Exception("Illegal value for argument 2 " + col_ipv6_zeroed_tail_bytes_type.type->getName() + + " of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto * ipv4_zeroed_tail_bytes = checkAndGetColumnConst>(col_ipv4_zeroed_tail_bytes.get()); + if (!ipv4_zeroed_tail_bytes) + throw Exception("Illegal type " + col_ipv4_zeroed_tail_bytes_type.type->getName() + + " of argument 3 of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + UInt8 ipv4_zeroed_tail_bytes_count = ipv4_zeroed_tail_bytes->getValue(); + if (ipv4_zeroed_tail_bytes_count > IPV6_BINARY_LENGTH) + throw Exception("Illegal value for argument 3 " + col_ipv4_zeroed_tail_bytes_type.type->getName() + + " of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto size = col_in->size(); + const auto & vec_in = col_in->getChars(); + + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1)); + offsets_res.resize(size); + + auto * begin = reinterpret_cast(vec_res.data()); + auto * pos = begin; + + for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += IPV6_BINARY_LENGTH, ++i) + { + const auto * address = &vec_in[offset]; + UInt8 zeroed_tail_bytes_count = isIPv4Mapped(address) ? 
ipv4_zeroed_tail_bytes_count : ipv6_zeroed_tail_bytes_count; + cutAddress(reinterpret_cast(address), pos, zeroed_tail_bytes_count); + offsets_res[i] = pos - begin; + } + + vec_res.resize(pos - begin); + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + +private: + static bool isIPv4Mapped(const UInt8 * address) + { + return (unalignedLoad(address) == 0) && + ((unalignedLoad(address + 8) & 0x00000000FFFFFFFFull) == 0x00000000FFFF0000ull); + } + + static void cutAddress(const unsigned char * address, char *& dst, UInt8 zeroed_tail_bytes_count) + { + formatIPv6(address, dst, zeroed_tail_bytes_count); + } +}; + + +class FunctionIPv6StringToNum : public IFunction +{ +public: + static constexpr auto name = "IPv6StringToNum"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + static inline bool tryParseIPv4(const char * pos) + { + UInt32 result = 0; + return DB::parseIPv4(pos, reinterpret_cast(&result)); + } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception( + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(IPV6_BINARY_LENGTH); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + + if (const auto * col_in = checkAndGetColumn(column.get())) + { + auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); + + auto & vec_res = col_res->getChars(); + vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH); + + const ColumnString::Chars & vec_src = col_in->getChars(); + const ColumnString::Offsets & offsets_src = col_in->getOffsets(); + size_t src_offset = 0; + char src_ipv4_buf[sizeof("::ffff:") + IPV4_MAX_TEXT_LENGTH + 1] = "::ffff:"; + + for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i) + { + /// For both cases below: In case of failure, the function parseIPv6 fills vec_res with zero bytes. + + /// If the source IP address is parsable as an IPv4 address, then transform it into a valid IPv6 address. + /// Keeping it simple by just prefixing `::ffff:` to the IPv4 address to represent it as a valid IPv6 address. + if (tryParseIPv4(reinterpret_cast(&vec_src[src_offset]))) + { + std::memcpy( + src_ipv4_buf + std::strlen("::ffff:"), + reinterpret_cast(&vec_src[src_offset]), + std::min(offsets_src[i] - src_offset, IPV4_MAX_TEXT_LENGTH + 1)); + parseIPv6(src_ipv4_buf, reinterpret_cast(&vec_res[out_offset])); + } + else + { + parseIPv6( + reinterpret_cast(&vec_src[src_offset]), reinterpret_cast(&vec_res[out_offset])); + } + src_offset = offsets_src[i]; + } + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +/** If mask_tail_octets > 0, the last specified number of octets will be filled with "xxx". 
+ */ +template +class FunctionIPv4NumToString : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return mask_tail_octets == 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!WhichDataType(arguments[0]).isUInt32()) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected UInt32", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + + if (const ColumnUInt32 * col = typeid_cast(column.get())) + { + const ColumnUInt32::Container & vec_in = col->getData(); + + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + + vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0 + offsets_res.resize(vec_in.size()); + char * begin = reinterpret_cast(vec_res.data()); + char * pos = begin; + + for (size_t i = 0; i < vec_in.size(); ++i) + { + DB::formatIPv4(reinterpret_cast(&vec_in[i]), pos, mask_tail_octets, "xxx"); + offsets_res[i] = pos - begin; + } + + vec_res.resize(pos - begin); + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +class FunctionIPv4StringToNum : public IFunction +{ +public: + static constexpr auto name = "IPv4StringToNum"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + static inline UInt32 parseIPv4(const char * pos) + { + UInt32 result = 0; + DB::parseIPv4(pos, reinterpret_cast(&result)); + + return result; + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnUInt32::create(); + + ColumnUInt32::Container & vec_res = col_res->getData(); + vec_res.resize(col->size()); + + const ColumnString::Chars & vec_src = col->getChars(); + const ColumnString::Offsets & offsets_src = col->getOffsets(); + size_t prev_offset = 0; + + for (size_t i = 0; i < vec_res.size(); ++i) + { + vec_res[i] = parseIPv4(reinterpret_cast(&vec_src[prev_offset])); + prev_offset = offsets_src[i]; + } + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of 
function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +class FunctionIPv4ToIPv6 : public IFunction +{ +public: + static constexpr auto name = "IPv4ToIPv6"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!checkAndGetDataType(arguments[0].get())) + throw Exception("Illegal type " + arguments[0]->getName() + + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(16); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const auto & col_type_name = arguments[0]; + const ColumnPtr & column = col_type_name.column; + + if (const auto * col_in = typeid_cast(column.get())) + { + auto col_res = ColumnFixedString::create(IPV6_BINARY_LENGTH); + + auto & vec_res = col_res->getChars(); + vec_res.resize(col_in->size() * IPV6_BINARY_LENGTH); + + const auto & vec_in = col_in->getData(); + + for (size_t out_offset = 0, i = 0; out_offset < vec_res.size(); out_offset += IPV6_BINARY_LENGTH, ++i) + mapIPv4ToIPv6(vec_in[i], &vec_res[out_offset]); + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + +private: + static void mapIPv4ToIPv6(UInt32 in, UInt8 * buf) + { + unalignedStore(buf, 0); + unalignedStore(buf + 8, 0x00000000FFFF0000ull | (static_cast(ntohl(in)) << 32)); + } +}; + +class FunctionToIPv4 : public FunctionIPv4StringToNum +{ +public: + static constexpr auto name = "toIPv4"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return DataTypeFactory::instance().get("IPv4"); + } +}; + +class FunctionToIPv6 : public FunctionIPv6StringToNum +{ +public: + static constexpr auto name = "toIPv6"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return DataTypeFactory::instance().get("IPv6"); + } +}; + +class FunctionMACNumToString : public IFunction +{ +public: + static constexpr auto name = "MACNumToString"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!WhichDataType(arguments[0]).isUInt64()) + throw 
Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected UInt64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + static void formatMAC(UInt64 mac, UInt8 * out) + { + /// MAC address is represented in UInt64 in natural order (so, MAC addresses are compared in same order as UInt64). + /// Higher two bytes in UInt64 are just ignored. + + writeHexByteUppercase(mac >> 40, &out[0]); + out[2] = ':'; + writeHexByteUppercase(mac >> 32, &out[3]); + out[5] = ':'; + writeHexByteUppercase(mac >> 24, &out[6]); + out[8] = ':'; + writeHexByteUppercase(mac >> 16, &out[9]); + out[11] = ':'; + writeHexByteUppercase(mac >> 8, &out[12]); + out[14] = ':'; + writeHexByteUppercase(mac, &out[15]); + out[17] = '\0'; + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + + if (const ColumnUInt64 * col = typeid_cast(column.get())) + { + const ColumnUInt64::Container & vec_in = col->getData(); + + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + + vec_res.resize(vec_in.size() * 18); /// the value is: xx:xx:xx:xx:xx:xx\0 + offsets_res.resize(vec_in.size()); + + size_t current_offset = 0; + for (size_t i = 0; i < vec_in.size(); ++i) + { + formatMAC(vec_in[i], &vec_res[current_offset]); + current_offset += 18; + offsets_res[i] = current_offset; + } + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +struct ParseMACImpl +{ + static constexpr size_t min_string_size = 17; + static constexpr size_t max_string_size = 17; + + /** Example: 01:02:03:04:05:06. + * There could be any separators instead of : and them are just ignored. + * The order of resulting integers are correspond to the order of MAC address. + * If there are any chars other than valid hex digits for bytes, the behaviour is implementation specific. + */ + static UInt64 parse(const char * pos) + { + return (UInt64(unhex(pos[0])) << 44) + | (UInt64(unhex(pos[1])) << 40) + | (UInt64(unhex(pos[3])) << 36) + | (UInt64(unhex(pos[4])) << 32) + | (UInt64(unhex(pos[6])) << 28) + | (UInt64(unhex(pos[7])) << 24) + | (UInt64(unhex(pos[9])) << 20) + | (UInt64(unhex(pos[10])) << 16) + | (UInt64(unhex(pos[12])) << 12) + | (UInt64(unhex(pos[13])) << 8) + | (UInt64(unhex(pos[15])) << 4) + | (UInt64(unhex(pos[16]))); + } + + static constexpr auto name = "MACStringToNum"; +}; + +struct ParseOUIImpl +{ + static constexpr size_t min_string_size = 8; + static constexpr size_t max_string_size = 17; + + /** OUI is the first three bytes of MAC address. + * Example: 01:02:03. 
+     */
+    static UInt64 parse(const char * pos)
+    {
+        return (UInt64(unhex(pos[0])) << 20)
+             | (UInt64(unhex(pos[1])) << 16)
+             | (UInt64(unhex(pos[3])) << 12)
+             | (UInt64(unhex(pos[4])) << 8)
+             | (UInt64(unhex(pos[6])) << 4)
+             | (UInt64(unhex(pos[7])));
+    }
+
+    static constexpr auto name = "MACStringToOUI";
+};
+
+
+template <typename Impl>
+class FunctionMACStringTo : public IFunction
+{
+public:
+    static constexpr auto name = Impl::name;
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMACStringTo<Impl>>(); }
+
+    String getName() const override
+    {
+        return name;
+    }
+
+    size_t getNumberOfArguments() const override { return 1; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!isString(arguments[0]))
+            throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        return std::make_shared<DataTypeUInt64>();
+    }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        const ColumnPtr & column = arguments[0].column;
+
+        if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
+        {
+            auto col_res = ColumnUInt64::create();
+
+            ColumnUInt64::Container & vec_res = col_res->getData();
+            vec_res.resize(col->size());
+
+            const ColumnString::Chars & vec_src = col->getChars();
+            const ColumnString::Offsets & offsets_src = col->getOffsets();
+            size_t prev_offset = 0;
+
+            for (size_t i = 0; i < vec_res.size(); ++i)
+            {
+                size_t current_offset = offsets_src[i];
+                size_t string_size = current_offset - prev_offset - 1; /// mind the terminating zero byte
+
+                if (string_size >= Impl::min_string_size && string_size <= Impl::max_string_size)
+                    vec_res[i] = Impl::parse(reinterpret_cast<const char *>(&vec_src[prev_offset]));
+                else
+                    vec_res[i] = 0;
+
+                prev_offset = current_offset;
+            }
+
+            return col_res;
+        }
+        else
+            throw Exception("Illegal column " + arguments[0].column->getName()
+                + " of argument of function " + getName(),
+                ErrorCodes::ILLEGAL_COLUMN);
+    }
+};
+
+class FunctionIPv6CIDRToRange : public IFunction
+{
+private:
+
+#if defined(__SSE2__)
+
+#include <emmintrin.h>
+
+    static inline void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep)
+    {
+        __m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(getCIDRMaskIPv6(bits_to_keep).data()));
+        __m128i lower = _mm_and_si128(_mm_loadu_si128(reinterpret_cast<const __m128i *>(src)), mask);
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst_lower), lower);
+
+        __m128i inv_mask = _mm_xor_si128(mask, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
+        __m128i upper = _mm_or_si128(lower, inv_mask);
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst_upper), upper);
+    }
+
+#else
+
+    /// NOTE IPv6 is stored in memory in big endian format that makes some difficulties.
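Stepping back to the MAC helpers defined above: ParseMACImpl::parse reads two hex digits per byte at fixed positions of the 17-character form, so whatever single character sits at the separator positions (':' or '-') is skipped without validation, exactly as the comment says. Below is a standalone sketch of the same fixed-position scheme; unhexDigit is a simplified stand-in for the codebase's unhex() helper and parseMAC is an illustrative name.

    #include <cassert>
    #include <cstdint>

    /// Stand-in for unhex(): a single hex digit to its numeric value.
    static uint8_t unhexDigit(char c)
    {
        if (c >= '0' && c <= '9') return static_cast<uint8_t>(c - '0');
        if (c >= 'a' && c <= 'f') return static_cast<uint8_t>(c - 'a' + 10);
        if (c >= 'A' && c <= 'F') return static_cast<uint8_t>(c - 'A' + 10);
        return 0;
    }

    /// Same layout as ParseMACImpl::parse: hex digits at 0,1,3,4,...,15,16; separators at 2,5,8,11,14 are ignored.
    static uint64_t parseMAC(const char * pos)
    {
        return (uint64_t(unhexDigit(pos[0])) << 44)
             | (uint64_t(unhexDigit(pos[1])) << 40)
             | (uint64_t(unhexDigit(pos[3])) << 36)
             | (uint64_t(unhexDigit(pos[4])) << 32)
             | (uint64_t(unhexDigit(pos[6])) << 28)
             | (uint64_t(unhexDigit(pos[7])) << 24)
             | (uint64_t(unhexDigit(pos[9])) << 20)
             | (uint64_t(unhexDigit(pos[10])) << 16)
             | (uint64_t(unhexDigit(pos[12])) << 12)
             | (uint64_t(unhexDigit(pos[13])) << 8)
             | (uint64_t(unhexDigit(pos[15])) << 4)
             | (uint64_t(unhexDigit(pos[16])));
    }

    int main()
    {
        /// Byte order is preserved, so the resulting integer compares the same way as the MAC address.
        assert(parseMAC("01:02:03:04:05:06") == 0x010203040506ULL);
        assert(parseMAC("FF-FF-FF-FF-FF-FF") == 0xFFFFFFFFFFFFULL); /// any separator character works
    }

ParseOUIImpl uses the same scheme but stops after the first three bytes, which is why FunctionMACStringTo only checks that the string length lies between Impl::min_string_size and Impl::max_string_size before calling Impl::parse.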
+ static void applyCIDRMask(const UInt8 * __restrict src, UInt8 * __restrict dst_lower, UInt8 * __restrict dst_upper, UInt8 bits_to_keep) + { + const auto & mask = getCIDRMaskIPv6(bits_to_keep); + + for (size_t i = 0; i < 16; ++i) + { + dst_lower[i] = src[i] & mask[i]; + dst_upper[i] = dst_lower[i] | ~mask[i]; + } + } + +#endif + +public: + static constexpr auto name = "IPv6CIDRToRange"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * first_argument = checkAndGetDataType(arguments[0].get()); + if (!first_argument || first_argument->getN() != IPV6_BINARY_LENGTH) + throw Exception("Illegal type " + arguments[0]->getName() + + " of first argument of function " + getName() + + ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const DataTypePtr & second_argument = arguments[1]; + if (!isUInt8(second_argument)) + throw Exception{"Illegal type " + second_argument->getName() + + " of second argument of function " + getName() + + ", expected UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + DataTypePtr element = DataTypeFactory::instance().get("IPv6"); + return std::make_shared(DataTypes{element, element}); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_type_name_ip = arguments[0]; + const ColumnPtr & column_ip = col_type_name_ip.column; + + const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); + const auto * col_ip_in = checkAndGetColumn(column_ip.get()); + + if (!col_ip_in && !col_const_ip_in) + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + if ((col_const_ip_in && col_const_ip_in->getValue().size() != IPV6_BINARY_LENGTH) || + (col_ip_in && col_ip_in->getN() != IPV6_BINARY_LENGTH)) + throw Exception("Illegal type " + col_type_name_ip.type->getName() + + " of column " + column_ip->getName() + + " argument of function " + getName() + + ", expected FixedString(" + toString(IPV6_BINARY_LENGTH) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto & col_type_name_cidr = arguments[1]; + const ColumnPtr & column_cidr = col_type_name_cidr.column; + + const auto * col_const_cidr_in = checkAndGetColumnConst(column_cidr.get()); + const auto * col_cidr_in = checkAndGetColumn(column_cidr.get()); + + if (!col_const_cidr_in && !col_cidr_in) + throw Exception("Illegal column " + arguments[1].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + auto col_res_lower_range = ColumnFixedString::create(IPV6_BINARY_LENGTH); + auto col_res_upper_range = ColumnFixedString::create(IPV6_BINARY_LENGTH); + + ColumnString::Chars & vec_res_lower_range = col_res_lower_range->getChars(); + vec_res_lower_range.resize(input_rows_count * IPV6_BINARY_LENGTH); + + ColumnString::Chars & vec_res_upper_range = col_res_upper_range->getChars(); + vec_res_upper_range.resize(input_rows_count * IPV6_BINARY_LENGTH); + + static constexpr UInt8 max_cidr_mask = IPV6_BINARY_LENGTH * 8; + + const String col_const_ip_str = col_const_ip_in ? 
col_const_ip_in->getValue() : ""; + const UInt8 * col_const_ip_value = col_const_ip_in ? reinterpret_cast(col_const_ip_str.c_str()) : nullptr; + + for (size_t offset = 0; offset < input_rows_count; ++offset) + { + const size_t offset_ipv6 = offset * IPV6_BINARY_LENGTH; + + const UInt8 * ip = col_const_ip_in + ? col_const_ip_value + : &col_ip_in->getChars()[offset_ipv6]; + + UInt8 cidr = col_const_cidr_in + ? col_const_cidr_in->getValue() + : col_cidr_in->getData()[offset]; + + cidr = std::min(cidr, max_cidr_mask); + + applyCIDRMask(ip, &vec_res_lower_range[offset_ipv6], &vec_res_upper_range[offset_ipv6], cidr); + } + + return ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)}); + } +}; + + +class FunctionIPv4CIDRToRange : public IFunction +{ +private: + static inline std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) + { + if (bits_to_keep >= 8 * sizeof(UInt32)) + return { src, src }; + if (bits_to_keep == 0) + return { UInt32(0), UInt32(-1) }; + + UInt32 mask = UInt32(-1) << (8 * sizeof(UInt32) - bits_to_keep); + UInt32 lower = src & mask; + UInt32 upper = lower | ~mask; + + return { lower, upper }; + } + +public: + static constexpr auto name = "IPv4CIDRToRange"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!WhichDataType(arguments[0]).isUInt32()) + throw Exception("Illegal type " + arguments[0]->getName() + + " of first argument of function " + getName() + + ", expected UInt32", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + + const DataTypePtr & second_argument = arguments[1]; + if (!isUInt8(second_argument)) + throw Exception{"Illegal type " + second_argument->getName() + + " of second argument of function " + getName() + + ", expected UInt8", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + DataTypePtr element = DataTypeFactory::instance().get("IPv4"); + return std::make_shared(DataTypes{element, element}); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & col_type_name_ip = arguments[0]; + const ColumnPtr & column_ip = col_type_name_ip.column; + + const auto * col_const_ip_in = checkAndGetColumnConst(column_ip.get()); + const auto * col_ip_in = checkAndGetColumn(column_ip.get()); + if (!col_const_ip_in && !col_ip_in) + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + const auto & col_type_name_cidr = arguments[1]; + const ColumnPtr & column_cidr = col_type_name_cidr.column; + + const auto * col_const_cidr_in = checkAndGetColumnConst(column_cidr.get()); + const auto * col_cidr_in = checkAndGetColumn(column_cidr.get()); + + if (!col_const_cidr_in && !col_cidr_in) + throw Exception("Illegal column " + arguments[1].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + auto col_res_lower_range = ColumnUInt32::create(); + auto col_res_upper_range = ColumnUInt32::create(); + + auto & vec_res_lower_range = col_res_lower_range->getData(); + vec_res_lower_range.resize(input_rows_count); + + auto & vec_res_upper_range = col_res_upper_range->getData(); + vec_res_upper_range.resize(input_rows_count); + + for (size_t 
i = 0; i < input_rows_count; ++i) + { + UInt32 ip = col_const_ip_in + ? col_const_ip_in->getValue() + : col_ip_in->getData()[i]; + + UInt8 cidr = col_const_cidr_in + ? col_const_cidr_in->getValue() + : col_cidr_in->getData()[i]; + + std::tie(vec_res_lower_range[i], vec_res_upper_range[i]) = applyCIDRMask(ip, cidr); + } + + return ColumnTuple::create(Columns{std::move(col_res_lower_range), std::move(col_res_upper_range)}); + } +}; + +class FunctionIsIPv4String : public FunctionIPv4StringToNum +{ +public: + static constexpr auto name = "isIPv4String"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnUInt8::create(); + + ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(col->size()); + + const ColumnString::Chars & vec_src = col->getChars(); + const ColumnString::Offsets & offsets_src = col->getOffsets(); + size_t prev_offset = 0; + UInt32 result = 0; + + for (size_t i = 0; i < vec_res.size(); ++i) + { + vec_res[i] = DB::parseIPv4(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(&result)); + prev_offset = offsets_src[i]; + } + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + +class FunctionIsIPv6String : public FunctionIPv6StringToNum +{ +public: + static constexpr auto name = "isIPv6String"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr & column = arguments[0].column; + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnUInt8::create(); + + ColumnUInt8::Container & vec_res = col_res->getData(); + vec_res.resize(col->size()); + + const ColumnString::Chars & vec_src = col->getChars(); + const ColumnString::Offsets & offsets_src = col->getOffsets(); + size_t prev_offset = 0; + char v[IPV6_BINARY_LENGTH]; + + for (size_t i = 0; i < vec_res.size(); ++i) + { + vec_res[i] = DB::parseIPv6(reinterpret_cast(&vec_src[prev_offset]), reinterpret_cast(v)); + prev_offset = offsets_src[i]; + } + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + +struct NameFunctionIPv4NumToString { static constexpr auto name = "IPv4NumToString"; }; +struct NameFunctionIPv4NumToStringClassC { static constexpr auto name = 
"IPv4NumToStringClassC"; }; + +void registerFunctionsCoding(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + + factory.registerFunction>(); + factory.registerFunction>(); + + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + + /// MysQL compatibility aliases: + factory.registerAlias("INET_ATON", FunctionIPv4StringToNum::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("INET6_NTOA", FunctionIPv6NumToString::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("INET6_ATON", FunctionIPv6StringToNum::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("INET_NTOA", NameFunctionIPv4NumToString::name, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/FunctionsCodingUUID.cpp b/src/Functions/FunctionsCodingUUID.cpp new file mode 100644 index 00000000000..5f3e7b0de4a --- /dev/null +++ b/src/Functions/FunctionsCodingUUID.cpp @@ -0,0 +1,236 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +constexpr size_t uuid_bytes_length = 16; +constexpr size_t uuid_text_length = 36; + +class FunctionUUIDNumToString : public IFunction +{ + +public: + static constexpr auto name = "UUIDNumToString"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * ptr = checkAndGetDataType(arguments[0].get()); + if (!ptr || ptr->getN() != uuid_bytes_length) + throw Exception("Illegal type " + arguments[0]->getName() + + " of argument of function " + getName() + + ", expected FixedString(" + toString(uuid_bytes_length) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnWithTypeAndName & col_type_name = arguments[0]; + const ColumnPtr & column = col_type_name.column; + + if (const auto * col_in = checkAndGetColumn(column.get())) + { + if (col_in->getN() != uuid_bytes_length) + throw Exception("Illegal type " + col_type_name.type->getName() + + " of column " + col_in->getName() + + " argument of function " + getName() + + ", expected FixedString(" + toString(uuid_bytes_length) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto size = col_in->size(); + const auto & vec_in = col_in->getChars(); + + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + vec_res.resize(size * (uuid_text_length + 1)); + offsets_res.resize(size); + + size_t src_offset = 0; + size_t dst_offset = 0; + + for (size_t i = 0; i < size; ++i) + { 
+ formatUUID(&vec_in[src_offset], &vec_res[dst_offset]); + src_offset += uuid_bytes_length; + dst_offset += uuid_text_length; + vec_res[dst_offset] = 0; + ++dst_offset; + offsets_res[i] = dst_offset; + } + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +class FunctionUUIDStringToNum : public IFunction +{ +private: + static void parseHex(const UInt8 * __restrict src, UInt8 * __restrict dst, const size_t num_bytes) + { + size_t src_pos = 0; + size_t dst_pos = 0; + for (; dst_pos < num_bytes; ++dst_pos) + { + dst[dst_pos] = unhex2(reinterpret_cast(&src[src_pos])); + src_pos += 2; + } + } + + static void parseUUID(const UInt8 * src36, UInt8 * dst16) + { + /// If string is not like UUID - implementation specific behaviour. + + parseHex(&src36[0], &dst16[0], 4); + parseHex(&src36[9], &dst16[4], 2); + parseHex(&src36[14], &dst16[6], 2); + parseHex(&src36[19], &dst16[8], 2); + parseHex(&src36[24], &dst16[10], 6); + } + +public: + static constexpr auto name = "UUIDStringToNum"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + /// String or FixedString(36) + if (!isString(arguments[0])) + { + const auto * ptr = checkAndGetDataType(arguments[0].get()); + if (!ptr || ptr->getN() != uuid_text_length) + throw Exception("Illegal type " + arguments[0]->getName() + + " of argument of function " + getName() + + ", expected FixedString(" + toString(uuid_text_length) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(uuid_bytes_length); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnWithTypeAndName & col_type_name = arguments[0]; + const ColumnPtr & column = col_type_name.column; + + if (const auto * col_in = checkAndGetColumn(column.get())) + { + const auto & vec_in = col_in->getChars(); + const auto & offsets_in = col_in->getOffsets(); + const size_t size = offsets_in.size(); + + auto col_res = ColumnFixedString::create(uuid_bytes_length); + + ColumnString::Chars & vec_res = col_res->getChars(); + vec_res.resize(size * uuid_bytes_length); + + size_t src_offset = 0; + size_t dst_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + /// If string has incorrect length - then return zero UUID. + /// If string has correct length but contains something not like UUID - implementation specific behaviour. 
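The parseUUID helper above applies the same fixed-offset technique to the 36-character textual form: hex pairs everywhere except the four dashes at offsets 8, 13, 18 and 23, producing 16 bytes. Below is a standalone sketch of that layout; unhex2Digits stands in for the codebase's unhex2() and parseUUIDText is an illustrative name.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    /// Stand-in for unhex2(): two hex characters -> one byte.
    static uint8_t unhex2Digits(const char * p)
    {
        auto nibble = [](char c) -> uint8_t
        {
            if (c >= '0' && c <= '9') return static_cast<uint8_t>(c - '0');
            if (c >= 'a' && c <= 'f') return static_cast<uint8_t>(c - 'a' + 10);
            return static_cast<uint8_t>(c - 'A' + 10);
        };
        return static_cast<uint8_t>((nibble(p[0]) << 4) | nibble(p[1]));
    }

    /// Same group offsets as parseUUID above: 8-4-4-4-12 hex digits separated by dashes.
    static void parseUUIDText(const char * src36, uint8_t * dst16)
    {
        const size_t group_start[5] = {0, 9, 14, 19, 24};
        const size_t group_len[5]   = {8, 4, 4, 4, 12};

        size_t dst = 0;
        for (size_t g = 0; g < 5; ++g)
            for (size_t i = 0; i < group_len[g]; i += 2)
                dst16[dst++] = unhex2Digits(src36 + group_start[g] + i);
    }

    int main()
    {
        uint8_t out[16];
        parseUUIDText("00112233-4455-6677-8899-aabbccddeeff", out);

        const uint8_t expected[16] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
                                      0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
        assert(std::memcmp(out, expected, 16) == 0);
    }

As the comments just above note, the ClickHouse function only guards against a wrong string length (returning a zero UUID); a correctly sized string containing non-hex characters is implementation-specific behaviour.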
+ + size_t string_size = offsets_in[i] - src_offset; + if (string_size == uuid_text_length + 1) + parseUUID(&vec_in[src_offset], &vec_res[dst_offset]); + else + memset(&vec_res[dst_offset], 0, uuid_bytes_length); + + dst_offset += uuid_bytes_length; + src_offset += string_size; + } + + return col_res; + } + else if (const auto * col_in_fixed = checkAndGetColumn(column.get())) + { + if (col_in_fixed->getN() != uuid_text_length) + throw Exception("Illegal type " + col_type_name.type->getName() + + " of column " + col_in_fixed->getName() + + " argument of function " + getName() + + ", expected FixedString(" + toString(uuid_text_length) + ")", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto size = col_in_fixed->size(); + const auto & vec_in = col_in_fixed->getChars(); + + auto col_res = ColumnFixedString::create(uuid_bytes_length); + + ColumnString::Chars & vec_res = col_res->getChars(); + vec_res.resize(size * uuid_bytes_length); + + size_t src_offset = 0; + size_t dst_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + parseUUID(&vec_in[src_offset], &vec_res[dst_offset]); + src_offset += uuid_text_length; + dst_offset += uuid_bytes_length; + } + + return col_res; + } + else + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } +}; + +void registerFunctionsCodingUUID(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index bef2d1816f2..67a02e3fd34 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -132,7 +132,7 @@ struct ConvertImpl if (std::is_same_v) { - if (isDate(named_from.type)) + if (isDateOrDate32(named_from.type)) throw Exception("Illegal type " + named_from.type->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } @@ -285,6 +285,10 @@ struct ConvertImpl template struct ConvertImpl : DateTimeTransformImpl {}; +/** Conversion of DateTime to Date32: throw off time component. + */ +template struct ConvertImpl + : DateTimeTransformImpl {}; /** Conversion of Date to DateTime: adding 00:00:00 time component. */ @@ -297,6 +301,11 @@ struct ToDateTimeImpl return time_zone.fromDayNum(DayNum(d)); } + static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.fromDayNum(ExtendedDayNum(d)); + } + static inline UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) { return dt; @@ -312,6 +321,9 @@ struct ToDateTimeImpl template struct ConvertImpl : DateTimeTransformImpl {}; +template struct ConvertImpl + : DateTimeTransformImpl {}; + /// Implementation of toDate function. template @@ -322,7 +334,7 @@ struct ToDateTransform32Or64 static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) { // since converting to Date, no need in values outside of default LUT range. - return (from < 0xFFFF) + return (from < DATE_LUT_MAX_DAY_NUM) ? from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } @@ -339,7 +351,7 @@ struct ToDateTransform32Or64Signed /// The function should be monotonic (better for query optimizations), so we saturate instead of overflow. if (from < 0) return 0; - return (from < 0xFFFF) + return (from < DATE_LUT_MAX_DAY_NUM) ? 
from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } @@ -358,6 +370,48 @@ struct ToDateTransform8Or16Signed } }; +/// Implementation of toDate32 function. + +template +struct ToDate32Transform32Or64 +{ + static constexpr auto name = "toDate32"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + ? from + : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); + } +}; + +template +struct ToDate32Transform32Or64Signed +{ + static constexpr auto name = "toDate32"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + static const Int32 daynum_min_offset = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + if (from < daynum_min_offset) + return daynum_min_offset; + return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + ? from + : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); + } +}; + +template +struct ToDate32Transform8Or16Signed +{ + static constexpr auto name = "toDate32"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + return from; + } +}; + /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, * Float32, Float64) to Date. If the number is negative, saturate it to unix epoch time. If the * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, @@ -384,6 +438,23 @@ template struct ConvertImpl struct ConvertImpl : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; + template struct ToDateTimeTransform64 @@ -580,36 +651,55 @@ template struct ConvertImpl struct FormatImpl { - static void execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) + template + static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) { writeText(x, wb); + return ReturnType(true); } }; template <> struct FormatImpl { - static void execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl *) + template + static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl *) { writeDateText(DayNum(x), wb); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl *) + { + writeDateText(ExtendedDayNum(x), wb); + return ReturnType(true); } }; template <> struct FormatImpl { - static void execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) + template + static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) { writeDateTimeText(x, wb, *time_zone); + return ReturnType(true); } }; template <> struct FormatImpl { - static void execute(const 
DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) + template + static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) { writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone); + return ReturnType(true); } }; @@ -617,18 +707,34 @@ struct FormatImpl template struct FormatImpl> { - static void execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) + template + static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) { - writeString(type->getNameForValue(x), wb); + static constexpr bool throw_exception = std::is_same_v; + + if constexpr (throw_exception) + { + writeString(type->getNameForValue(x), wb); + } + else + { + StringRef res; + bool is_ok = type->getNameForValue(x, res); + if (is_ok) + writeString(res, wb); + return ReturnType(is_ok); + } } }; template struct FormatImpl> { - static void execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) + template + static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) { writeText(x, type->getScale(), wb); + return ReturnType(true); } }; @@ -643,6 +749,16 @@ struct ConvertImpl, DataTypeNumber, Name, Con } }; +static ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) +{ + ColumnUInt8::MutablePtr null_map = nullptr; + if (const auto * col_null = checkAndGetColumn(col.get())) + { + null_map = ColumnUInt8::create(); + null_map->insertRangeFrom(col_null->getNullMapColumn(), 0, col_null->size()); + } + return null_map; +} template struct ConvertImpl, DataTypeString>, Name, ConvertDefaultBehaviorTag> @@ -652,13 +768,18 @@ struct ConvertImpl(*col_with_type_and_name.type); const DateLUTImpl * time_zone = nullptr; /// For argument of DateTime type, second argument with time zone could be specified. 
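The FormatImpl specializations above are reworked so one code path can either throw on a bad value or report failure to the caller, which then fills the null map (see the loop a little further below). A minimal sketch of that ReturnType idiom, detached from ClickHouse's types; formatEnumValue and enum_names are invented names for illustration:

```cpp
// Sketch of the "throw or report" idiom used by FormatImpl: ReturnType = void keeps
// the throwing behaviour, ReturnType = bool lets the caller build a null map instead.
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <type_traits>

static const std::map<int, std::string> enum_names = {{1, "hello"}, {2, "world"}};

template <typename ReturnType = void>
static ReturnType formatEnumValue(int value, std::string & out)
{
    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

    auto it = enum_names.find(value);
    if (it == enum_names.end())
    {
        if constexpr (throw_exception)
            throw std::runtime_error("Unexpected enum value " + std::to_string(value));
        else
            return false;
    }
    out = it->second;
    return ReturnType(true);   // for ReturnType = void this collapses to a plain return
}

int main()
{
    std::string out;
    formatEnumValue(1, out);                  // old behaviour: throws on failure
    std::cout << out << '\n';

    bool ok = formatEnumValue<bool>(42, out); // new behaviour: reports failure
    std::cout << (ok ? out : "<null>") << '\n';
}
```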
if constexpr (std::is_same_v || std::is_same_v) - time_zone = &extractTimeZoneFromFunctionArguments(arguments, 1, 0); + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) { @@ -684,14 +805,30 @@ struct ConvertImpl write_buffer(data_to); - for (size_t i = 0; i < size; ++i) + if (null_map) { - FormatImpl::execute(vec_from[i], write_buffer, &type, time_zone); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); + for (size_t i = 0; i < size; ++i) + { + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } } write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); return col_to; } else @@ -705,9 +842,11 @@ struct ConvertImplisNullable() && null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); return col_to; } }; @@ -807,7 +949,9 @@ inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuff { ExtendedDayNum tmp(0); if (!tryReadDateText(tmp, rb)) + { return false; + } x = tmp; return true; } @@ -1036,7 +1180,9 @@ struct ConvertThroughParsing SerializationDecimal::readText( vec_to[i], read_buffer, ToDataType::maxPrecision(), vec_to.getScale()); else + { parseImpl(vec_to[i], read_buffer, local_time_zone); + } } if (!isAllRead(read_buffer)) @@ -1086,7 +1232,16 @@ struct ConvertThroughParsing parsed = false; if (!parsed) - vec_to[i] = static_cast(0); + { + if constexpr (std::is_same_v) + { + vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + } + else + { + vec_to[i] = static_cast(0); + } + } if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) (*vec_null_map_to)[i] = !parsed; @@ -1360,6 +1515,8 @@ public: || std::is_same_v // toDate(value[, timezone : String]) || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. + // toDate(value[, timezone : String]) + || std::is_same_v // toDateTime(value[, timezone: String]) || std::is_same_v // toDateTime64(value, scale : Integer[, timezone: String]) @@ -1420,7 +1577,11 @@ public: /// Function actually uses default implementation for nulls, /// but we need to know if return type is Nullable or not, /// so we use checked_return_type only to intercept the first call to getReturnTypeImpl(...). 
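The ToDate32Transform*Signed transforms added above saturate instead of overflowing: values below the Date32 epoch offset clamp to the smallest representable day, values below DATE_LUT_MAX_EXTEND_DAY_NUM are taken as day numbers, and anything larger is treated as a Unix timestamp and converted through the LUT. A standalone sketch of that clamping logic; the two constants are placeholders for the real DateLUT values (getDayNumOffsetEpoch() and DATE_LUT_MAX_EXTEND_DAY_NUM):

```cpp
// Sketch of the saturating Int64 -> Date32 day-number conversion (constants are placeholders).
#include <algorithm>
#include <cstdint>
#include <iostream>

static constexpr int32_t kDayNumOffsetEpoch = 16436;   // placeholder: days between Date32 minimum and 1970-01-01
static constexpr int64_t kMaxExtendDayNum   = 120529;  // placeholder: upper bound of the extended day-number range

// Pretend LUT conversion: seconds since epoch -> day number.
static int32_t timestampToDayNum(int64_t seconds) { return static_cast<int32_t>(seconds / 86400); }

static int32_t toDate32DayNum(int64_t from)
{
    const int32_t day_num_min = -kDayNumOffsetEpoch;
    if (from < day_num_min)
        return day_num_min;                     // saturate below the Date32 range
    if (from < kMaxExtendDayNum)
        return static_cast<int32_t>(from);      // small values are already day numbers
    // Large values are interpreted as Unix timestamps, capped at 0xFFFFFFFF.
    return timestampToDayNum(std::min<int64_t>(from, 0xFFFFFFFF));
}

int main()
{
    std::cout << toDate32DayNum(-1000000) << '\n';    // clamped to -kDayNumOffsetEpoch
    std::cout << toDate32DayNum(18262) << '\n';       // taken as a day number (2020-01-01)
    std::cout << toDate32DayNum(1625097600) << '\n';  // taken as a timestamp
}
```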
- bool useDefaultImplementationForNulls() const override { return checked_return_type; } + bool useDefaultImplementationForNulls() const override + { + bool to_nullable_string = to_nullable && std::is_same_v; + return checked_return_type && !to_nullable_string; + } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override @@ -1485,7 +1646,10 @@ private: throw Exception{"Function " + getName() + " expects at least 1 argument", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; - const IDataType * from_type = arguments[0].type.get(); + if (result_type->onlyNull()) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + const DataTypePtr from_type = removeNullable(arguments[0].type); ColumnPtr result_column; auto call = [&](const auto & types, const auto & tag) -> bool @@ -1540,7 +1704,9 @@ private: result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); } else + { result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); + } return true; }; @@ -1581,7 +1747,7 @@ private: /// Generic conversion of any type to String. if (std::is_same_v) { - return ConvertImplGenericToString::execute(arguments); + return ConvertImplGenericToString::execute(arguments, result_type); } else throw Exception("Illegal type " + arguments[0].type->getName() + " of argument of function " + getName(), @@ -1912,7 +2078,7 @@ struct ToDateMonotonicity static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) { auto which = WhichDataType(type); - if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16()) + if (which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16()) return {true, true, true}; else if ( (which.isUInt() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) @@ -1954,8 +2120,8 @@ struct ToStringMonotonicity if (const auto * low_cardinality_type = checkAndGetDataType(type_ptr)) type_ptr = low_cardinality_type->getDictionaryType().get(); - /// `toString` function is monotonous if the argument is Date or DateTime or String, or non-negative numbers with the same number of symbols. - if (checkDataTypes(type_ptr)) + /// `toString` function is monotonous if the argument is Date or Date32 or DateTime or String, or non-negative numbers with the same number of symbols. 
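The monotonicity comment above rests on the fixed-width textual form: "YYYY-MM-DD" (and "YYYY-MM-DD hh:mm:ss") compares lexicographically in the same order as the underlying day number, which is what lets Date32 join the list. A small C++20 demonstration of that property (not ClickHouse code):

```cpp
// Demonstration: fixed-width date text preserves ordering, which is why
// toString(Date/Date32/DateTime) can be reported as monotonic.
#include <cassert>
#include <chrono>
#include <cstdio>
#include <string>

static std::string formatDay(int days_since_epoch)
{
    using namespace std::chrono;
    year_month_day ymd{sys_days{days{days_since_epoch}}};
    char buf[11];
    std::snprintf(buf, sizeof(buf), "%04d-%02u-%02u",
                  int(ymd.year()), unsigned(ymd.month()), unsigned(ymd.day()));
    return buf;
}

int main()
{
    // For any pair of day numbers, numeric order matches string order.
    for (int a = -2000; a < 2000; a += 97)
        for (int b = a + 1; b < 2000; b += 211)
            assert((a < b) == (formatDay(a) < formatDay(b)));
    std::puts("lexicographic order matches numeric order");
}
```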
+ if (checkDataTypes(type_ptr)) return positive; if (left.isNull() || right.isNull()) @@ -2043,6 +2209,7 @@ template <> struct FunctionTo { using Type = FunctionToInt256; } template <> struct FunctionTo { using Type = FunctionToFloat32; }; template <> struct FunctionTo { using Type = FunctionToFloat64; }; template <> struct FunctionTo { using Type = FunctionToDate; }; +template <> struct FunctionTo { using Type = FunctionToDate32; }; template <> struct FunctionTo { using Type = FunctionToDateTime; }; template <> struct FunctionTo { using Type = FunctionToDateTime64; }; template <> struct FunctionTo { using Type = FunctionToUUID; }; @@ -2435,7 +2602,7 @@ private: UInt32 scale = to_type->getScale(); WhichDataType which(type_index); - bool ok = which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDate() || which.isDateTime() || which.isDateTime64() + bool ok = which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isStringOrFixedString(); if (!ok) { @@ -3097,6 +3264,7 @@ private: std::is_same_v || std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || std::is_same_v) { @@ -3196,6 +3364,8 @@ public: return monotonicityForType(type); if (const auto * type = checkAndGetDataType(to_type)) return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); if (const auto * type = checkAndGetDataType(to_type)) return monotonicityForType(type); if (const auto * type = checkAndGetDataType(to_type)) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 58cdb68d100..c00baf2850b 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -113,7 +113,8 @@ public: virtual ~IFunctionBase() = default; - virtual ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const + virtual ColumnPtr execute( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const { return prepare(arguments)->execute(arguments, result_type, input_rows_count, dry_run); } @@ -161,7 +162,8 @@ public: * Arguments are passed without modifications, useDefaultImplementationForNulls, useDefaultImplementationForConstants, * useDefaultImplementationForLowCardinality are not applied. */ - virtual ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr & /* result_type */) const { return nullptr; } + virtual ColumnPtr getConstantResultForNonConstArguments( + const ColumnsWithTypeAndName & /* arguments */, const DataTypePtr & /* result_type */) const { return nullptr; } /** Function is called "injective" if it returns different result for different values of arguments. * Example: hex, negate, tuple... 
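A few hunks above, FunctionsConversion.h also wires the new type into the FunctionTo trait (FunctionTo maps DataTypeDate32 to FunctionToDate32), so code that only knows a data type can pick the matching conversion function at compile time. A toy sketch of that mapping pattern, with invented stand-in names:

```cpp
// Toy version of the FunctionTo<T> trait: a declared-but-undefined primary template
// plus one specialization per data type, mapping a type tag to its conversion function.
#include <iostream>

struct DataTypeDateLike {};      // stand-ins for the real DataType classes
struct DataTypeDate32Like {};

struct FunctionToDateLike   { static constexpr const char * name = "toDate"; };
struct FunctionToDate32Like { static constexpr const char * name = "toDate32"; };

template <typename DataType> struct FunctionTo;   // unknown types fail to compile
template <> struct FunctionTo<DataTypeDateLike>   { using Type = FunctionToDateLike; };
template <> struct FunctionTo<DataTypeDate32Like> { using Type = FunctionToDate32Like; };

template <typename DataType>
void reportConversion()
{
    std::cout << "use " << FunctionTo<DataType>::Type::name << '\n';
}

int main()
{
    reportConversion<DataTypeDateLike>();     // use toDate
    reportConversion<DataTypeDate32Like>();   // use toDate32
}
```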
diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index f3b279faaef..a390abc4eaf 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -58,10 +58,10 @@ struct CountEqualAction namespace Impl { template < - class ConcreteAction, + typename ConcreteAction, bool RightArgIsConstant = false, - class IntegralInitial = UInt64, - class IntegralResult = UInt64> + typename IntegralInitial = UInt64, + typename IntegralResult = UInt64> struct Main { private: @@ -94,13 +94,13 @@ private: } /// LowCardinality - static bool compare(const IColumn & left, const Result& right, size_t i, size_t) + static bool compare(const IColumn & left, const Result & right, size_t i, size_t) { return left.getUInt(i) == right; } /// Generic - static bool compare(const IColumn& left, const IColumn& right, size_t i, size_t j) + static bool compare(const IColumn & left, const IColumn & right, size_t i, size_t j) { return 0 == left.compareAt(i, RightArgIsConstant ? 0 : j, right, 1); } @@ -109,7 +109,7 @@ private: static constexpr bool hasNull(const NullMap * const null_map, size_t i) noexcept { return (*null_map)[i]; } - template + template static void process( const Data & data, const ArrOffsets & offsets, const Target & target, ResultArr & result, [[maybe_unused]] const NullMap * const null_map_data, @@ -148,7 +148,7 @@ private: continue; } else if (!compare(data, target, current_offset + j, i)) - continue; + continue; ConcreteAction::apply(current, j); @@ -162,7 +162,7 @@ private: } public: - template + template static void vector( const Data & data, const ArrOffsets & offsets, @@ -183,7 +183,7 @@ public: }; /// When the 2nd function argument is a NULL value. -template +template struct Null { using ResultType = typename ConcreteAction::ResultType; @@ -227,7 +227,7 @@ struct Null } }; -template +template struct String { private: @@ -350,7 +350,7 @@ public: }; } -template +template class FunctionArrayIndex : public IFunction { public: @@ -565,7 +565,7 @@ private: * Integral s = {s1, s2, ...} * (s1, s1, s2, ...), (s2, s1, s2, ...), (s3, s1, s2, ...) */ - template + template static inline ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); @@ -590,14 +590,14 @@ private: return nullptr; } - template + template static inline bool executeIntegral(ExecutionData& data) { return (executeIntegralExpanded(data) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... - template + template static inline bool executeIntegralExpanded(ExecutionData& data) { return (executeIntegralImpl(data) || ...); @@ -608,7 +608,7 @@ private: * second argument, namely, the @e value, so it's possible to invoke the has(Array(Int8), UInt64) e.g. * so we have to check all possible variants for #Initial and #Resulting types. */ - template + template static bool executeIntegralImpl(ExecutionData& data) { const ColumnVector * col_nested = checkAndGetColumn>(&data.left); @@ -647,7 +647,7 @@ private: } /** - * Catches arguments of type LC(T) (left) and U (right). + * Catches arguments of type LowCardinality(T) (left) and U (right). 
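The arrayIndex family (has/indexOf/countEqual) walks a flattened Array column: one contiguous data column plus an offsets column, where offsets[i] marks the end of row i. A standalone sketch of that traversal for the indexOf action, with plain vectors standing in for ClickHouse columns:

```cpp
// Sketch of the ColumnArray layout used by arrayIndex: all array elements live in
// one flat vector, and offsets[i] is the end position of row i.
#include <cstdint>
#include <iostream>
#include <vector>

// indexOf semantics: 1-based position of target in row `row`, or 0 if absent.
static uint64_t indexOfInRow(
    const std::vector<int64_t> & data,
    const std::vector<uint64_t> & offsets,
    size_t row,
    int64_t target)
{
    const uint64_t begin = row == 0 ? 0 : offsets[row - 1];
    const uint64_t end = offsets[row];
    for (uint64_t pos = begin; pos < end; ++pos)
        if (data[pos] == target)
            return pos - begin + 1;
    return 0;
}

int main()
{
    // Rows: [1, 2, 3], [], [7, 7, 9]
    std::vector<int64_t> data = {1, 2, 3, 7, 7, 9};
    std::vector<uint64_t> offsets = {3, 3, 6};

    std::cout << indexOfInRow(data, offsets, 0, 2) << '\n';   // 2
    std::cout << indexOfInRow(data, offsets, 1, 2) << '\n';   // 0 (empty row)
    std::cout << indexOfInRow(data, offsets, 2, 9) << '\n';   // 3
}
```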
* * The perftests * https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html @@ -726,7 +726,7 @@ private: return col_result; } - else if (col_lc->nestedIsNullable()) // LC(Nullable(T)) and U + else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U { const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T) const ColumnNullable& left_nullable = *checkAndGetColumn(left_casted.get()); @@ -746,16 +746,17 @@ private: ? right_nullable->getNestedColumn() : *right_casted.get(); - ExecutionData data = { + ExecutionData data = + { left_ptr, right_ptr, col_array->getOffsets(), nullptr, {null_map_left_casted, null_map_right_casted}}; - if (dispatchConvertedLCColumns(data)) + if (dispatchConvertedLowCardinalityColumns(data)) return data.result_column; } - else // LC(T) and U, T not Nullable + else // LowCardinality(T) and U, T not Nullable { if (col_arg.isNullable()) return nullptr; @@ -764,24 +765,25 @@ private: arg_lc && arg_lc->isNullable()) return nullptr; - // LC(T) and U (possibly LC(V)) + // LowCardinality(T) and U (possibly LowCardinality(V)) const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality(); - ExecutionData data = { + ExecutionData data = + { *left_casted.get(), *right_casted.get(), col_array->getOffsets(), nullptr, {null_map_data, null_map_item} }; - if (dispatchConvertedLCColumns(data)) + if (dispatchConvertedLowCardinalityColumns(data)) return data.result_column; } return nullptr; } - static bool dispatchConvertedLCColumns(ExecutionData& data) + static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data) { if (data.left.isNumeric() && data.right.isNumeric()) // ColumnArrays return executeIntegral(data); diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 5b9886580dc..9eefc4f178d 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -31,8 +32,10 @@ class FunctionRange : public IFunction { public: static constexpr auto name = "range"; - static constexpr size_t max_elements = 100'000'000; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + const size_t max_elements; + static FunctionPtr create(ContextPtr context_) { return std::make_shared(std::move(context_)); } + explicit FunctionRange(ContextPtr context) : max_elements(context->getSettingsRef().function_range_max_elements_in_block) {} private: String getName() const override { return name; } diff --git a/src/Functions/bitmaskToList.cpp b/src/Functions/bitmaskToList.cpp deleted file mode 100644 index 8c3105724ac..00000000000 --- a/src/Functions/bitmaskToList.cpp +++ /dev/null @@ -1,132 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; -} - - -/** Function for an unusual conversion to a string: - * - * bitmaskToList - takes an integer - a bitmask, returns a string of degrees of 2 separated by a comma. 
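The deleted bitmaskToList implementation below peels off the lowest set bit on each iteration with the u_x & (u_x - 1) trick; its registration is renamed to registerFunctionsBitToArray later in this diff. A standalone sketch of that decomposition (illustration only, not part of the patch):

```cpp
// Sketch of the bit-decomposition loop from bitmaskToList: x & (x - 1) clears the
// lowest set bit, and XOR with the previous value recovers that bit.
#include <cstdint>
#include <iostream>
#include <string>

static std::string bitmaskToList(uint64_t x)
{
    std::string out;
    bool first = true;
    while (x)
    {
        const uint64_t without_lowest = x & (x - 1);    // clear lowest set bit
        const uint64_t lowest_bit = x ^ without_lowest; // the bit that was cleared
        x = without_lowest;
        if (!first)
            out += ',';
        first = false;
        out += std::to_string(lowest_bit);
    }
    return out;
}

int main()
{
    std::cout << bitmaskToList(50) << '\n';   // "2,16,32"
}
```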
- * for example, bitmaskToList(50) = '2,16,32' - */ - -namespace -{ - -class FunctionBitmaskToList : public IFunction -{ -public: - static constexpr auto name = "bitmaskToList"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - const DataTypePtr & type = arguments[0]; - - if (!isInteger(type)) - throw Exception("Cannot format " + type->getName() + " as bitmask string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - ColumnPtr res; - if (!((res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)))) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - - return res; - } - -private: - template - inline static void writeBitmask(T x, WriteBuffer & out) - { - using UnsignedT = make_unsigned_t; - UnsignedT u_x = x; - - bool first = true; - while (u_x) - { - UnsignedT y = u_x & (u_x - 1); - UnsignedT bit = u_x ^ y; - u_x = y; - if (!first) - writeChar(',', out); - first = false; - writeIntText(T(bit), out); - } - } - - template - ColumnPtr executeType(const ColumnsWithTypeAndName & columns) const - { - if (const ColumnVector * col_from = checkAndGetColumn>(columns[0].column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColumnVector::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - data_to.resize(size * 2); - offsets_to.resize(size); - - WriteBufferFromVector buf_to(data_to); - - for (size_t i = 0; i < size; ++i) - { - writeBitmask(vec_from[i], buf_to); - writeChar(0, buf_to); - offsets_to[i] = buf_to.count(); - } - - buf_to.finalize(); - return col_to; - } - - return nullptr; - } -}; - -} - -void registerFunctionBitmaskToList(FunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in index eb96c13c355..3e1c862300c 100644 --- a/src/Functions/config_functions.h.in +++ b/src/Functions/config_functions.h.in @@ -6,4 +6,5 @@ #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON #cmakedefine01 USE_H3 +#cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_FASTOPS diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 2dad8fc13f2..1b12e6c9ad3 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -46,20 +46,23 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isFloat64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isFloat64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 2, getName()); arg = arguments[2].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(3) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 3, getName()); return std::make_shared(); } diff --git a/src/Functions/geoToS2.cpp b/src/Functions/geoToS2.cpp new file mode 100644 index 00000000000..c415cfade89 --- /dev/null +++ b/src/Functions/geoToS2.cpp @@ -0,0 +1,111 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +class S2CellId; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/** + * Accepts points of the form (longitude, latitude) + * Returns s2 identifier + */ +class FunctionGeoToS2 : public IFunction +{ +public: + static constexpr auto name = "geoToS2"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arg->getName(), i, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lon = arguments[0].column.get(); + const auto * col_lat = arguments[1].column.get(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const Float64 lon = col_lon->getFloat64(row); + const Float64 lat = col_lat->getFloat64(row); + + if (isNaN(lon) || isNaN(lat)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments must not be NaN"); + + if (!(isFinite(lon) && isFinite(lat))) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments must not be infinite"); + + /// S2 acceptes point as (latitude, longitude) + S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon); + S2CellId id(lat_lng); + + dst_data[row] = id.id(); + } + + return dst; + } + +}; + +} + +void registerFunctionGeoToS2(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index 071581a7c60..aab8aeaf3a2 100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -44,8 +44,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -62,8 +63,10 @@ public: { const int resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + resolution, getName(), MAX_H3_RES); // Numerical constant is 180 degrees / pi / Earth radius, Earth radius is from h3 sources Float64 res = 8.99320592271288084e-6 * getHexagonEdgeLengthAvgM(resolution); diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index 56374e10077..3d745b21bd7 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -49,8 +49,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt8", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -67,8 +68,10 @@ public: { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + resolution, getName(), MAX_H3_RES); Float64 res = getHexagonEdgeLengthAvgM(resolution); diff --git a/src/Functions/h3GetBaseCell.cpp b/src/Functions/h3GetBaseCell.cpp index b73245f751b..4c424e4a1ab 100644 --- a/src/Functions/h3GetBaseCell.cpp +++ b/src/Functions/h3GetBaseCell.cpp @@ -41,8 +41,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } diff --git a/src/Functions/h3GetResolution.cpp b/src/Functions/h3GetResolution.cpp index 49ade509934..f387cdac2f0 100644 --- a/src/Functions/h3GetResolution.cpp +++ b/src/Functions/h3GetResolution.cpp @@ -41,8 +41,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index 7f41348a14b..c4c6b5a57b2 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -44,8 +44,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt8", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -62,8 +63,10 @@ public: { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + resolution, getName(), MAX_H3_RES); Float64 res = getHexagonAreaAvgM2(resolution); diff --git a/src/Functions/h3IndexesAreNeighbors.cpp b/src/Functions/h3IndexesAreNeighbors.cpp index 6507998e24c..2c9ceb9cc32 100644 --- a/src/Functions/h3IndexesAreNeighbors.cpp +++ b/src/Functions/h3IndexesAreNeighbors.cpp @@ -41,14 +41,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 2, getName()); return std::make_shared(); } diff --git a/src/Functions/h3IsValid.cpp b/src/Functions/h3IsValid.cpp index bc140450b71..37ec2b99cd9 100644 --- a/src/Functions/h3IsValid.cpp +++ b/src/Functions/h3IsValid.cpp @@ -41,8 +41,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index 88ac3056e72..d0d586cdf19 100644 --- a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -50,14 +50,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt8", + arg->getName(), 2, getName()); return std::make_shared(std::make_shared()); } @@ -81,14 +83,17 @@ public: const UInt8 child_resolution = col_resolution->getUInt(row); if (child_resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(child_resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", + toString(child_resolution), getName(), toString(MAX_H3_RES)); const size_t vec_size = cellToChildrenSize(parent_hindex, child_resolution); if (vec_size > MAX_ARRAY_SIZE) - throw Exception("The result of function" + getName() - + " (array of " + toString(vec_size) + " elements) will be too large with resolution argument = " - + toString(child_resolution), ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception( + ErrorCodes::TOO_LARGE_ARRAY_SIZE, + "The result of function {} (array of {} elements) will be too large with resolution argument = {}", + getName(), toString(vec_size), toString(child_resolution)); hindex_vec.resize(vec_size); cellToChildren(parent_hindex, child_resolution, hindex_vec.data()); diff --git a/src/Functions/h3ToParent.cpp b/src/Functions/h3ToParent.cpp index 9755184d63c..0ec3df37e2e 100644 --- a/src/Functions/h3ToParent.cpp +++ b/src/Functions/h3ToParent.cpp @@ -44,14 +44,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!WhichDataType(arg).isUInt8()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be UInt8", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 2, getName()); return std::make_shared(); } @@ -71,8 +73,10 @@ public: const UInt8 resolution = col_resolution->getUInt(row); if (resolution > MAX_H3_RES) - throw Exception("The argument 'resolution' (" + toString(resolution) + ") of function " + getName() - + " is out of bounds because the maximum resolution in H3 library is " + toString(MAX_H3_RES), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", + toString(resolution), getName(), toString(MAX_H3_RES)); UInt64 res = cellToParent(hindex, resolution); diff --git a/src/Functions/h3ToString.cpp b/src/Functions/h3ToString.cpp index 8ac97db0621..372afb97296 100644 --- a/src/Functions/h3ToString.cpp +++ b/src/Functions/h3ToString.cpp @@ -42,8 +42,9 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), 1, getName()); return std::make_shared(); } @@ -67,16 +68,14 @@ public: const UInt64 hindex = col_hindex->getUInt(i); if (!isValidCell(hindex)) - { - throw Exception("Invalid H3 index: " + std::to_string(hindex), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid H3 index: {}", hindex); + h3ToString(hindex, pos, H3_INDEX_STRING_LENGTH); // move to end of the index while (*pos != '\0') - { pos++; - } + vec_offsets[i] = ++pos - begin; } vec_res.resize(pos - begin); diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index 8b91f2fa1c7..583681e315e 100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -47,14 +47,16 @@ public: const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isUInt64()) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be UInt64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); arg = arguments[1].get(); if (!isInteger(arg)) throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be integer", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be integer", + arg->getName(), 2, getName()); return std::make_shared(std::make_shared()); } diff --git a/src/Functions/h3toGeo.cpp b/src/Functions/h3toGeo.cpp new file mode 100644 index 00000000000..64facd1f010 --- /dev/null +++ b/src/Functions/h3toGeo.cpp @@ -0,0 +1,96 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/// Implements the function h3ToGeo which takes a single argument (h3Index) +/// and returns the longitude and latitude that correspond to the provided h3 index +class FunctionH3ToGeo : public IFunction +{ +public: + static constexpr auto name = "h3ToGeo"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared( + DataTypes{std::make_shared(), std::make_shared()}, + Strings{"longitude", "latitude"}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_index = arguments[0].column.get(); + + auto latitude = ColumnFloat64::create(input_rows_count); + auto longitude = ColumnFloat64::create(input_rows_count); + + ColumnFloat64::Container & lon_data = longitude->getData(); + ColumnFloat64::Container & lat_data = latitude->getData(); + + + for (size_t row = 0; row < input_rows_count; ++row) + { + H3Index h3index = col_index->getUInt(row); + LatLng coord{}; + + cellToLatLng(h3index,&coord); + lon_data[row] = radsToDegs(coord.lng); + lat_data[row] = radsToDegs(coord.lat); + } + + MutableColumns columns; + columns.emplace_back(std::move(longitude)); + columns.emplace_back(std::move(latitude)); + return ColumnTuple::create(std::move(columns)); + } +}; + +} + +void registerFunctionH3ToGeo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 29343a871a8..7e8f35bc0c4 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -12,7 +12,10 @@ void registerFunctionsArray(FunctionFactory &); void registerFunctionsTuple(FunctionFactory &); void registerFunctionsMap(FunctionFactory &); void registerFunctionsBitmap(FunctionFactory &); +void registerFunctionsBinaryRepr(FunctionFactory &); void registerFunctionsCoding(FunctionFactory &); +void registerFunctionsCodingUUID(FunctionFactory &); +void registerFunctionChar(FunctionFactory &); void registerFunctionsComparison(FunctionFactory &); void registerFunctionsConditional(FunctionFactory &); void registerFunctionsConversion(FunctionFactory &); @@ -73,7 +76,10 @@ void registerFunctions() #if !defined(ARCADIA_BUILD) registerFunctionsBitmap(factory); #endif + registerFunctionsBinaryRepr(factory); registerFunctionsCoding(factory); + registerFunctionsCodingUUID(factory); + registerFunctionChar(factory); registerFunctionsComparison(factory); registerFunctionsConditional(factory); registerFunctionsConversion(factory); diff --git a/src/Functions/registerFunctionsFormatting.cpp b/src/Functions/registerFunctionsFormatting.cpp index ab258589b92..e434b0e49f0 100644 --- a/src/Functions/registerFunctionsFormatting.cpp +++ b/src/Functions/registerFunctionsFormatting.cpp @@ -3,14 +3,14 @@ namespace DB class FunctionFactory; -void registerFunctionBitmaskToList(FunctionFactory &); +void registerFunctionsBitToArray(FunctionFactory &); void registerFunctionFormatReadableSize(FunctionFactory &); void registerFunctionFormatReadableQuantity(FunctionFactory &); void registerFunctionFormatReadableTimeDelta(FunctionFactory &); void registerFunctionsFormatting(FunctionFactory & factory) { - registerFunctionBitmaskToList(factory); + registerFunctionsBitToArray(factory); registerFunctionFormatReadableSize(factory); registerFunctionFormatReadableQuantity(factory); registerFunctionFormatReadableTimeDelta(factory); diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index 605dd4dcba0..eb881870446 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -28,6 +28,7 @@ void registerFunctionSvg(FunctionFactory & factory); #if USE_H3 void registerFunctionGeoToH3(FunctionFactory &); +void 
registerFunctionH3ToGeo(FunctionFactory &); void registerFunctionH3EdgeAngle(FunctionFactory &); void registerFunctionH3EdgeLengthM(FunctionFactory &); void registerFunctionH3GetResolution(FunctionFactory &); @@ -42,6 +43,19 @@ void registerFunctionH3ToString(FunctionFactory &); void registerFunctionH3HexAreaM2(FunctionFactory &); #endif +#if USE_S2_GEOMETRY +void registerFunctionGeoToS2(FunctionFactory &); +void registerFunctionS2ToGeo(FunctionFactory &); +void registerFunctionS2GetNeighbors(FunctionFactory &); +void registerFunctionS2CellsIntersect(FunctionFactory &); +void registerFunctionS2CapContains(FunctionFactory &); +void registerFunctionS2CapUnion(FunctionFactory &); +void registerFunctionS2RectAdd(FunctionFactory &); +void registerFunctionS2RectContains(FunctionFactory &); +void registerFunctionS2RectUnion(FunctionFactory &); +void registerFunctionS2RectIntersection(FunctionFactory &); +#endif + void registerFunctionsGeo(FunctionFactory & factory) { @@ -66,6 +80,7 @@ void registerFunctionsGeo(FunctionFactory & factory) #if USE_H3 registerFunctionGeoToH3(factory); + registerFunctionH3ToGeo(factory); registerFunctionH3EdgeAngle(factory); registerFunctionH3EdgeLengthM(factory); registerFunctionH3GetResolution(factory); @@ -79,6 +94,19 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3ToString(factory); registerFunctionH3HexAreaM2(factory); #endif + +#if USE_S2_GEOMETRY + registerFunctionGeoToS2(factory); + registerFunctionS2ToGeo(factory); + registerFunctionS2GetNeighbors(factory); + registerFunctionS2CellsIntersect(factory); + registerFunctionS2CapContains(factory); + registerFunctionS2CapUnion(factory); + registerFunctionS2RectAdd(factory); + registerFunctionS2RectContains(factory); + registerFunctionS2RectUnion(factory); + registerFunctionS2RectIntersection(factory); +#endif } } diff --git a/src/Functions/registerFunctionsString.cpp b/src/Functions/registerFunctionsString.cpp index 18a30469386..b0b0e4434bc 100644 --- a/src/Functions/registerFunctionsString.cpp +++ b/src/Functions/registerFunctionsString.cpp @@ -37,6 +37,7 @@ void registerFunctionCountMatches(FunctionFactory &); void registerFunctionEncodeXMLComponent(FunctionFactory &); void registerFunctionDecodeXMLComponent(FunctionFactory &); void registerFunctionExtractTextFromHTML(FunctionFactory &); +void registerFunctionToStringCutToZero(FunctionFactory &); #if USE_BASE64 @@ -77,6 +78,7 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionEncodeXMLComponent(factory); registerFunctionDecodeXMLComponent(factory); registerFunctionExtractTextFromHTML(factory); + registerFunctionToStringCutToZero(factory); #if USE_BASE64 registerFunctionBase64Encode(factory); registerFunctionBase64Decode(factory); diff --git a/src/Functions/s2CapContains.cpp b/src/Functions/s2CapContains.cpp new file mode 100644 index 00000000000..ce2abc14fad --- /dev/null +++ b/src/Functions/s2CapContains.cpp @@ -0,0 +1,132 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * The cap represents a portion of the sphere that has been cut off by a plane. + * It is defined by a point on a sphere and a radius in degrees. + * Imagine that we draw a line through the center of the sphere and our point. 
+ * An infinite number of planes pass through this line, but any plane will intersect the cap in two points. + * Thus the angle is defined by one of this points and the entire line. + * So, the radius of Pi/2 defines a hemisphere and the radius of Pi defines a whole sphere. + * + * This function returns whether a cap contains a point. + */ +class FunctionS2CapContains : public IFunction +{ +public: + static constexpr auto name = "s2CapContains"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 3; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t index = 0; index < getNumberOfArguments(); ++index) + { + const auto * arg = arguments[index].get(); + + /// Radius + if (index == 1) + { + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 2, getName()); + } + else if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), index + 1, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_center = arguments[0].column.get(); + const auto * col_degrees = arguments[1].column.get(); + const auto * col_point = arguments[2].column.get(); + + auto dst = ColumnUInt8::create(); + auto & dst_data = dst->getData(); + dst_data.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto center = S2CellId(col_center->getUInt(row)); + const Float64 degrees = col_degrees->getFloat64(row); + const auto point = S2CellId(col_point->getUInt(row)); + + if (isNaN(degrees)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); + + if (std::isinf(degrees)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be infinite"); + + if (!center.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Center is not valid"); + + if (!point.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S1Angle angle = S1Angle::Degrees(degrees); + S2Cap cap(center.ToPoint(), angle); + + dst_data.emplace_back(cap.Contains(point.ToPoint())); + } + + return dst; + } +}; + +} + +void registerFunctionS2CapContains(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2CapUnion.cpp b/src/Functions/s2CapUnion.cpp new file mode 100644 index 00000000000..4520f436161 --- /dev/null +++ b/src/Functions/s2CapUnion.cpp @@ -0,0 +1,141 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * The cap represents a portion of the sphere that has been cut off by a plane. + * See comment for s2CapContains function. + * This function returns the smallest cap that contains both of input caps. 
+ * It is represented by identifier of the center and a radius. + */ +class FunctionS2CapUnion : public IFunction +{ +public: + static constexpr auto name = "s2CapUnion"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t index = 0; index < getNumberOfArguments(); ++index) + { + const auto * arg = arguments[index].get(); + if (index == 1 || index == 3) + { + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), index + 1, getName()); + } + else if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), index + 1, getName() + ); + } + + DataTypePtr center = std::make_shared(); + DataTypePtr radius = std::make_shared(); + + return std::make_shared(DataTypes{center, radius}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_center1 = arguments[0].column.get(); + const auto * col_radius1 = arguments[1].column.get(); + const auto * col_center2 = arguments[2].column.get(); + const auto * col_radius2 = arguments[3].column.get(); + + auto col_res_center = ColumnUInt64::create(); + auto col_res_radius = ColumnFloat64::create(); + + auto & vec_res_center = col_res_center->getData(); + vec_res_center.reserve(input_rows_count); + + auto & vec_res_radius = col_res_radius->getData(); + vec_res_radius.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const UInt64 first_center = col_center1->getUInt(row); + const Float64 first_radius = col_radius1->getFloat64(row); + const UInt64 second_center = col_center2->getUInt(row); + const Float64 second_radius = col_radius2->getFloat64(row); + + if (isNaN(first_radius) || isNaN(second_radius)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); + + if (std::isinf(first_radius) || std::isinf(second_radius)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be infinite"); + + auto first_center_cell = S2CellId(first_center); + auto second_center_cell = S2CellId(second_center); + + if (!first_center_cell.is_valid() || !second_center_cell.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Center of the cap is not valid"); + + S2Cap cap1(first_center_cell.ToPoint(), S1Angle::Degrees(first_radius)); + S2Cap cap2(second_center_cell.ToPoint(), S1Angle::Degrees(second_radius)); + + S2Cap cap_union = cap1.Union(cap2); + + vec_res_center.emplace_back(S2CellId(cap_union.center()).id()); + vec_res_radius.emplace_back(cap_union.GetRadius().degrees()); + } + + return ColumnTuple::create(Columns{std::move(col_res_center), std::move(col_res_radius)}); + } + +}; + +} + +void registerFunctionS2CapUnion(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2CellsIntersect.cpp b/src/Functions/s2CellsIntersect.cpp new file mode 100644 index 00000000000..3d25fdbe44d --- /dev/null +++ b/src/Functions/s2CellsIntersect.cpp @@ -0,0 
+1,104 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * Each cell in s2 library is a quadrilateral bounded by four geodesics. + */ +class FunctionS2CellsIntersect : public IFunction +{ +public: + static constexpr auto name = "s2CellsIntersect"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), i, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_id_first = arguments[0].column.get(); + const auto * col_id_second = arguments[1].column.get(); + + auto dst = ColumnUInt8::create(); + auto & dst_data = dst->getData(); + dst_data.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const UInt64 id_first = col_id_first->getInt(row); + const UInt64 id_second = col_id_second->getInt(row); + + auto first_cell = S2CellId(id_first); + auto second_cell = S2CellId(id_second); + + if (!first_cell.is_valid() || !second_cell.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cell is not valid"); + + dst_data.emplace_back(S2CellId(id_first).intersects(S2CellId(id_second))); + } + + return dst; + } + +}; + +} + +void registerFunctionS2CellsIntersect(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2GetNeighbors.cpp b/src/Functions/s2GetNeighbors.cpp new file mode 100644 index 00000000000..8da0777a4ef --- /dev/null +++ b/src/Functions/s2GetNeighbors.cpp @@ -0,0 +1,111 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * Each cell in s2 library is a quadrilateral bounded by four geodesics. + * So, each cell has 4 neighbors + */ +class FunctionS2GetNeighbors : public IFunction +{ +public: + static constexpr auto name = "s2GetNeighbors"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arg->getName(), 1, getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_id = arguments[0].column.get(); + + auto dst = ColumnArray::create(ColumnUInt64::create()); + auto & dst_data = dst->getData(); + auto & dst_offsets = dst->getOffsets(); + dst_offsets.resize(input_rows_count); + size_t current_offset = 0; + + for (const auto row : collections::range(0, input_rows_count)) + { + const UInt64 id = col_id->getUInt(row); + + S2CellId cell_id(id); + + if (!cell_id.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cell is not valid"); + + S2CellId neighbors[4]; + cell_id.GetEdgeNeighbors(neighbors); + + dst_data.reserve(dst_data.size() + 4); + for (auto & neighbor : neighbors) + { + ++current_offset; + dst_data.insert(neighbor.id()); + } + dst_offsets[row] = current_offset; + } + + return dst; + } + +}; + +} + +void registerFunctionS2GetNeighbors(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp new file mode 100644 index 00000000000..ceceb11da05 --- /dev/null +++ b/src/Functions/s2RectAdd.cpp @@ -0,0 +1,115 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +class FunctionS2RectAdd : public IFunction +{ +public: + static constexpr auto name = "s2RectAdd"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t index = 0; index < getNumberOfArguments(); ++index) + { + const auto * arg = arguments[index].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), index, getName()); + } + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo = arguments[0].column.get(); + const auto * col_hi = arguments[1].column.get(); + const auto * col_point = arguments[2].column.get(); + + auto col_res_first = ColumnUInt64::create(); + auto col_res_second = ColumnUInt64::create(); + + auto & vec_res_first = col_res_first->getData(); + vec_res_first.reserve(input_rows_count); + + auto & vec_res_second = col_res_second->getData(); + vec_res_second.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo = S2CellId(col_lo->getUInt(row)); + const auto hi = S2CellId(col_hi->getUInt(row)); + const auto point = S2CellId(col_point->getUInt(row)); + + if (!lo.is_valid() || !hi.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid"); + + if (!point.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng()); + + rect.AddPoint(point.ToPoint()); + + vec_res_first.emplace_back(S2CellId(rect.lo()).id()); + vec_res_second.emplace_back(S2CellId(rect.hi()).id()); + } + + return ColumnTuple::create(Columns{std::move(col_res_first), std::move(col_res_second)}); + } + +}; + +} + +void registerFunctionS2RectAdd(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp new file mode 100644 index 00000000000..2b4ae31a6b2 --- /dev/null +++ b/src/Functions/s2RectContains.cpp @@ -0,0 +1,105 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +class FunctionS2RectContains : public IFunction +{ +public: + static constexpr auto name = "s2RectContains"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), i, getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo = arguments[0].column.get(); + const auto * col_hi = arguments[1].column.get(); + const auto * col_point = arguments[2].column.get(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo = S2CellId(col_lo->getUInt(row)); + const auto hi = S2CellId(col_hi->getUInt(row)); + const auto point = S2CellId(col_point->getUInt(row)); + + if (!lo.is_valid() || !hi.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid"); + + if (!point.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng()); + + dst_data.emplace_back(rect.Contains(point.ToLatLng())); + } + + return dst; + } + +}; + +} + +void registerFunctionS2RectContains(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectIntersection.cpp b/src/Functions/s2RectIntersection.cpp new file mode 100644 index 00000000000..f106167247b --- /dev/null +++ b/src/Functions/s2RectIntersection.cpp @@ -0,0 +1,121 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +class S2CellId; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + + +class FunctionS2RectIntersection : public IFunction +{ +public: + static constexpr auto name = "s2RectIntersection"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), i, getName()); + } + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo1 = arguments[0].column.get(); + const auto * col_hi1 = arguments[1].column.get(); + const auto * col_lo2 = arguments[2].column.get(); + const auto * col_hi2 = arguments[3].column.get(); + + auto col_res_first = ColumnUInt64::create(); + auto col_res_second = ColumnUInt64::create(); + + auto & vec_res_first = col_res_first->getData(); + vec_res_first.reserve(input_rows_count); + + auto & vec_res_second = col_res_second->getData(); + vec_res_second.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo1 = S2CellId(col_lo1->getUInt(row)); + const auto hi1 = S2CellId(col_hi1->getUInt(row)); + const auto lo2 = S2CellId(col_lo2->getUInt(row)); + const auto hi2 = S2CellId(col_hi2->getUInt(row)); + + if (!lo1.is_valid() || !hi1.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid"); + + if (!lo2.is_valid() || !hi2.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid"); + + S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng()); + S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng()); + + S2LatLngRect rect_intersection = rect1.Intersection(rect2); + + vec_res_first.emplace_back(S2CellId(rect_intersection.lo()).id()); + vec_res_second.emplace_back(S2CellId(rect_intersection.hi()).id()); + } + + return ColumnTuple::create(Columns{std::move(col_res_first), std::move(col_res_second)}); + } + +}; + +} + +void registerFunctionS2RectIntersection(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2RectUnion.cpp b/src/Functions/s2RectUnion.cpp new file mode 100644 index 00000000000..387d8b25f29 --- /dev/null +++ b/src/Functions/s2RectUnion.cpp @@ -0,0 +1,119 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + + +class FunctionS2RectUnion : public IFunction +{ +public: + static constexpr auto name = "s2RectUnion"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 4; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (size_t i = 0; i < getNumberOfArguments(); ++i) + { + const auto * arg = arguments[i].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arg->getName(), i + 1, getName()); + } + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_lo1 = arguments[0].column.get(); + const auto * col_hi1 = arguments[1].column.get(); + const auto * col_lo2 = arguments[2].column.get(); + const auto * col_hi2 = arguments[3].column.get(); + + auto col_res_first = ColumnUInt64::create(); + auto col_res_second = ColumnUInt64::create(); + + auto & vec_res_first = col_res_first->getData(); + vec_res_first.reserve(input_rows_count); + + auto & vec_res_second = col_res_second->getData(); + vec_res_second.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto lo1 = S2CellId(col_lo1->getUInt(row)); + const auto hi1 = S2CellId(col_hi1->getUInt(row)); + const auto lo2 = S2CellId(col_lo2->getUInt(row)); + const auto hi2 = S2CellId(col_hi2->getUInt(row)); + + if (!lo1.is_valid() || !hi1.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid"); + + if (!lo2.is_valid() || !hi2.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid"); + + S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng()); + S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng()); + + S2LatLngRect rect_union = rect1.Union(rect2); + + vec_res_first.emplace_back(S2CellId(rect_union.lo()).id()); + vec_res_second.emplace_back(S2CellId(rect_union.hi()).id()); + } + + return ColumnTuple::create(Columns{std::move(col_res_first), std::move(col_res_second)}); + } + +}; + +} + +void registerFunctionS2RectUnion(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2ToGeo.cpp b/src/Functions/s2ToGeo.cpp new file mode 100644 index 00000000000..98f71e898bd --- /dev/null +++ b/src/Functions/s2ToGeo.cpp @@ -0,0 +1,110 @@ +#if !defined(ARCADIA_BUILD) +# include "config_functions.h" +#endif + +#if USE_S2_GEOMETRY + +#include +#include +#include +#include +#include +#include +#include + +#include "s2_fwd.h" + +class S2CellId; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** + * Returns a point (longitude, latitude) in degrees + */ +class FunctionS2ToGeo : public IFunction +{ +public: + static constexpr auto name = "s2ToGeo"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + std::string getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arg->getName(), 1, getName()); + + DataTypePtr element = std::make_shared(); + + return std::make_shared(DataTypes{element, element}); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * col_id = arguments[0].column.get(); + + auto col_longitude = ColumnFloat64::create(); + auto col_latitude = ColumnFloat64::create(); + + auto & longitude = col_longitude->getData(); + longitude.reserve(input_rows_count); + + auto & latitude = col_latitude->getData(); + latitude.reserve(input_rows_count); + + for (const auto row : collections::range(0, input_rows_count)) + { + const auto id = S2CellId(col_id->getUInt(row)); + + if (!id.is_valid()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); + + S2Point point = id.ToPoint(); + S2LatLng ll(point); + + longitude.emplace_back(ll.lng().degrees()); + latitude.emplace_back(ll.lat().degrees()); + } + + return ColumnTuple::create(Columns{std::move(col_longitude), std::move(col_latitude)}); + } + +}; + +} + +void registerFunctionS2ToGeo(FunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/Functions/s2_fwd.h b/src/Functions/s2_fwd.h new file mode 100644 index 00000000000..e3f7026e48c --- /dev/null +++ b/src/Functions/s2_fwd.h @@ -0,0 +1,16 @@ +#pragma once +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wambiguous-reversed-operator" +#endif + +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE +#include // Y_IGNORE + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 8f78fd19a1f..304d51760de 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -5,11 +5,17 @@ #include #include #include +#include #include #include #include #include +namespace ProfileEvents +{ +extern const Event SleepFunctionCalls; +extern const Event SleepFunctionMicroseconds; +} namespace DB { @@ -91,8 +97,11 @@ public: if (seconds > 3.0) /// The choice is arbitrary throw Exception("The maximum sleep time is 3 seconds. Requested: " + toString(seconds), ErrorCodes::TOO_SLOW); - UInt64 microseconds = seconds * (variant == FunctionSleepVariant::PerBlock ? 1 : size) * 1e6; + UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size); + UInt64 microseconds = seconds * count * 1e6; sleepForMicroseconds(microseconds); + ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count); + ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds); } /// convertToFullColumn needed, because otherwise (constant expression case) function will not get called on each columns. 
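A minimal standalone sketch (not part of the patch) of the accounting introduced in sleep.h above; the block size and sleep argument are made-up values, while SleepFunctionCalls and SleepFunctionMicroseconds are the ProfileEvents this patch adds:

// Mirrors the count/microseconds computation from the sleep.h hunk above.
#include <cstdint>
#include <cassert>

int main()
{
    const double seconds = 0.01;   // e.g. sleepEachRow(0.01)
    const uint64_t size = 5;       // rows in the processed block
    const bool per_block = false;  // sleepEachRow is the per-row variant

    const uint64_t count = per_block ? 1 : size;
    const uint64_t microseconds = static_cast<uint64_t>(seconds * count * 1e6);

    assert(count == 5);            // SleepFunctionCalls would be incremented by 5
    assert(microseconds == 50000); // SleepFunctionMicroseconds would be incremented by 50000
    return 0;
}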
diff --git a/src/Functions/stringCutToZero.cpp b/src/Functions/stringCutToZero.cpp new file mode 100644 index 00000000000..ed8cee0d70c --- /dev/null +++ b/src/Functions/stringCutToZero.cpp @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; +} + +class FunctionToStringCutToZero : public IFunction +{ +public: + static constexpr auto name = "toStringCutToZero"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isStringOrFixedString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res) + { + const ColumnString * col_str_in = checkAndGetColumn(col); + + if (col_str_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_str_in->getChars(); + const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); + + size_t size = in_offsets.size(); + out_offsets.resize(size); + out_vec.resize(in_vec.size()); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + ColumnString::Offset current_in_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + const char * pos_in = reinterpret_cast(&in_vec[current_in_offset]); + size_t current_size = strlen(pos_in); + memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size); + pos += current_size; + *pos = '\0'; + ++pos; + out_offsets[i] = pos - begin; + current_in_offset = in_offsets[i]; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) + { + const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); + + if (col_fstr_in) + { + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const ColumnString::Chars & in_vec = col_fstr_in->getChars(); + + size_t size = col_fstr_in->size(); + + out_offsets.resize(size); + out_vec.resize(in_vec.size() + size); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + const char * pos_in = reinterpret_cast(in_vec.data()); + + size_t n = col_fstr_in->getN(); + + for (size_t i = 0; i < size; ++i) + { + size_t current_size = strnlen(pos_in, n); + memcpySmallAllowReadWriteOverflow15(pos, pos_in, current_size); + pos += current_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + pos_in += n; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + col_res = 
std::move(col_str); + return true; + } + else + { + return false; + } + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const IColumn * column = arguments[0].column.get(); + ColumnPtr res_column; + + if (tryExecuteFixedString(column, res_column) || tryExecuteString(column, res_column)) + return res_column; + + throw Exception("Illegal column " + arguments[0].column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionToStringCutToZero(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/toTimezone.cpp b/src/Functions/toTimezone.cpp index 551e07a8354..4bb5ab47659 100644 --- a/src/Functions/toTimezone.cpp +++ b/src/Functions/toTimezone.cpp @@ -19,20 +19,70 @@ namespace ErrorCodes namespace { +class ExecutableFunctionToTimeZone : public IExecutableFunction +{ +public: + explicit ExecutableFunctionToTimeZone() = default; + + String getName() const override { return "toTimezone"; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override + { + return arguments[0].column; + } +}; + +class FunctionBaseToTimeZone : public IFunctionBase +{ +public: + FunctionBaseToTimeZone( + bool is_constant_timezone_, + DataTypes argument_types_, + DataTypePtr return_type_) + : is_constant_timezone(is_constant_timezone_) + , argument_types(std::move(argument_types_)) + , return_type(std::move(return_type_)) {} + + String getName() const override { return "toTimezone"; } + + const DataTypes & getArgumentTypes() const override + { + return argument_types; + } + + const DataTypePtr & getResultType() const override + { + return return_type; + } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*arguments*/) const override + { + return std::make_unique(); + } + + bool hasInformationAboutMonotonicity() const override { return is_constant_timezone; } + + Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const override + { + return {is_constant_timezone, is_constant_timezone, is_constant_timezone}; + } + +private: + bool is_constant_timezone; + DataTypes argument_types; + DataTypePtr return_type; +}; /// Just changes time zone information for data type. The calculation is free. 
-class FunctionToTimezone : public IFunction +class ToTimeZoneOverloadResolver : public IFunctionOverloadResolver { public: static constexpr auto name = "toTimezone"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } + static FunctionOverloadResolverPtr create(ContextPtr) { return std::make_unique(); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { @@ -54,9 +104,17 @@ public: return std::make_shared(date_time64->getScale(), time_zone_name); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override { - return arguments[0].column; + bool is_constant_timezone = false; + if (arguments[1].column) + is_constant_timezone = isColumnConst(*arguments[1].column); + + DataTypes data_types(arguments.size()); + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + return std::make_unique(is_constant_timezone, data_types, result_type); } }; @@ -64,7 +122,7 @@ public: void registerFunctionToTimeZone(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); factory.registerAlias("toTimeZone", "toTimezone"); } diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 2db4a7645a1..aa8ca3d9b8f 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -39,6 +39,7 @@ PEERDIR( SRCS( CRC.cpp + FunctionChar.cpp FunctionFQDN.cpp FunctionFactory.cpp FunctionFile.cpp @@ -46,7 +47,10 @@ SRCS( FunctionJoinGet.cpp FunctionSQLJSON.cpp FunctionsAES.cpp - FunctionsCoding.cpp + FunctionsBinaryRepr.cpp + FunctionsBitToArray.cpp + FunctionsCodingIP.cpp + FunctionsCodingUUID.cpp FunctionsConversion.cpp FunctionsEmbeddedDictionaries.cpp FunctionsExternalDictionaries.cpp @@ -209,7 +213,6 @@ SRCS( bitTestAny.cpp bitWrapperFunc.cpp bitXor.cpp - bitmaskToList.cpp blockNumber.cpp blockSerializedSize.cpp blockSize.cpp @@ -277,6 +280,7 @@ SRCS( gcd.cpp generateUUIDv4.cpp geoToH3.cpp + geoToS2.cpp geohashDecode.cpp geohashEncode.cpp geohashesInBox.cpp @@ -300,6 +304,7 @@ SRCS( h3ToParent.cpp h3ToString.cpp h3kRing.cpp + h3toGeo.cpp hasColumnInTable.cpp hasThreadFuzzer.cpp hasToken.cpp @@ -455,6 +460,15 @@ SRCS( runningConcurrency.cpp runningDifference.cpp runningDifferenceStartingWithFirstValue.cpp + s2CapContains.cpp + s2CapUnion.cpp + s2CellsIntersect.cpp + s2GetNeighbors.cpp + s2RectAdd.cpp + s2RectContains.cpp + s2RectIntersection.cpp + s2RectUnion.cpp + s2ToGeo.cpp sigmoid.cpp sign.cpp sin.cpp @@ -463,6 +477,7 @@ SRCS( sleepEachRow.cpp sqrt.cpp startsWith.cpp + stringCutToZero.cpp stringToH3.cpp substring.cpp subtractDays.cpp diff --git a/src/IO/FileEncryptionCommon.cpp b/src/IO/FileEncryptionCommon.cpp new file mode 100644 index 00000000000..a2cb3fde65f --- /dev/null +++ b/src/IO/FileEncryptionCommon.cpp @@ -0,0 +1,363 @@ +#include + +#if USE_SSL +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int DATA_ENCRYPTION_ERROR; +} + +namespace FileEncryption +{ + +namespace +{ + const EVP_CIPHER * getCipher(Algorithm algorithm) + { + switch (algorithm) + { + case Algorithm::AES_128_CTR: return EVP_aes_128_ctr(); + case 
Algorithm::AES_192_CTR: return EVP_aes_192_ctr(); + case Algorithm::AES_256_CTR: return EVP_aes_256_ctr(); + } + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Encryption algorithm {} is not supported, specify one of the following: aes_128_ctr, aes_192_ctr, aes_256_ctr", + std::to_string(static_cast(algorithm))); + } + + void checkKeySize(const EVP_CIPHER * evp_cipher, size_t key_size) + { + if (!key_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encryption key must not be empty"); + size_t expected_key_size = static_cast(EVP_CIPHER_key_length(evp_cipher)); + if (key_size != expected_key_size) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Got an encryption key with unexpected size {}, the size should be {}", key_size, expected_key_size); + } + + void checkInitVectorSize(const EVP_CIPHER * evp_cipher) + { + size_t expected_iv_length = static_cast(EVP_CIPHER_iv_length(evp_cipher)); + if (InitVector::kSize != expected_iv_length) + throw Exception( + ErrorCodes::DATA_ENCRYPTION_ERROR, + "Got an initialization vector with unexpected size {}, the size should be {}", + InitVector::kSize, + expected_iv_length); + } + + constexpr const size_t kBlockSize = 16; + + size_t blockOffset(size_t pos) { return pos % kBlockSize; } + size_t blocks(size_t pos) { return pos / kBlockSize; } + + size_t partBlockSize(size_t size, size_t off) + { + assert(off < kBlockSize); + /// write the part as usual block + if (off == 0) + return 0; + return off + size <= kBlockSize ? size : (kBlockSize - off) % kBlockSize; + } + + size_t encryptBlocks(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, WriteBuffer & out) + { + const uint8_t * in = reinterpret_cast(data); + size_t in_size = 0; + size_t out_size = 0; + + while (in_size < size) + { + out.nextIfAtEnd(); + size_t part_size = std::min(size - in_size, out.available()); + uint8_t * ciphertext = reinterpret_cast(out.position()); + int ciphertext_size = 0; + if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, &in[in_size], part_size)) + throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + + in_size += part_size; + if (ciphertext_size) + { + out.position() += ciphertext_size; + out_size += ciphertext_size; + } + } + + return out_size; + } + + size_t encryptBlockWithPadding(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, size_t pad_left, WriteBuffer & out) + { + assert((size <= kBlockSize) && (size + pad_left <= kBlockSize)); + uint8_t padded_data[kBlockSize] = {}; + memcpy(&padded_data[pad_left], data, size); + size_t padded_data_size = pad_left + size; + + uint8_t ciphertext[kBlockSize]; + int ciphertext_size = 0; + if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, padded_data, padded_data_size)) + throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (!ciphertext_size) + return 0; + + if (static_cast(ciphertext_size) < pad_left) + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Unexpected size of encrypted data: {} < {}", ciphertext_size, pad_left); + + uint8_t * ciphertext_begin = &ciphertext[pad_left]; + ciphertext_size -= pad_left; + out.write(reinterpret_cast(ciphertext_begin), ciphertext_size); + return ciphertext_size; + } + + size_t encryptFinal(EVP_CIPHER_CTX * evp_ctx, WriteBuffer & out) + { + uint8_t ciphertext[kBlockSize]; + int ciphertext_size = 0; + if (!EVP_EncryptFinal_ex(evp_ctx, + ciphertext, &ciphertext_size)) + throw Exception("Failed to finalize encrypting", ErrorCodes::DATA_ENCRYPTION_ERROR); + if (ciphertext_size) + 
out.write(reinterpret_cast(ciphertext), ciphertext_size); + return ciphertext_size; + } + + size_t decryptBlocks(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, char * out) + { + const uint8_t * in = reinterpret_cast(data); + uint8_t * plaintext = reinterpret_cast(out); + int plaintext_size = 0; + if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, in, size)) + throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + return plaintext_size; + } + + size_t decryptBlockWithPadding(EVP_CIPHER_CTX * evp_ctx, const char * data, size_t size, size_t pad_left, char * out) + { + assert((size <= kBlockSize) && (size + pad_left <= kBlockSize)); + uint8_t padded_data[kBlockSize] = {}; + memcpy(&padded_data[pad_left], data, size); + size_t padded_data_size = pad_left + size; + + uint8_t plaintext[kBlockSize]; + int plaintext_size = 0; + if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, padded_data, padded_data_size)) + throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (!plaintext_size) + return 0; + + if (static_cast(plaintext_size) < pad_left) + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Unexpected size of decrypted data: {} < {}", plaintext_size, pad_left); + + const uint8_t * plaintext_begin = &plaintext[pad_left]; + plaintext_size -= pad_left; + memcpy(out, plaintext_begin, plaintext_size); + return plaintext_size; + } + + size_t decryptFinal(EVP_CIPHER_CTX * evp_ctx, char * out) + { + uint8_t plaintext[kBlockSize]; + int plaintext_size = 0; + if (!EVP_DecryptFinal_ex(evp_ctx, plaintext, &plaintext_size)) + throw Exception("Failed to finalize decrypting", ErrorCodes::DATA_ENCRYPTION_ERROR); + if (plaintext_size) + memcpy(out, plaintext, plaintext_size); + return plaintext_size; + } +} + + +String toString(Algorithm algorithm) +{ + switch (algorithm) + { + case Algorithm::AES_128_CTR: return "aes_128_ctr"; + case Algorithm::AES_192_CTR: return "aes_192_ctr"; + case Algorithm::AES_256_CTR: return "aes_256_ctr"; + } + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Encryption algorithm {} is not supported, specify one of the following: aes_128_ctr, aes_192_ctr, aes_256_ctr", + std::to_string(static_cast(algorithm))); +} + +void parseFromString(Algorithm & algorithm, const String & str) +{ + if (boost::iequals(str, "aes_128_ctr")) + algorithm = Algorithm::AES_128_CTR; + else if (boost::iequals(str, "aes_192_ctr")) + algorithm = Algorithm::AES_192_CTR; + else if (boost::iequals(str, "aes_256_ctr")) + algorithm = Algorithm::AES_256_CTR; + else + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Encryption algorithm '{}' is not supported, specify one of the following: aes_128_ctr, aes_192_ctr, aes_256_ctr", + str); +} + +void checkKeySize(Algorithm algorithm, size_t key_size) { checkKeySize(getCipher(algorithm), key_size); } + + +String InitVector::toString() const +{ + static_assert(sizeof(counter) == InitVector::kSize); + WriteBufferFromOwnString out; + writeBinaryBigEndian(counter, out); + return std::move(out.str()); +} + +InitVector InitVector::fromString(const String & str) +{ + if (str.length() != InitVector::kSize) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected iv with size {}, got iv with size {}", InitVector::kSize, str.length()); + ReadBufferFromMemory in{str.data(), str.length()}; + UInt128 counter; + readBinaryBigEndian(counter, in); + return InitVector{counter}; +} + +void InitVector::read(ReadBuffer & in) +{ + readBinaryBigEndian(counter, in); +} + +void InitVector::write(WriteBuffer & out) const +{ 
+ writeBinaryBigEndian(counter, out); +} + +InitVector InitVector::random() +{ + std::random_device rd; + std::mt19937 gen{rd()}; + std::uniform_int_distribution dis; + UInt128 counter; + for (size_t i = 0; i != std::size(counter.items); ++i) + counter.items[i] = dis(gen); + return InitVector{counter}; +} + + +Encryptor::Encryptor(Algorithm algorithm_, const String & key_, const InitVector & iv_) + : key(key_) + , init_vector(iv_) + , evp_cipher(getCipher(algorithm_)) +{ + checkKeySize(evp_cipher, key.size()); + checkInitVectorSize(evp_cipher); +} + +void Encryptor::encrypt(const char * data, size_t size, WriteBuffer & out) +{ + if (!size) + return; + + auto current_iv = (init_vector + blocks(offset)).toString(); + + auto evp_ctx_ptr = std::unique_ptr(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free); + auto * evp_ctx = evp_ctx_ptr.get(); + + if (!EVP_EncryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr)) + throw Exception("Failed to initialize encryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (!EVP_EncryptInit_ex(evp_ctx, nullptr, nullptr, + reinterpret_cast(key.c_str()), reinterpret_cast(current_iv.c_str()))) + throw Exception("Failed to set key and IV for encryption", ErrorCodes::DATA_ENCRYPTION_ERROR); + + size_t in_size = 0; + size_t out_size = 0; + + auto off = blockOffset(offset); + if (off) + { + size_t in_part_size = partBlockSize(size, off); + size_t out_part_size = encryptBlockWithPadding(evp_ctx, &data[in_size], in_part_size, off, out); + in_size += in_part_size; + out_size += out_part_size; + } + + if (in_size < size) + { + size_t in_part_size = size - in_size; + size_t out_part_size = encryptBlocks(evp_ctx, &data[in_size], in_part_size, out); + in_size += in_part_size; + out_size += out_part_size; + } + + out_size += encryptFinal(evp_ctx, out); + + if (out_size != in_size) + throw Exception("Only part of the data was encrypted", ErrorCodes::DATA_ENCRYPTION_ERROR); + offset += in_size; +} + +void Encryptor::decrypt(const char * data, size_t size, char * out) +{ + if (!size) + return; + + auto current_iv = (init_vector + blocks(offset)).toString(); + + auto evp_ctx_ptr = std::unique_ptr(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free); + auto * evp_ctx = evp_ctx_ptr.get(); + + if (!EVP_DecryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr)) + throw Exception("Failed to initialize decryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR); + + if (!EVP_DecryptInit_ex(evp_ctx, nullptr, nullptr, + reinterpret_cast(key.c_str()), reinterpret_cast(current_iv.c_str()))) + throw Exception("Failed to set key and IV for decryption", ErrorCodes::DATA_ENCRYPTION_ERROR); + + size_t in_size = 0; + size_t out_size = 0; + + auto off = blockOffset(offset); + if (off) + { + size_t in_part_size = partBlockSize(size, off); + size_t out_part_size = decryptBlockWithPadding(evp_ctx, &data[in_size], in_part_size, off, &out[out_size]); + in_size += in_part_size; + out_size += out_part_size; + } + + if (in_size < size) + { + size_t in_part_size = size - in_size; + size_t out_part_size = decryptBlocks(evp_ctx, &data[in_size], in_part_size, &out[out_size]); + in_size += in_part_size; + out_size += out_part_size; + } + + out_size += decryptFinal(evp_ctx, &out[out_size]); + + if (out_size != in_size) + throw Exception("Only part of the data was decrypted", ErrorCodes::DATA_ENCRYPTION_ERROR); + offset += in_size; +} + +bool isKeyLengthSupported(size_t key_length) +{ + return (key_length == 16) || (key_length == 24) || (key_length == 32); +} + +} +} + +#endif diff 
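For reference, a standalone sketch (not part of the patch) of how the block-offset helpers above split an encryption call that starts in the middle of a 16-byte block; the offset and size below are made-up values:

#include <cstddef>
#include <cassert>

constexpr size_t kBlockSize = 16;
size_t blockOffset(size_t pos) { return pos % kBlockSize; }
size_t blocks(size_t pos) { return pos / kBlockSize; }
size_t partBlockSize(size_t size, size_t off)
{
    if (off == 0)
        return 0;
    return off + size <= kBlockSize ? size : (kBlockSize - off) % kBlockSize;
}

int main()
{
    const size_t offset = 20;  // current position in the plaintext stream
    const size_t size = 100;   // bytes passed to Encryptor::encrypt()

    assert(blocks(offset) == 1);          // the initialization vector is advanced by one whole block
    assert(blockOffset(offset) == 4);     // the write starts 4 bytes into a block
    assert(partBlockSize(size, 4) == 12); // 12 bytes go through encryptBlockWithPadding(),
                                          // the remaining 88 through encryptBlocks()
    return 0;
}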
--git a/src/IO/FileEncryptionCommon.h b/src/IO/FileEncryptionCommon.h new file mode 100644 index 00000000000..b2390f920d9 --- /dev/null +++ b/src/IO/FileEncryptionCommon.h @@ -0,0 +1,107 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + +namespace DB +{ +class ReadBuffer; +class WriteBuffer; + +namespace FileEncryption +{ + +/// Encryption algorithm. +/// We chose to use CTR cipther algorithms because they have the following features which are important for us: +/// - No right padding, so we can append encrypted files without deciphering; +/// - One byte is always ciphered as one byte, so we get random access to encrypted files easily. +enum class Algorithm +{ + AES_128_CTR, /// Size of key is 16 bytes. + AES_192_CTR, /// Size of key is 24 bytes. + AES_256_CTR, /// Size of key is 32 bytes. +}; + +String toString(Algorithm algorithm); +void parseFromString(Algorithm & algorithm, const String & str); + +/// Throws an exception if a specified key size doesn't correspond a specified encryption algorithm. +void checkKeySize(Algorithm algorithm, size_t key_size); + + +/// Initialization vector. Its size is always 16 bytes. +class InitVector +{ +public: + static constexpr const size_t kSize = 16; + + InitVector() = default; + explicit InitVector(const UInt128 & counter_) { set(counter_); } + + void set(const UInt128 & counter_) { counter = counter_; } + UInt128 get() const { return counter; } + + void read(ReadBuffer & in); + void write(WriteBuffer & out) const; + + /// Write 16 bytes of the counter to a string in big endian order. + /// We need big endian because the used cipher algorithms treat an initialization vector as a counter in big endian. + String toString() const; + + /// Converts a string of 16 bytes length in big endian order to a counter. + static InitVector fromString(const String & str_); + + /// Adds a specified offset to the counter. + InitVector & operator++() { ++counter; return *this; } + InitVector operator++(int) { InitVector res = *this; ++counter; return res; } + InitVector & operator+=(size_t offset) { counter += offset; return *this; } + InitVector operator+(size_t offset) const { InitVector res = *this; return res += offset; } + + /// Generates a random initialization vector. + static InitVector random(); + +private: + UInt128 counter = 0; +}; + +/// Encrypts or decrypts data. +class Encryptor +{ +public: + /// The `key` should have size 16 or 24 or 32 bytes depending on which `algorithm` is specified. + Encryptor(Algorithm algorithm_, const String & key_, const InitVector & iv_); + + /// Sets the current position in the data stream from the very beginning of data. + /// It affects how the data will be encrypted or decrypted because + /// the initialization vector is increased by an index of the current block + /// and the index of the current block is calculated from this offset. + void setOffset(size_t offset_) { offset = offset_; } + + /// Encrypts some data. + /// Also the function moves `offset` by `size` (for successive encryptions). + void encrypt(const char * data, size_t size, WriteBuffer & out); + + /// Decrypts some data. + /// The used cipher algorithms generate the same number of bytes in output as they were in input, + /// so the function always writes `size` bytes of the plaintext to `out`. + /// Also the function moves `offset` by `size` (for successive decryptions). 
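+ /// In CTR mode deciphering applies the same keystream XOR as enciphering, so a block can be
+ /// decrypted starting from any position; this is what makes random access via setOffset() work.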
+ void decrypt(const char * data, size_t size, char * out); + +private: + const String key; + const InitVector init_vector; + const EVP_CIPHER * const evp_cipher; + + /// The current position in the data stream from the very beginning of data. + size_t offset = 0; +}; + +} +} + +#endif diff --git a/src/IO/Progress.h b/src/IO/Progress.h index 446acef9abd..e1253ab8eb8 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -120,4 +121,12 @@ struct Progress } }; + +/** Callback to track the progress of the query. + * Used in IBlockInputStream and Context. + * The function takes the number of rows in the last block, the number of bytes in the last block. + * Note that the callback can be called from different threads. + */ +using ProgressCallback = std::function; + } diff --git a/src/IO/ReadBufferFromEncryptedFile.cpp b/src/IO/ReadBufferFromEncryptedFile.cpp new file mode 100644 index 00000000000..252851d6934 --- /dev/null +++ b/src/IO/ReadBufferFromEncryptedFile.cpp @@ -0,0 +1,106 @@ +#include + +#if USE_SSL + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; +} + +using InitVector = FileEncryption::InitVector; + +ReadBufferFromEncryptedFile::ReadBufferFromEncryptedFile( + size_t buffer_size_, + std::unique_ptr in_, + FileEncryption::Algorithm encryption_algorithm_, + const String & key_, + const InitVector & init_vector_) + : ReadBufferFromFileBase(buffer_size_, nullptr, 0) + , in(std::move(in_)) + , encrypted_buffer(buffer_size_) + , encryptor(encryption_algorithm_, key_, init_vector_) +{ + /// We should start reading from `in` at the offset == InitVector::kSize. + need_seek = true; +} + +off_t ReadBufferFromEncryptedFile::seek(off_t off, int whence) +{ + off_t new_pos; + if (whence == SEEK_SET) + { + if (off < 0) + throw Exception("SEEK_SET underflow: off = " + std::to_string(off), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + new_pos = off; + } + else if (whence == SEEK_CUR) + { + if (off < 0 && -off > getPosition()) + throw Exception("SEEK_CUR shift out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + new_pos = getPosition() + off; + } + else + throw Exception("ReadBufferFromFileEncrypted::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + if ((offset - static_cast(working_buffer.size()) <= new_pos) && (new_pos <= offset) && !need_seek) + { + /// Position is still inside buffer. + pos = working_buffer.end() - offset + new_pos; + assert(pos >= working_buffer.begin()); + assert(pos <= working_buffer.end()); + } + else + { + need_seek = true; + offset = new_pos; + + /// No more reading from the current working buffer until next() is called. + pos = working_buffer.end(); + assert(!hasPendingData()); + } + + /// The encryptor always needs to know what the current offset is. + encryptor.setOffset(new_pos); + + return new_pos; +} + +off_t ReadBufferFromEncryptedFile::getPosition() +{ + return offset - available(); +} + +bool ReadBufferFromEncryptedFile::nextImpl() +{ + if (need_seek) + { + off_t raw_offset = offset + InitVector::kSize; + if (in->seek(raw_offset, SEEK_SET) != raw_offset) + return false; + need_seek = false; + } + + if (in->eof()) + return false; + + /// Read up to the size of `encrypted_buffer`. 
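+ /// The nested buffer is allowed to return less data than requested, so keep reading
+ /// until the local buffer is full or the nested buffer reaches EOF.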
+ size_t bytes_read = 0; + while (bytes_read < encrypted_buffer.size() && !in->eof()) + { + bytes_read += in->read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read); + } + + /// The used cipher algorithms generate the same number of bytes in output as it were in input, + /// so after deciphering the numbers of bytes will be still `bytes_read`. + working_buffer.resize(bytes_read); + encryptor.decrypt(encrypted_buffer.data(), bytes_read, working_buffer.begin()); + + pos = working_buffer.begin(); + return true; +} + +} + +#endif diff --git a/src/IO/ReadBufferFromEncryptedFile.h b/src/IO/ReadBufferFromEncryptedFile.h new file mode 100644 index 00000000000..12faece6b15 --- /dev/null +++ b/src/IO/ReadBufferFromEncryptedFile.h @@ -0,0 +1,45 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + + +namespace DB +{ + +/// Reads data from the underlying read buffer and decrypts it. +class ReadBufferFromEncryptedFile : public ReadBufferFromFileBase +{ +public: + ReadBufferFromEncryptedFile( + size_t buffer_size_, + std::unique_ptr in_, + FileEncryption::Algorithm encryption_algorithm_, + const String & key_, + const FileEncryption::InitVector & init_vector_); + + off_t seek(off_t off, int whence) override; + off_t getPosition() override; + + std::string getFileName() const override { return in->getFileName(); } + +private: + bool nextImpl() override; + + std::unique_ptr in; + + off_t offset = 0; + bool need_seek = false; + + Memory<> encrypted_buffer; + FileEncryption::Encryptor encryptor; +}; + +} + +#endif diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 9abdab11259..aa241322edf 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -45,20 +45,27 @@ bool ReadBufferFromS3::nextImpl() { Stopwatch watch; bool next_result = false; - auto sleep_time_with_backoff_milliseconds = std::chrono::milliseconds(100); - if (!impl) + if (impl) + { + /// `impl` has been initialized earlier and now we're at the end of the current portion of data. + impl->position() = position(); + assert(!impl->hasPendingData()); + } + else + { + /// `impl` is not initialized and we're about to read the first portion of data. impl = initialize(); + next_result = impl->hasPendingData(); + } - for (size_t attempt = 0; attempt < max_single_read_retries; ++attempt) + auto sleep_time_with_backoff_milliseconds = std::chrono::milliseconds(100); + for (size_t attempt = 0; (attempt < max_single_read_retries) && !next_result; ++attempt) { try { + /// Try to read a next portion of data. next_result = impl->next(); - /// FIXME. 1. Poco `istream` cannot read less than buffer_size or this state is being discarded during - /// istream <-> iostream conversion. `gcount` always contains 0, - /// that's why we always have error "Cannot read from istream at offset 0". - break; } catch (const Exception & e) @@ -68,24 +75,26 @@ bool ReadBufferFromS3::nextImpl() LOG_INFO(log, "Caught exception while reading S3 object. Bucket: {}, Key: {}, Offset: {}, Attempt: {}, Message: {}", bucket, key, getPosition(), attempt, e.message()); + /// Pause before next attempt. + std::this_thread::sleep_for(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; + + /// Try to reinitialize `impl`. 
impl.reset(); impl = initialize(); + next_result = impl->hasPendingData(); } - - std::this_thread::sleep_for(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; } watch.stop(); ProfileEvents::increment(ProfileEvents::S3ReadMicroseconds, watch.elapsedMicroseconds()); + if (!next_result) return false; - working_buffer = internal_buffer = impl->buffer(); - pos = working_buffer.begin(); - - ProfileEvents::increment(ProfileEvents::S3ReadBytes, internal_buffer.size()); + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl` + ProfileEvents::increment(ProfileEvents::S3ReadBytes, working_buffer.size()); offset += working_buffer.size(); return true; diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 2a5594a6866..f6ccfbd56bb 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -327,6 +327,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) && decoded_char != '"' && decoded_char != '`' /// MySQL style identifiers && decoded_char != '/' /// JavaScript in HTML + && decoded_char != '=' /// Yandex's TSKV && !isControlASCII(decoded_char)) { s.push_back('\\'); @@ -351,9 +352,12 @@ static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf) }; ++buf.position(); + if (buf.eof()) return error("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); + assert(buf.hasPendingData()); + switch (*buf.position()) { case '"': @@ -1124,10 +1128,13 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current) const size_t old_bytes = memory.size(); const size_t additional_bytes = current - in.position(); const size_t new_bytes = old_bytes + additional_bytes; + /// There are no new bytes to add to memory. /// No need to do extra stuff. if (new_bytes == 0) return; + + assert(in.position() + additional_bytes <= in.buffer().end()); memory.resize(new_bytes); memcpy(memory.data() + old_bytes, in.position(), additional_bytes); in.position() = current; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index e3a71789979..d8e31c18617 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -644,7 +644,7 @@ inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf) else if (!readDateTextImpl(local_date, buf)) return false; /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. - date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day(), -DateLUT::instance().getDayNumOffsetEpoch()); + date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(DateLUT::instance().getDayNumOffsetEpoch())); return ReturnType(true); } @@ -921,6 +921,17 @@ readBinaryBigEndian(T & x, ReadBuffer & buf) /// Assuming little endian archi x = __builtin_bswap64(x); } +template +inline std::enable_if_t, void> +readBinaryBigEndian(T & x, ReadBuffer & buf) /// Assuming little endian architecture. +{ + for (size_t i = 0; i != std::size(x.items); ++i) + { + auto & item = x.items[std::size(x.items) - i - 1]; + readBinaryBigEndian(item, buf); + } +} + /// Generic methods to read value in text tab-separated format. 
template diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index f8e6d817fb1..97620f0c03c 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -17,7 +17,7 @@ public: * Shifts buffer current position to given offset. * @param off Offset. * @param whence Seek mode (@see SEEK_SET, @see SEEK_CUR). - * @return New position from the begging of underlying buffer / file. + * @return New position from the beginning of underlying buffer / file. */ virtual off_t seek(off_t off, int whence) = 0; diff --git a/src/IO/WriteBufferFromEncryptedFile.cpp b/src/IO/WriteBufferFromEncryptedFile.cpp new file mode 100644 index 00000000000..654e80e03a8 --- /dev/null +++ b/src/IO/WriteBufferFromEncryptedFile.cpp @@ -0,0 +1,90 @@ +#include + +#if USE_SSL +#include + +namespace DB +{ + +using InitVector = FileEncryption::InitVector; + +WriteBufferFromEncryptedFile::WriteBufferFromEncryptedFile( + size_t buffer_size_, + std::unique_ptr out_, + FileEncryption::Algorithm encryption_algorithm_, + const String & key_, + const InitVector & init_vector_, + size_t old_file_size) + : WriteBufferFromFileBase(buffer_size_, nullptr, 0) + , out(std::move(out_)) + , iv(init_vector_) + , flush_iv(!old_file_size) + , encryptor(encryption_algorithm_, key_, init_vector_) +{ + encryptor.setOffset(old_file_size); +} + +WriteBufferFromEncryptedFile::~WriteBufferFromEncryptedFile() +{ + /// FIXME move final flush into the caller + MemoryTracker::LockExceptionInThread lock(VariableContext::Global); + finish(); +} + +void WriteBufferFromEncryptedFile::finish() +{ + if (finished) + return; + + try + { + finishImpl(); + out->finalize(); + finished = true; + } + catch (...) + { + /// Do not try to flush next time after exception. + out->position() = out->buffer().begin(); + finished = true; + throw; + } +} + +void WriteBufferFromEncryptedFile::finishImpl() +{ + /// If buffer has pending data - write it. + next(); + + /// Note that if there is no data to write an empty file will be written, even without the initialization vector + /// (see nextImpl(): it writes the initialization vector only if there is some data ready to write). + /// That's fine because DiskEncrypted allows files without initialization vectors when they're empty. + + out->finalize(); +} + +void WriteBufferFromEncryptedFile::sync() +{ + /// If buffer has pending data - write it. + next(); + + out->sync(); +} + +void WriteBufferFromEncryptedFile::nextImpl() +{ + if (!offset()) + return; + + if (flush_iv) + { + iv.write(*out); + flush_iv = false; + } + + encryptor.encrypt(working_buffer.begin(), offset(), *out); +} + +} + +#endif diff --git a/src/IO/WriteBufferFromEncryptedFile.h b/src/IO/WriteBufferFromEncryptedFile.h new file mode 100644 index 00000000000..02ca70e8750 --- /dev/null +++ b/src/IO/WriteBufferFromEncryptedFile.h @@ -0,0 +1,51 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include + + +namespace DB +{ + +/// Encrypts data and writes the encrypted data to the underlying write buffer. +class WriteBufferFromEncryptedFile : public WriteBufferFromFileBase +{ +public: + /// `old_file_size` should be set to non-zero if we're going to append an existing file. 
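+ /// (the encryptor then starts at offset == old_file_size, so the CTR keystream continues exactly
+ /// where the previously written data ended)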
+ WriteBufferFromEncryptedFile( + size_t buffer_size_, + std::unique_ptr out_, + FileEncryption::Algorithm encryption_algorithm_, + const String & key_, + const FileEncryption::InitVector & init_vector_, + size_t old_file_size = 0); + ~WriteBufferFromEncryptedFile() override; + + void sync() override; + void finalize() override { finish(); } + + std::string getFileName() const override { return out->getFileName(); } + +private: + void nextImpl() override; + + void finish(); + void finishImpl(); + + bool finished = false; + std::unique_ptr out; + + FileEncryption::InitVector iv; + bool flush_iv = false; + + FileEncryption::Encryptor encryptor; +}; + +} + +#endif diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index d5a123fa1f6..556adbe2d6f 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1099,6 +1099,17 @@ writeBinaryBigEndian(T x, WriteBuffer & buf) /// Assuming little endian archi writePODBinary(x, buf); } +template +inline std::enable_if_t, void> +writeBinaryBigEndian(const T & x, WriteBuffer & buf) /// Assuming little endian architecture. +{ + for (size_t i = 0; i != std::size(x.items); ++i) + { + const auto & item = x.items[std::size(x.items) - i - 1]; + writeBinaryBigEndian(item, buf); + } +} + struct PcgSerializer { static void serializePcg32(const pcg32_fast & rng, WriteBuffer & buf) diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp index b441a6a7210..6c03ea420a9 100644 --- a/src/IO/ZstdInflatingReadBuffer.cpp +++ b/src/IO/ZstdInflatingReadBuffer.cpp @@ -56,6 +56,13 @@ bool ZstdInflatingReadBuffer::nextImpl() eof = true; return !working_buffer.empty(); } + else if (output.pos == 0) + { + /// It is possible, that input buffer is not at eof yet, but nothing was decompressed in current iteration. + /// But there are cases, when such behaviour is not allowed - i.e. if input buffer is not eof, then + /// it has to be guaranteed that working_buffer is not empty. So if it is empty, continue. + return nextImpl(); + } return true; } diff --git a/src/IO/tests/gtest_file_encryption.cpp b/src/IO/tests/gtest_file_encryption.cpp new file mode 100644 index 00000000000..187073c7262 --- /dev/null +++ b/src/IO/tests/gtest_file_encryption.cpp @@ -0,0 +1,215 @@ +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_SSL +#include +#include +#include + + +using namespace DB; +using namespace DB::FileEncryption; + + +struct InitVectorTestParam +{ + const String init; + const String after_inc; + const UInt64 adder; + const String after_add; +}; + +class FileEncryptionInitVectorTest : public ::testing::TestWithParam {}; + +TEST_P(FileEncryptionInitVectorTest, InitVector) +{ + const auto & param = GetParam(); + + auto iv = InitVector::fromString(param.init); + ASSERT_EQ(param.init, iv.toString()); + + ++iv; + ASSERT_EQ(param.after_inc, iv.toString()); + + iv += param.adder; + ASSERT_EQ(param.after_add, iv.toString()); +} + +INSTANTIATE_TEST_SUITE_P(All, + FileEncryptionInitVectorTest, + ::testing::ValuesIn(std::initializer_list + { + { // #0. Basic init vector test. Get zero-string, add 1, add 0. + String(16, 0), + String("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", 16), + 0, + String("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", 16), + }, + { + // #1. Init vector test. Get zero-string, add 1, add 85, add 1024. 
+ String(16, 0), + String("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", 16), + 85, + String("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x56", 16), + }, + { + // #2. Init vector test #2. Get zero-string, add 1, add 1024. + String(16, 0), + String("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", 16), + 1024, + String("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x01", 16) + }, + { + // #3. Long init vector test. + String("\xa8\x65\x9c\x73\xf8\x5d\x83\xb4\x9c\xa6\x8c\x19\xf4\x77\x80\xe1", 16), + String("\xa8\x65\x9c\x73\xf8\x5d\x83\xb4\x9c\xa6\x8c\x19\xf4\x77\x80\xe2", 16), + 9349249176525638641ULL, + String("\xa8\x65\x9c\x73\xf8\x5d\x83\xb5\x1e\x65\xc0\xb1\x67\xe4\x0c\xd3", 16) + }, + }) +); + + +struct CipherTestParam +{ + const Algorithm algorithm; + const String key; + const InitVector iv; + const size_t offset; + const String plaintext; + const String ciphertext; +}; + +class FileEncryptionCipherTest : public ::testing::TestWithParam {}; + +TEST_P(FileEncryptionCipherTest, Encryption) +{ + const auto & param = GetParam(); + + Encryptor encryptor{param.algorithm, param.key, param.iv}; + std::string_view input = param.plaintext; + std::string_view expected = param.ciphertext; + size_t base_offset = param.offset; + + encryptor.setOffset(base_offset); + for (size_t i = 0; i < expected.size(); ++i) + { + WriteBufferFromOwnString buf; + encryptor.encrypt(&input[i], 1, buf); + ASSERT_EQ(expected.substr(i, 1), buf.str()); + } + + for (size_t i = 0; i < expected.size(); ++i) + { + WriteBufferFromOwnString buf; + encryptor.setOffset(base_offset + i); + encryptor.encrypt(&input[i], 1, buf); + ASSERT_EQ(expected.substr(i, 1), buf.str()); + } + + for (size_t i = 0; i <= expected.size(); ++i) + { + WriteBufferFromOwnString buf; + encryptor.setOffset(base_offset); + encryptor.encrypt(input.data(), i, buf); + ASSERT_EQ(expected.substr(0, i), buf.str()); + } +} + +TEST_P(FileEncryptionCipherTest, Decryption) +{ + const auto & param = GetParam(); + + Encryptor encryptor{param.algorithm, param.key, param.iv}; + std::string_view input = param.ciphertext; + std::string_view expected = param.plaintext; + size_t base_offset = param.offset; + + encryptor.setOffset(base_offset); + for (size_t i = 0; i < expected.size(); ++i) + { + char c; + encryptor.decrypt(&input[i], 1, &c); + ASSERT_EQ(expected[i], c); + } + + for (size_t i = 0; i < expected.size(); ++i) + { + char c; + encryptor.setOffset(base_offset + i); + encryptor.decrypt(&input[i], 1, &c); + ASSERT_EQ(expected[i], c); + } + + String buf(expected.size(), 0); + for (size_t i = 0; i <= expected.size(); ++i) + { + encryptor.setOffset(base_offset); + encryptor.decrypt(input.data(), i, buf.data()); + ASSERT_EQ(expected.substr(0, i), buf.substr(0, i)); + } +} + +INSTANTIATE_TEST_SUITE_P(All, + FileEncryptionCipherTest, + ::testing::ValuesIn(std::initializer_list + { + { + // #0 + Algorithm::AES_128_CTR, + "1234567812345678", + InitVector{}, + 0, + "abcd1234efgh5678ijkl", + "\xfb\x8a\x9e\x66\x82\x72\x1b\xbe\x6b\x1d\xd8\x98\xc5\x8c\x63\xee\xcd\x36\x4a\x50" + }, + { + // #1 + Algorithm::AES_128_CTR, + "1234567812345678", + InitVector{}, + 25, + "abcd1234efgh5678ijkl", + "\x6c\x67\xe4\xf5\x8f\x86\xb0\x19\xe5\xcd\x53\x59\xe0\xc6\x01\x5e\xc1\xfd\x60\x9d" + }, + { + // #2 + Algorithm::AES_128_CTR, + String{"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 16}, + InitVector{}, + 0, + "abcd1234efgh5678ijkl", + 
"\xa7\xc3\x58\x53\xb6\xbd\x68\xb6\x0a\x29\xe6\x0a\x94\xfe\xef\x41\x1a\x2c\x78\xf9" + }, + { + // #3 + Algorithm::AES_128_CTR, + "1234567812345678", + InitVector::fromString(String{"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", 16}), + 0, + "abcd1234efgh5678ijkl", + "\xcf\xab\x7c\xad\xa9\xdc\x67\x60\x90\x85\x7b\xb8\x72\xa9\x6f\x9c\x29\xb2\x4f\xf6" + }, + { + // #4 + Algorithm::AES_192_CTR, + "123456781234567812345678", + InitVector{}, + 0, + "abcd1234efgh5678ijkl", + "\xcc\x25\x2b\xad\xe8\xa2\xdc\x64\x3e\xf9\x60\xe0\x6e\xde\x70\xb6\x63\xa8\xfa\x02" + }, + { + // #5 + Algorithm::AES_256_CTR, + "12345678123456781234567812345678", + InitVector{}, + 0, + "abcd1234efgh5678ijkl", + "\xc7\x41\xa6\x63\x04\x60\x1b\x1a\xcb\x84\x19\xce\x3a\x36\xa3\xbd\x21\x71\x93\xfb" + }, + }) +); + +#endif diff --git a/src/IO/ya.make b/src/IO/ya.make index bca108ca426..3bd704ec6f0 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -26,6 +26,7 @@ SRCS( CascadeWriteBuffer.cpp CompressionMethod.cpp DoubleConverter.cpp + FileEncryptionCommon.cpp HTTPChunkedReadBuffer.cpp HTTPCommon.cpp HashingWriteBuffer.cpp @@ -44,6 +45,7 @@ SRCS( NullWriteBuffer.cpp PeekableReadBuffer.cpp Progress.cpp + ReadBufferFromEncryptedFile.cpp ReadBufferFromFile.cpp ReadBufferFromFileBase.cpp ReadBufferFromFileDecorator.cpp @@ -55,6 +57,7 @@ SRCS( SeekAvoidingReadBuffer.cpp TimeoutSetter.cpp UseSSL.cpp + WriteBufferFromEncryptedFile.cpp WriteBufferFromFile.cpp WriteBufferFromFileBase.cpp WriteBufferFromFileDecorator.cpp diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 1518706f0a6..63b0345b372 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -26,6 +26,7 @@ namespace ErrorCodes extern const int THERE_IS_NO_COLUMN; extern const int ILLEGAL_COLUMN; extern const int NOT_FOUND_COLUMN_IN_BLOCK; + extern const int BAD_ARGUMENTS; } const char * ActionsDAG::typeToString(ActionsDAG::ActionType type) @@ -202,6 +203,7 @@ const ActionsDAG::Node & ActionsDAG::addFunction( node.function_base = function->build(arguments); node.result_type = node.function_base->getResultType(); node.function = node.function_base->prepare(arguments); + node.is_deterministic = node.function_base->isDeterministic(); /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. if (node.function_base->isSuitableForConstantFolding()) @@ -426,6 +428,16 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs) { /// Constant folding. node->type = ActionsDAG::ActionType::COLUMN; + + for (const auto & child : node->children) + { + if (!child->is_deterministic) + { + node->is_deterministic = false; + break; + } + } + node->children.clear(); } @@ -981,6 +993,14 @@ bool ActionsDAG::trivial() const return true; } +void ActionsDAG::assertDeterministic() const +{ + for (const auto & node : nodes) + if (!node.is_deterministic) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name); +} + void ActionsDAG::addMaterializingOutputActions() { for (auto & node : index) diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 9cd0057bb1a..bfb5b177ac7 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -83,6 +83,9 @@ public: ExecutableFunctionPtr function; /// If function is a compiled statement. bool is_function_compiled = false; + /// It is deterministic (See IFunction::isDeterministic). 
+ /// This property is kept after constant folding of non-deterministic functions like 'now', 'today'. + bool is_deterministic = true; /// For COLUMN node and propagated constants. ColumnPtr column; @@ -175,6 +178,7 @@ public: bool hasArrayJoin() const; bool hasStatefulFunctions() const; bool trivial() const; /// If actions has no functions or array join. + void assertDeterministic() const; /// Throw if not isDeterministic. #if USE_EMBEDDED_COMPILER void compileExpressions(size_t min_count_to_compile_expression); diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index da514759eb5..6b2940154f8 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -546,13 +546,16 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti Int64 peak = total_memory_tracker.getPeak(); Int64 new_amount = data.resident; - LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"), - "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", - ReadableSize(amount), - ReadableSize(peak), - ReadableSize(new_amount), - ReadableSize(new_amount - amount) - ); + Int64 difference = new_amount - amount; + + /// Log only if difference is high. This is for convenience. The threshold is arbitrary. + if (difference >= 1048576 || difference <= -1048576) + LOG_TRACE(&Poco::Logger::get("AsynchronousMetrics"), + "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", + ReadableSize(amount), + ReadableSize(peak), + ReadableSize(new_amount), + ReadableSize(difference)); total_memory_tracker.set(new_amount); CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount); diff --git a/src/Interpreters/ClusterProxy/IStreamFactory.h b/src/Interpreters/ClusterProxy/IStreamFactory.h index f66eee93e0a..d85e97e5a2e 100644 --- a/src/Interpreters/ClusterProxy/IStreamFactory.h +++ b/src/Interpreters/ClusterProxy/IStreamFactory.h @@ -18,6 +18,8 @@ using Pipes = std::vector; class QueryPlan; using QueryPlanPtr = std::unique_ptr; +struct StorageID; + namespace ClusterProxy { @@ -28,15 +30,31 @@ class IStreamFactory public: virtual ~IStreamFactory() = default; + struct Shard + { + /// Query and header may be changed depending on shard. + ASTPtr query; + Block header; + + size_t shard_num = 0; + ConnectionPoolWithFailoverPtr pool; + + /// If we connect to replicas lazily. + /// (When there is a local replica with big delay). 
+ bool lazy = false; + UInt32 local_delay = 0; + }; + + using Shards = std::vector; + virtual void createForShard( const Cluster::ShardInfo & shard_info, const ASTPtr & query_ast, - ContextPtr context, const ThrottlerPtr & throttler, - const SelectQueryInfo & query_info, - std::vector & res, - Pipes & remote_pipes, - Pipes & delayed_pipes, - Poco::Logger * log) = 0; + const StorageID & main_table, + const ASTPtr & table_func_ptr, + ContextPtr context, + std::vector & local_plans, + Shards & remote_shards) = 0; }; } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0c9d42e1381..efad9f899d4 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -11,10 +10,6 @@ #include #include -#include -#include -#include -#include #include #include #include @@ -32,7 +27,6 @@ namespace DB namespace ErrorCodes { - extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_REPLICAS_ARE_STALE; } @@ -42,35 +36,13 @@ namespace ClusterProxy SelectStreamFactory::SelectStreamFactory( const Block & header_, QueryProcessingStage::Enum processed_stage_, - StorageID main_table_, - const Scalars & scalars_, - bool has_virtual_shard_num_column_, - const Tables & external_tables_) + bool has_virtual_shard_num_column_) : header(header_), processed_stage{processed_stage_}, - main_table(std::move(main_table_)), - table_func_ptr{nullptr}, - scalars{scalars_}, - has_virtual_shard_num_column(has_virtual_shard_num_column_), - external_tables{external_tables_} + has_virtual_shard_num_column(has_virtual_shard_num_column_) { } -SelectStreamFactory::SelectStreamFactory( - const Block & header_, - QueryProcessingStage::Enum processed_stage_, - ASTPtr table_func_ptr_, - const Scalars & scalars_, - bool has_virtual_shard_num_column_, - const Tables & external_tables_) - : header(header_), - processed_stage{processed_stage_}, - table_func_ptr{table_func_ptr_}, - scalars{scalars_}, - has_virtual_shard_num_column(has_virtual_shard_num_column_), - external_tables{external_tables_} -{ -} namespace { @@ -152,18 +124,6 @@ void addConvertingActions(QueryPlan & plan, const Block & header) plan.addStep(std::move(converting)); } -void addConvertingActions(Pipe & pipe, const Block & header) -{ - if (blocksHaveEqualStructure(pipe.getHeader(), header)) - return; - - auto convert_actions = std::make_shared(getConvertingDAG(pipe.getHeader(), header)); - pipe.addSimpleTransform([&](const Block & cur_header, Pipe::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header, convert_actions); - }); -} - std::unique_ptr createLocalPlan( const ASTPtr & query_ast, const Block & header, @@ -182,37 +142,17 @@ std::unique_ptr createLocalPlan( return query_plan; } -String formattedAST(const ASTPtr & ast) -{ - if (!ast) - return {}; - WriteBufferFromOwnString buf; - formatAST(*ast, buf, false, true); - return buf.str(); -} - } void SelectStreamFactory::createForShard( const Cluster::ShardInfo & shard_info, const ASTPtr & query_ast, - ContextPtr context, const ThrottlerPtr & throttler, - const SelectQueryInfo &, - std::vector & plans, - Pipes & remote_pipes, - Pipes & delayed_pipes, - Poco::Logger * log) + const StorageID & main_table, + const ASTPtr & table_func_ptr, + ContextPtr context, + std::vector & local_plans, + Shards & remote_shards) { - bool add_agg_info = processed_stage == 
QueryProcessingStage::WithMergeableState; - bool add_totals = false; - bool add_extremes = false; - bool async_read = context->getSettingsRef().async_socket_for_remote; - if (processed_stage == QueryProcessingStage::Complete) - { - add_totals = query_ast->as().group_by_with_totals; - add_extremes = context->getSettingsRef().extremes; - } - auto modified_query_ast = query_ast->clone(); auto modified_header = header; if (has_virtual_shard_num_column) @@ -231,25 +171,19 @@ void SelectStreamFactory::createForShard( auto emplace_local_stream = [&]() { - plans.emplace_back(createLocalPlan(modified_query_ast, modified_header, context, processed_stage)); - addConvertingActions(*plans.back(), header); + local_plans.emplace_back(createLocalPlan(modified_query_ast, modified_header, context, processed_stage)); + addConvertingActions(*local_plans.back(), header); }; - String modified_query = formattedAST(modified_query_ast); - auto emplace_remote_stream = [&]() { - auto remote_query_executor = std::make_shared( - shard_info.pool, modified_query, modified_header, context, throttler, scalars, external_tables, processed_stage); - remote_query_executor->setLogger(log); - - remote_query_executor->setPoolMode(PoolMode::GET_MANY); - if (!table_func_ptr) - remote_query_executor->setMainTable(main_table); - - remote_pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read)); - remote_pipes.back().addInterpreterContext(context); - addConvertingActions(remote_pipes.back(), header); + remote_shards.emplace_back(Shard{ + .query = modified_query_ast, + .header = modified_header, + .shard_num = shard_info.shard_num, + .pool = shard_info.pool, + .lazy = false + }); }; const auto & settings = context->getSettingsRef(); @@ -340,65 +274,14 @@ void SelectStreamFactory::createForShard( /// Try our luck with remote replicas, but if they are stale too, then fallback to local replica. /// Do it lazily to avoid connecting in the main thread. 
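The removed lambda below shows the criterion this deferral is about: connect to the remote replicas, measure how stale they are, and fall back to the stale local replica only if every remote is unreachable or even further behind. A condensed, self-contained sketch of that rule, with illustrative names (ReplicaProbe and preferLocalReplica are not ClickHouse classes):

```cpp
#include <algorithm>
#include <vector>

// Each remote replica reports whether it is up to date and, if not, how far behind it is.
struct ReplicaProbe
{
    bool is_up_to_date = false;
    double staleness = 0.0;   // seconds behind
};

// Returns true when the query should run on the stale local replica instead of remotes:
// either no remote connection succeeded, or every remote is even more stale.
bool preferLocalReplica(const std::vector<ReplicaProbe> & remotes, double local_delay)
{
    double max_remote_delay = 0.0;
    for (const auto & replica : remotes)
        if (!replica.is_up_to_date)
            max_remote_delay = std::max(replica.staleness, max_remote_delay);

    return remotes.empty() || local_delay < max_remote_delay;
}
```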
- auto lazily_create_stream = [ - pool = shard_info.pool, shard_num = shard_info.shard_num, modified_query, header = modified_header, modified_query_ast, - context, throttler, - main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, - stage = processed_stage, local_delay, add_agg_info, add_totals, add_extremes, async_read]() - -> Pipe - { - auto current_settings = context->getSettingsRef(); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover( - current_settings).getSaturated( - current_settings.max_execution_time); - std::vector try_results; - try - { - if (table_func_ptr) - try_results = pool->getManyForTableFunction(timeouts, ¤t_settings, PoolMode::GET_MANY); - else - try_results = pool->getManyChecked(timeouts, ¤t_settings, PoolMode::GET_MANY, main_table.getQualifiedName()); - } - catch (const Exception & ex) - { - if (ex.code() == ErrorCodes::ALL_CONNECTION_TRIES_FAILED) - LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), - "Connections to remote replicas of local shard {} failed, will use stale local replica", shard_num); - else - throw; - } - - double max_remote_delay = 0.0; - for (const auto & try_result : try_results) - { - if (!try_result.is_up_to_date) - max_remote_delay = std::max(try_result.staleness, max_remote_delay); - } - - if (try_results.empty() || local_delay < max_remote_delay) - { - auto plan = createLocalPlan(modified_query_ast, header, context, stage); - return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context), - BuildQueryPipelineSettings::fromContext(context)))); - } - else - { - std::vector connections; - connections.reserve(try_results.size()); - for (auto & try_result : try_results) - connections.emplace_back(std::move(try_result.entry)); - - auto remote_query_executor = std::make_shared( - std::move(connections), modified_query, header, context, throttler, scalars, external_tables, stage); - - return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read); - } - }; - - delayed_pipes.emplace_back(createDelayedPipe(modified_header, lazily_create_stream, add_totals, add_extremes)); - delayed_pipes.back().addInterpreterContext(context); - addConvertingActions(delayed_pipes.back(), header); + remote_shards.emplace_back(Shard{ + .query = modified_query_ast, + .header = modified_header, + .shard_num = shard_info.shard_num, + .pool = shard_info.pool, + .lazy = true, + .local_delay = local_delay + }); } else emplace_remote_stream(); diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 0705bcb2903..d041ac8ea5f 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -14,42 +14,25 @@ namespace ClusterProxy class SelectStreamFactory final : public IStreamFactory { public: - /// Database in a query. SelectStreamFactory( const Block & header_, QueryProcessingStage::Enum processed_stage_, - StorageID main_table_, - const Scalars & scalars_, - bool has_virtual_shard_num_column_, - const Tables & external_tables); - - /// TableFunction in a query. 
- SelectStreamFactory( - const Block & header_, - QueryProcessingStage::Enum processed_stage_, - ASTPtr table_func_ptr_, - const Scalars & scalars_, - bool has_virtual_shard_num_column_, - const Tables & external_tables_); + bool has_virtual_shard_num_column_); void createForShard( const Cluster::ShardInfo & shard_info, const ASTPtr & query_ast, - ContextPtr context, const ThrottlerPtr & throttler, - const SelectQueryInfo & query_info, - std::vector & plans, - Pipes & remote_pipes, - Pipes & delayed_pipes, - Poco::Logger * log) override; + const StorageID & main_table, + const ASTPtr & table_func_ptr, + ContextPtr context, + std::vector & local_plans, + Shards & remote_shards) override; private: const Block header; QueryProcessingStage::Enum processed_stage; - StorageID main_table = StorageID::createEmpty(); - ASTPtr table_func_ptr; - Scalars scalars; + bool has_virtual_shard_num_column = false; - Tables external_tables; }; } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index a857bf81f95..d3a1b40a8e3 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include @@ -101,6 +101,10 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c void executeQuery( QueryPlan & query_plan, + const Block & header, + QueryProcessingStage::Enum processed_stage, + const StorageID & main_table, + const ASTPtr & table_func_ptr, IStreamFactory & stream_factory, Poco::Logger * log, const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, const ExpressionActionsPtr & sharding_key_expr, @@ -115,8 +119,7 @@ void executeQuery( throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); std::vector plans; - Pipes remote_pipes; - Pipes delayed_pipes; + IStreamFactory::Shards remote_shards; auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, log); @@ -149,6 +152,7 @@ void executeQuery( OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, + sharding_key_expr->getSampleBlock().getByPosition(0).type, sharding_key_column_name, shard_info, not_optimized_cluster->getSlotToShard(), @@ -160,29 +164,33 @@ void executeQuery( query_ast_for_shard = query_ast; stream_factory.createForShard(shard_info, - query_ast_for_shard, - new_context, throttler, query_info, plans, - remote_pipes, delayed_pipes, log); + query_ast_for_shard, main_table, table_func_ptr, + new_context, plans, remote_shards); } - if (!remote_pipes.empty()) + if (!remote_shards.empty()) { + const Scalars & scalars = context->hasQueryContext() ? 
context->getQueryContext()->getScalars() : Scalars{}; + auto external_tables = context->getExternalTables(); + auto plan = std::make_unique(); - auto read_from_remote = std::make_unique(Pipe::unitePipes(std::move(remote_pipes))); + auto read_from_remote = std::make_unique( + std::move(remote_shards), + header, + processed_stage, + main_table, + table_func_ptr, + new_context, + throttler, + scalars, + std::move(external_tables), + log); + read_from_remote->setStepDescription("Read from remote replica"); plan->addStep(std::move(read_from_remote)); plans.emplace_back(std::move(plan)); } - if (!delayed_pipes.empty()) - { - auto plan = std::make_unique(); - auto read_from_remote = std::make_unique(Pipe::unitePipes(std::move(delayed_pipes))); - read_from_remote->setStepDescription("Read from delayed local replica"); - plan->addStep(std::move(read_from_remote)); - plans.emplace_back(std::move(plan)); - } - if (plans.empty()) return; diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index c9efedfc422..0a77b7b6035 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace DB @@ -17,6 +18,8 @@ class QueryPlan; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +struct StorageID; + namespace ClusterProxy { @@ -38,6 +41,10 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c /// (currently SELECT, DESCRIBE). void executeQuery( QueryPlan & query_plan, + const Block & header, + QueryProcessingStage::Enum processed_stage, + const StorageID & main_table, + const ASTPtr & table_func_ptr, IStreamFactory & stream_factory, Poco::Logger * log, const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, const ExpressionActionsPtr & sharding_key_expr, diff --git a/src/Interpreters/ColumnAliasesVisitor.cpp b/src/Interpreters/ColumnAliasesVisitor.cpp index b239d36ee13..9b7e0a91c18 100644 --- a/src/Interpreters/ColumnAliasesVisitor.cpp +++ b/src/Interpreters/ColumnAliasesVisitor.cpp @@ -81,6 +81,7 @@ void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data else ast->setAlias(*column_name); + data.changed = true; // revisit ast to track recursive alias columns Visitor(data).visit(ast); } diff --git a/src/Interpreters/ColumnAliasesVisitor.h b/src/Interpreters/ColumnAliasesVisitor.h index e340ab0daa0..9be83d83d49 100644 --- a/src/Interpreters/ColumnAliasesVisitor.h +++ b/src/Interpreters/ColumnAliasesVisitor.h @@ -60,6 +60,9 @@ public: /// private_aliases are from lambda, so these are local names. NameSet private_aliases; + /// Check if query is changed by this visitor. + bool changed = false; + Data(const ColumnsDescription & columns_, const NameToNameMap & array_join_result_columns_, ContextPtr context_) : columns(columns_), context(context_) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9b204f12ab2..842818e7660 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -386,6 +386,7 @@ struct ContextSharedPart ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers std::unique_ptr system_logs; /// Used to log queries and operations on parts std::optional storage_s3_settings; /// Settings of S3 storage + std::vector warnings; /// Store warning messages about server configuration. 
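The warnings vector added above backs a simple accumulate-and-expose pattern: each message is written to the server log once and kept in memory so it can later be served, for example through a `system.warnings` table. A self-contained sketch of that pattern, where WarningStore is an illustrative stand-in rather than the actual ContextSharedPart code:

```cpp
#include <mutex>
#include <string>
#include <vector>

// Warnings are appended under a lock and handed out by value, so readers never see
// a partially updated list.
class WarningStore
{
public:
    void add(const std::string & message)
    {
        std::lock_guard<std::mutex> lock(mutex);
        // In the patch the message also goes to the server log at warning level.
        warnings.push_back(message);
    }

    std::vector<std::string> getAll() const
    {
        std::lock_guard<std::mutex> lock(mutex);
        return warnings;   // returned by value, like Context::getWarnings()
    }

private:
    mutable std::mutex mutex;
    std::vector<std::string> warnings;
};
```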
RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml @@ -514,6 +515,13 @@ struct ContextSharedPart trace_collector.emplace(std::move(trace_log)); } + + void addWarningMessage(const String & message) + { + /// A warning goes both: into server's log; stored to be placed in `system.warnings` table. + log->warning(message); + warnings.push_back(message); + } }; @@ -635,6 +643,12 @@ String Context::getDictionariesLibPath() const return shared->dictionaries_lib_path; } +std::vector Context::getWarnings() const +{ + auto lock = getLock(); + return shared->warnings; +} + VolumePtr Context::getTemporaryVolume() const { auto lock = getLock(); @@ -706,6 +720,12 @@ void Context::setDictionariesLibPath(const String & path) shared->dictionaries_lib_path = path; } +void Context::addWarningMessage(const String & msg) +{ + auto lock = getLock(); + shared->addWarningMessage(msg); +} + void Context::setConfig(const ConfigurationPtr & config) { auto lock = getLock(); @@ -1178,26 +1198,22 @@ void Context::applySettingsChanges(const SettingsChanges & changes) void Context::checkSettingsConstraints(const SettingChange & change) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, change); + getSettingsConstraints()->check(settings, change); } void Context::checkSettingsConstraints(const SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, changes); + getSettingsConstraints()->check(settings, changes); } void Context::checkSettingsConstraints(SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, changes); + getSettingsConstraints()->check(settings, changes); } void Context::clampToSettingsConstraints(SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->clamp(settings, changes); + getSettingsConstraints()->clamp(settings, changes); } std::shared_ptr Context::getSettingsConstraints() const @@ -2335,11 +2351,6 @@ OutputFormatPtr Context::getOutputFormatParallelIfPossible(const String & name, return FormatFactory::instance().getOutputFormatParallelIfPossible(name, buf, sample, shared_from_this()); } -OutputFormatPtr Context::getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const -{ - return FormatFactory::instance().getOutputFormat(name, buf, sample, shared_from_this()); -} - time_t Context::getUptimeSeconds() const { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 2b53c737915..847b287c9c6 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -319,6 +319,9 @@ public: String getUserFilesPath() const; String getDictionariesLibPath() const; + /// A list of warnings about server configuration to place in `system.warnings` table. 
+ std::vector getWarnings() const; + VolumePtr getTemporaryVolume() const; void setPath(const String & path); @@ -326,6 +329,8 @@ public: void setUserFilesPath(const String & path); void setDictionariesLibPath(const String & path); + void addWarningMessage(const String & msg); + VolumePtr setTemporaryStorage(const String & path, const String & policy_name = ""); using ConfigurationPtr = Poco::AutoPtr; @@ -533,7 +538,6 @@ public: BlockOutputStreamPtr getOutputStream(const String & name, WriteBuffer & buf, const Block & sample) const; OutputFormatPtr getOutputFormatParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const; - OutputFormatPtr getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const; InterserverIOHandler & getInterserverIOHandler(); @@ -789,15 +793,6 @@ public: /// Returns context of current distributed DDL query or nullptr. ZooKeeperMetadataTransactionPtr getZooKeeperMetadataTransaction() const; - struct MySQLWireContext - { - uint8_t sequence_id = 0; - uint32_t client_capabilities = 0; - size_t max_packet_size = 0; - }; - - MySQLWireContext mysql; - PartUUIDsPtr getPartUUIDs() const; PartUUIDsPtr getIgnoredPartUUIDs() const; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 1ce6c4f36d8..adddb0c33d9 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -18,6 +17,7 @@ #include #include + namespace DB { @@ -157,9 +157,14 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr if (data.only_analyze || !settings.enable_scalar_subquery_optimization || worthConvertingToLiteral(scalar) || !data.getContext()->hasQueryContext()) { + /// subquery and ast can be the same object and ast will be moved. + /// Save these fields to avoid use after move. + auto alias = subquery.alias; + auto prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; + auto lit = std::make_unique((*scalar.safeGetByPosition(0).column)[0]); - lit->alias = subquery.alias; - lit->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; + lit->alias = alias; + lit->prefer_alias_to_column_name = prefer_alias_to_column_name; ast = addTypeConversionToAST(std::move(lit), scalar.safeGetByPosition(0).type->getName()); /// If only analyze was requested the expression is not suitable for constant folding, disable it. 
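The change above copies `alias` and `prefer_alias_to_column_name` out of the subquery before `ast` is reassigned, because both names may refer to the same node and reassigning the pointer can destroy it. A minimal sketch of that defensive-copy pattern, using illustrative types (Node, rewriteInPlace) rather than the AST classes:

```cpp
#include <memory>
#include <string>
#include <utility>

struct Node
{
    std::string alias;
};

// When `source` may be the very object `slot` owns, copy what is needed before replacing it.
void rewriteInPlace(std::shared_ptr<Node> & slot, const Node & source)
{
    std::string saved_alias = source.alias;        // copy first: `source` may die below

    auto replacement = std::make_shared<Node>();
    slot = std::move(replacement);                 // old node (and possibly `source`) is gone now
    slot->alias = saved_alias;                     // safe: we use the saved copy
}
```

Calling `rewriteInPlace(ptr, *ptr)` is exactly the aliasing case the saved copy protects against.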
@@ -167,8 +172,8 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr { ast->as()->alias.clear(); auto func = makeASTFunction("identity", std::move(ast)); - func->alias = subquery.alias; - func->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; + func->alias = alias; + func->prefer_alias_to_column_name = prefer_alias_to_column_name; ast = std::move(func); } } diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index bd06c753319..905fcf0331c 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -531,11 +531,12 @@ Names ExpressionActions::getRequiredColumns() const bool ExpressionActions::hasArrayJoin() const { - for (const auto & action : actions) - if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - return true; + return getActionsDAG().hasArrayJoin(); +} - return false; +void ExpressionActions::assertDeterministic() const +{ + getActionsDAG().assertDeterministic(); } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 7699e82a73b..4fddd1fd27e 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -103,6 +103,7 @@ public: void execute(Block & block, bool dry_run = false) const; bool hasArrayJoin() const; + void assertDeterministic() const; /// Obtain a sample block that contains the names and types of result columns. const Block & getSampleBlock() const { return sample_block; } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1496ea3dc61..875a7bef862 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -364,7 +364,7 @@ SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_o } -/// Performance optimisation for IN() if storage supports it. +/// Performance optimization for IN() if storage supports it. void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) { if (!node || !storage() || !storage()->supportsIndexForIn()) @@ -1478,12 +1478,6 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.clear(); }; - if (storage) - { - query_analyzer.makeSetsForIndex(query.where()); - query_analyzer.makeSetsForIndex(query.prewhere()); - } - { ExpressionActionsChain chain(context); Names additional_required_columns_after_prewhere; diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ac5d281f337..94a0b24d41a 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -326,15 +326,15 @@ public: /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const; + /// Create Set-s that we make from IN section to use index on them. + void makeSetsForIndex(const ASTPtr & node); + private: StorageMetadataPtr metadata_snapshot; /// If non-empty, ignore all expressions not from this list. NameSet required_result_columns; SelectQueryOptions query_options; - /// Create Set-s that we make from IN section to use index on them. 
- void makeSetsForIndex(const ASTPtr & node); - JoinPtr makeTableJoin( const ASTTablesInSelectQueryElement & join_element, const ColumnsWithTypeAndName & left_sample_columns); diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5d92f4f8b6f..a9c7cb61a0a 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -198,8 +198,9 @@ private: { ASTPtr & ast = func.arguments->children[1]; - /// Literal can use regular IN - if (ast->as()) + /// Literal or function can use regular IN. + /// NOTE: We don't support passing table functions to IN. + if (ast->as() || ast->as()) { if (func.name == "globalIn") func.name = "in"; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6e5f7df99bd..56ad13511ac 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1368,18 +1368,6 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) throw Exception("Logical error: unknown combination of JOIN", ErrorCodes::LOGICAL_ERROR); } - -void HashJoin::joinTotals(Block & block) const -{ - Block sample_right_block = sample_block_with_columns_to_add.cloneEmpty(); - /// For StorageJoin column names isn't qualified in sample_block_with_columns_to_add - for (auto & col : sample_right_block) - col.name = getTableJoin().renamedRightColumnName(col.name); - - JoinCommon::joinTotals(totals, sample_right_block, *table_join, block); -} - - template struct AdderNonJoined { diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 84c447d875e..86c53081059 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -155,9 +155,7 @@ public: /** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later. */ void setTotals(const Block & block) override { totals = block; } - bool hasTotals() const override { return totals; } - - void joinTotals(Block & block) const override; + const Block & getTotals() const override { return totals; } bool isFilled() const override { return from_storage_join || data->type == Type::DICT; } diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 0f486fbe523..c2cf007d823 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -31,11 +31,9 @@ public: /// Could be called from different threads in parallel. 
virtual void joinBlock(Block & block, std::shared_ptr & not_processed) = 0; - virtual bool hasTotals() const = 0; - /// Set totals for right table + /// Set/Get totals for right table virtual void setTotals(const Block & block) = 0; - /// Add totals to block from left table - virtual void joinTotals(Block & block) const = 0; + virtual const Block & getTotals() const = 0; virtual size_t getTotalRowCount() const = 0; virtual size_t getTotalByteCount() const = 0; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index b4a91170bc4..37650f5caa7 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -78,17 +78,35 @@ BlockIO InterpreterExplainQuery::execute() } -Block InterpreterExplainQuery::getSampleBlock() +Block InterpreterExplainQuery::getSampleBlock(const ASTExplainQuery::ExplainKind kind) { - Block block; - - ColumnWithTypeAndName col; - col.name = "explain"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - return block; + if (kind == ASTExplainQuery::ExplainKind::QueryEstimates) + { + auto cols = NamesAndTypes{ + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"parts", std::make_shared()}, + {"rows", std::make_shared()}, + {"marks", std::make_shared()}, + }; + return Block({ + {cols[0].type->createColumn(), cols[0].type, cols[0].name}, + {cols[1].type->createColumn(), cols[1].type, cols[1].name}, + {cols[2].type->createColumn(), cols[2].type, cols[2].name}, + {cols[3].type->createColumn(), cols[3].type, cols[3].name}, + {cols[4].type->createColumn(), cols[4].type, cols[4].name}, + }); + } + else + { + Block res; + ColumnWithTypeAndName col; + col.name = "explain"; + col.type = std::make_shared(); + col.column = col.type->createColumn(); + res.insert(col); + return res; + } } /// Split str by line feed and write as separate row to ColumnString. @@ -223,9 +241,9 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) BlockInputStreamPtr InterpreterExplainQuery::executeImpl() { - const auto & ast = query->as(); + const auto & ast = query->as(); - Block sample_block = getSampleBlock(); + Block sample_block = getSampleBlock(ast.getKind()); MutableColumns res_columns = sample_block.cloneEmptyColumns(); WriteBufferFromOwnString buf; @@ -313,11 +331,32 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() plan.explainPipeline(buf, settings.query_pipeline_options); } } + else if (ast.getKind() == ASTExplainQuery::QueryEstimates) + { + if (!dynamic_cast(ast.getExplainedQuery().get())) + throw Exception("Only SELECT is supported for EXPLAIN ESTIMATE query", ErrorCodes::INCORRECT_QUERY); - if (single_line) - res_columns[0]->insertData(buf.str().data(), buf.str().size()); - else - fillColumn(*res_columns[0], buf.str()); + auto settings = checkAndGetSettings(ast.getSettings()); + QueryPlan plan; + + InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions()); + interpreter.buildQueryPlan(plan); + // collect the selected marks, rows, parts during build query pipeline. 
+ plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(getContext()), + BuildQueryPipelineSettings::fromContext(getContext())); + + if (settings.optimize) + plan.optimize(QueryPlanOptimizationSettings::fromContext(getContext())); + plan.explainEstimate(res_columns); + } + if (ast.getKind() != ASTExplainQuery::QueryEstimates) + { + if (single_line) + res_columns[0]->insertData(buf.str().data(), buf.str().size()); + else + fillColumn(*res_columns[0], buf.str()); + } return std::make_shared(sample_block.cloneWithColumns(std::move(res_columns))); } diff --git a/src/Interpreters/InterpreterExplainQuery.h b/src/Interpreters/InterpreterExplainQuery.h index f16b1a8f69d..a7f54a10e3e 100644 --- a/src/Interpreters/InterpreterExplainQuery.h +++ b/src/Interpreters/InterpreterExplainQuery.h @@ -2,7 +2,7 @@ #include #include - +#include namespace DB { @@ -15,7 +15,7 @@ public: BlockIO execute() override; - static Block getSampleBlock(); + static Block getSampleBlock(const ASTExplainQuery::ExplainKind kind); private: ASTPtr query; diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index 7487ca79bde..d5fcb82d408 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -93,24 +93,28 @@ namespace const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { const auto & elements = query.access_rights_elements; + need_check_grantees_are_allowed = true; if (elements.empty()) + { + /// No access rights to grant or revoke. + need_check_grantees_are_allowed = false; return; + } - /// To execute the command GRANT the current user needs to have the access granted - /// with GRANT OPTION. if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the access granted with GRANT OPTION. access.checkGrantOption(elements); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return; } if (access.hasGrantOption(elements)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the grant option for all the access rights specified for REVOKE. return; } @@ -137,6 +141,7 @@ namespace all_granted_access.makeUnion(user->access); } } + need_check_grantees_are_allowed = false; /// already checked AccessRights required_access; if (elements[0].is_partial_revoke) @@ -158,21 +163,28 @@ namespace } } - std::vector getRoleIDsAndCheckAdminOption( const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, const RolesOrUsersSet & roles_from_query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { - std::vector matching_ids; + need_check_grantees_are_allowed = true; + if (roles_from_query.empty()) + { + /// No roles to grant or revoke. + need_check_grantees_are_allowed = false; + return {}; + } + std::vector matching_ids; if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the roles granted with ADMIN OPTION. 
matching_ids = roles_from_query.getMatchingIDs(access_control); access.checkAdminOption(matching_ids); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return matching_ids; } @@ -181,7 +193,7 @@ namespace matching_ids = roles_from_query.getMatchingIDs(); if (access.hasAdminOption(matching_ids)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the admin option for all the roles specified for REVOKE. return matching_ids; } } @@ -209,6 +221,7 @@ namespace all_granted_roles.makeUnion(user->granted_roles); } } + need_check_grantees_are_allowed = false; /// already checked const auto & all_granted_roles_set = query.admin_option ? all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted(); if (roles_from_query.all) @@ -218,6 +231,33 @@ namespace access.checkAdminOption(matching_ids); return matching_ids; } + + void checkGrantOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + checkGrantOption(access_control, access, query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + } + + std::vector getRoleIDsAndCheckAdminOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + auto role_ids = getRoleIDsAndCheckAdminOption( + access_control, access, query, roles_from_query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + return role_ids; + } } @@ -243,7 +283,7 @@ BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding roles granted with admin option. std::vector roles; if (roles_set) - roles = getRoleIDsAndCheckAdminOption(access_control, *getContext()->getAccess(), query, *roles_set, grantees); + roles = getRoleIDsAndCheckAdminOptionAndGrantees(access_control, *getContext()->getAccess(), query, *roles_set, grantees); if (!query.cluster.empty()) { @@ -258,7 +298,7 @@ BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding access rights with grant option. if (!query.access_rights_elements.empty()) - checkGrantOption(access_control, *getContext()->getAccess(), query, grantees); + checkGrantOptionAndGrantees(access_control, *getContext()->getAccess(), query, grantees); /// Update roles and users listed in `grantees`. 
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d820cbbae45..79e60a9a02c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -283,6 +283,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( checkStackSize(); query_info.ignore_projections = options.ignore_projections; + query_info.is_projection_query = options.is_projection_query; initSettings(); const Settings & settings = context->getSettingsRef(); @@ -387,6 +388,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.syntax_analyzer_result = syntax_analyzer_result; + if (storage && !query.final() && storage->needRewriteQueryWithFinal(syntax_analyzer_result->requiredSourceColumns())) + query.setFinal(); + /// Save scalar sub queries's results in the query context if (!options.only_analyze && context->hasQueryContext()) for (const auto & it : syntax_analyzer_result->getScalars()) @@ -399,7 +403,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere && storage && query.where() && !query.prewhere()) + if (try_move_to_prewhere && storage && storage->supportsPrewhere() && query.where() && !query.prewhere()) { /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) @@ -575,9 +579,9 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan) /// We must guarantee that result structure is the same as in getSampleBlock() /// - /// But if we ignore aggregation, plan header does not match result_header. + /// But if it's a projection query, plan header does not match result_header. /// TODO: add special stage for InterpreterSelectQuery? - if (!options.ignore_aggregation && !blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header)) + if (!options.is_projection_query && !blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header)) { auto convert_actions_dag = ActionsDAG::makeConvertingActions( query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), @@ -608,17 +612,17 @@ Block InterpreterSelectQuery::getSampleBlockImpl() query_info.query = query_ptr; query_info.has_window = query_analyzer->hasWindow(); - if (storage && !options.only_analyze) { - from_stage = storage->getQueryProcessingStage(context, options.to_stage, metadata_snapshot, query_info); - - /// TODO how can we make IN index work if we cache parts before selecting a projection? - /// XXX Used for IN set index analysis. Is this a proper way? - if (query_info.projection) - metadata_snapshot->selected_projection = query_info.projection->desc; + auto & query = getSelectQuery(); + query_analyzer->makeSetsForIndex(query.where()); + query_analyzer->makeSetsForIndex(query.prewhere()); + query_info.sets = query_analyzer->getPreparedSets(); } + if (storage && !options.only_analyze) + from_stage = storage->getQueryProcessingStage(context, options.to_stage, metadata_snapshot, query_info); + /// Do I need to perform the first part of the pipeline? /// Running on remote servers during distributed processing or if query is not distributed. 
/// @@ -1881,8 +1885,6 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (max_streams > 1 && !is_remote) max_streams *= settings.max_streams_to_max_threads_ratio; - // TODO figure out how to make set for projections - query_info.sets = query_analyzer->getPreparedSets(); auto & prewhere_info = analysis_result.prewhere_info; if (prewhere_info) @@ -2013,7 +2015,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac expression_before_aggregation->setStepDescription("Before GROUP BY"); query_plan.addStep(std::move(expression_before_aggregation)); - if (options.ignore_aggregation) + if (options.is_projection_query) return; const auto & header_before_aggregation = query_plan.getCurrentDataStream().header; diff --git a/src/Interpreters/InterpreterWatchQuery.h b/src/Interpreters/InterpreterWatchQuery.h index 45b61a18b66..51eb4a00556 100644 --- a/src/Interpreters/InterpreterWatchQuery.h +++ b/src/Interpreters/InterpreterWatchQuery.h @@ -13,7 +13,6 @@ limitations under the License. */ #include #include -#include #include #include #include diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index 75ff7bb9b2c..a89ac6d5d98 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -31,9 +31,9 @@ public: join->joinBlock(block, not_processed); } - bool hasTotals() const override + const Block & getTotals() const override { - return join->hasTotals(); + return join->getTotals(); } void setTotals(const Block & block) override @@ -41,11 +41,6 @@ public: join->setTotals(block); } - void joinTotals(Block & block) const override - { - join->joinTotals(block); - } - size_t getTotalRowCount() const override { return join->getTotalRowCount(); diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 26463c8c6ed..8f9d94b6079 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -503,11 +503,6 @@ void MergeJoin::setTotals(const Block & totals_block) used_rows_bitmap = std::make_shared(getRightBlocksCount()); } -void MergeJoin::joinTotals(Block & block) const -{ - JoinCommon::joinTotals(totals, right_columns_to_add, *table_join, block); -} - void MergeJoin::mergeRightBlocks() { if (is_in_memory) @@ -1053,7 +1048,10 @@ private: } if (rows_added >= max_block_size) + { + ++block_number; break; + } } return rows_added; diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index b6bde8fb131..8c829569a41 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -26,9 +26,10 @@ public: const TableJoin & getTableJoin() const override { return *table_join; } bool addJoinedBlock(const Block & block, bool check_limits) override; void joinBlock(Block &, ExtraBlockPtr & not_processed) override; - void joinTotals(Block &) const override; + void setTotals(const Block &) override; - bool hasTotals() const override { return totals; } + const Block & getTotals() const override { return totals; } + size_t getTotalRowCount() const override { return right_blocks.row_count; } size_t getTotalByteCount() const override { return right_blocks.bytes; } diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 65ad027118a..c9a589e6b6d 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp 
b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 399def00006..ecfda4cd0c1 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -13,12 +12,12 @@ namespace using namespace DB; Field executeFunctionOnField( - const Field & field, const std::string & name, + const Field & field, + const std::string & name, const ExpressionActionsPtr & sharding_expr, + const DataTypePtr & type, const std::string & sharding_key_column_name) { - DataTypePtr type = applyVisitor(FieldToDataType{}, field); - ColumnWithTypeAndName column; column.column = type->createColumnConst(1, field); column.name = name; @@ -34,25 +33,26 @@ Field executeFunctionOnField( /// @param sharding_column_value - one of values from IN /// @param sharding_column_name - name of that column -/// @param sharding_expr - expression of sharding_key for the Distributed() table -/// @param sharding_key_column_name - name of the column for sharding_expr -/// @param shard_info - info for the current shard (to compare shard_num with calculated) -/// @param slots - weight -> shard mapping /// @return true if shard may contain such value (or it is unknown), otherwise false. bool shardContains( - const Field & sharding_column_value, + Field sharding_column_value, const std::string & sharding_column_name, - const ExpressionActionsPtr & sharding_expr, - const std::string & sharding_key_column_name, - const Cluster::ShardInfo & shard_info, - const Cluster::SlotToShard & slots) + const OptimizeShardingKeyRewriteInMatcher::Data & data) { + UInt64 field_value; + /// Convert value to numeric (if required). + if (!sharding_column_value.tryGet(field_value)) + sharding_column_value = convertFieldToType(sharding_column_value, *data.sharding_key_type); + /// NULL is not allowed in sharding key, /// so it should be safe to assume that shard cannot contain it. if (sharding_column_value.isNull()) return false; - Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, sharding_expr, sharding_key_column_name); + Field sharding_value = executeFunctionOnField( + sharding_column_value, sharding_column_name, + data.sharding_key_expr, data.sharding_key_type, + data.sharding_key_column_name); /// The value from IN can be non-numeric, /// but in this case it should be convertible to numeric type, let's try. 
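Once a literal from the IN list has been pushed through the sharding expression and converted to an integer, pruning reduces to the slot arithmetic used below: the value selects a slot in the weight table, the slot names a shard, and the literal is kept only on that shard. A stand-alone sketch of that rule (names are illustrative; `slot_to_shard` plays the role of Cluster::SlotToShard with 0-based shard indexes):

```cpp
#include <cstdint>
#include <vector>

// Returns true if this shard may contain rows for the given sharding value,
// i.e. the value's slot maps to this shard (shard_num is 1-based, as in the patch).
bool shardMayContain(uint64_t sharding_value, const std::vector<uint64_t> & slot_to_shard, uint64_t my_shard_num)
{
    if (slot_to_shard.empty())
        return true;   // nothing to prune against
    uint64_t shard_num = slot_to_shard[sharding_value % slot_to_shard.size()] + 1;
    return shard_num == my_shard_num;
}
```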
sharding_value = convertFieldToType(sharding_value, DataTypeUInt64()); @@ -61,8 +61,8 @@ bool shardContains( return false; UInt64 value = sharding_value.get(); - const auto shard_num = slots[value % slots.size()] + 1; - return shard_info.shard_num == shard_num; + const auto shard_num = data.slots[value % data.slots.size()] + 1; + return data.shard_info.shard_num == shard_num; } } @@ -92,10 +92,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d if (!identifier) return; - const auto & sharding_expr = data.sharding_key_expr; - const auto & sharding_key_column_name = data.sharding_key_column_name; - - if (!sharding_expr->getRequiredColumnsWithTypes().contains(identifier->name())) + if (!data.sharding_key_expr->getRequiredColumnsWithTypes().contains(identifier->name())) return; /// NOTE: that we should not take care about empty tuple, @@ -107,7 +104,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d std::erase_if(tuple_elements->children, [&](auto & child) { auto * literal = child->template as(); - return literal && !shardContains(literal->value, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots); + return literal && !shardContains(literal->value, identifier->name(), data); }); } else if (auto * tuple_literal = right->as(); @@ -116,7 +113,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d auto & tuple = tuple_literal->value.get(); std::erase_if(tuple, [&](auto & child) { - return !shardContains(child, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots); + return !shardContains(child, identifier->name(), data); }); } } diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h index 3087fb844ed..d546db40df7 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h @@ -25,9 +25,15 @@ struct OptimizeShardingKeyRewriteInMatcher struct Data { + /// Expression of sharding_key for the Distributed() table const ExpressionActionsPtr & sharding_key_expr; + /// Type of sharding_key column. + const DataTypePtr & sharding_key_type; + /// Name of the column for sharding_expr const std::string & sharding_key_column_name; + /// Info for the current shard (to compare shard_num with calculated) const Cluster::ShardInfo & shard_info; + /// weight -> shard mapping const Cluster::SlotToShard & slots; }; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 1a1f0267ab0..52ce7c83741 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -32,13 +32,14 @@ struct SelectQueryOptions bool remove_duplicates = false; bool ignore_quota = false; bool ignore_limits = false; - /// This is a temporary flag to avoid adding aggregating step. Used for projections. - /// TODO: we need more stages for InterpreterSelectQuery - bool ignore_aggregation = false; /// This flag is needed to analyze query ignoring table projections. /// It is needed because we build another one InterpreterSelectQuery while analyzing projections. /// It helps to avoid infinite recursion. bool ignore_projections = false; + /// This flag is also used for projection analysis. + /// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform. 
+ /// It is also used to avoid adding aggregating step when aggregate projection is chosen. + bool is_projection_query = false; bool ignore_alias = false; bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select @@ -100,9 +101,9 @@ struct SelectQueryOptions return *this; } - SelectQueryOptions & ignoreAggregation(bool value = true) + SelectQueryOptions & projectionQuery(bool value = true) { - ignore_aggregation = value; + is_projection_query = value; return *this; } diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 66ba1f9ac9c..ff502b499cd 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -7,8 +7,6 @@ #include -#include - #include #include @@ -217,6 +215,8 @@ bool Set::insertFromBlock(const Block & block) set_elements[i] = filtered_column; else set_elements[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size()); + if (transform_null_in && null_map_holder) + set_elements[i]->insert(Null{}); } } @@ -281,7 +281,7 @@ ColumnPtr Set::execute(const Block & block, bool negative) const key_columns.emplace_back() = materialized_columns.back().get(); } - /// We will check existence in Set only for keys, where all components are not NULL. + /// We will check existence in Set only for keys whose components do not contain any NULL value. ConstNullMapPtr null_map{}; ColumnPtr null_map_holder; if (!transform_null_in) @@ -408,7 +408,7 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector & key_ranges, { size_t tuple_size = indexes_mapping.size(); - ColumnsWithInfinity left_point; - ColumnsWithInfinity right_point; + FieldValues left_point; + FieldValues right_point; left_point.reserve(tuple_size); right_point.reserve(tuple_size); @@ -458,8 +458,8 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, right_point.emplace_back(ordered_set[i]->cloneEmpty()); } - bool invert_left_infinities = false; - bool invert_right_infinities = false; + bool left_included = true; + bool right_included = true; for (size_t i = 0; i < tuple_size; ++i) { @@ -471,48 +471,29 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, if (!new_range) return {true, true}; - /** A range that ends in (x, y, ..., +inf) exclusive is the same as a range - * that ends in (x, y, ..., -inf) inclusive and vice versa for the left bound. 
- */ - if (new_range->left_bounded) - { - if (!new_range->left_included) - invert_left_infinities = true; - - left_point[i].update(new_range->left); - } - else - { - if (invert_left_infinities) - left_point[i].update(ValueWithInfinity::PLUS_INFINITY); - else - left_point[i].update(ValueWithInfinity::MINUS_INFINITY); - } - - if (new_range->right_bounded) - { - if (!new_range->right_included) - invert_right_infinities = true; - - right_point[i].update(new_range->right); - } - else - { - if (invert_right_infinities) - right_point[i].update(ValueWithInfinity::MINUS_INFINITY); - else - right_point[i].update(ValueWithInfinity::PLUS_INFINITY); - } + left_point[i].update(new_range->left); + left_included &= new_range->left_included; + right_point[i].update(new_range->right); + right_included &= new_range->right_included; } - auto compare = [](const IColumn & lhs, const ValueWithInfinity & rhs, size_t row) + /// lhs < rhs return -1 + /// lhs == rhs return 0 + /// lhs > rhs return 1 + auto compare = [](const IColumn & lhs, const FieldValue & rhs, size_t row) { - auto type = rhs.getType(); - /// Return inverted infinity sign, because in 'lhs' all values are finite. - if (type != ValueWithInfinity::NORMAL) - return -static_cast(type); - - return lhs.compareAt(row, 0, rhs.getColumnIfFinite(), 1); + if (rhs.isNegativeInfinity()) + return 1; + if (rhs.isPositiveInfinity()) + { + Field f; + lhs.get(row, f); + if (f.isNull()) + return 0; // +Inf == +Inf + else + return -1; + } + return lhs.compareAt(row, 0, *rhs.column, 1); }; auto less = [this, &compare, tuple_size](size_t row, const auto & point) @@ -535,31 +516,32 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, }; /** Because each hyperrectangle maps to a contiguous sequence of elements - * laid out in the lexicographically increasing order, the set intersects the range - * if and only if either bound coincides with an element or at least one element - * is between the lower bounds - */ + * laid out in the lexicographically increasing order, the set intersects the range + * if and only if either bound coincides with an element or at least one element + * is between the lower bounds + */ auto indices = collections::range(0, size()); auto left_lower = std::lower_bound(indices.begin(), indices.end(), left_point, less); auto right_lower = std::lower_bound(indices.begin(), indices.end(), right_point, less); - /// A special case of 1-element KeyRange. It's useful for partition pruning + /// A special case of 1-element KeyRange. It's useful for partition pruning. bool one_element_range = true; for (size_t i = 0; i < tuple_size; ++i) { auto & left = left_point[i]; auto & right = right_point[i]; - if (left.getType() == right.getType()) + if (left.isNormal() && right.isNormal()) { - if (left.getType() == ValueWithInfinity::NORMAL) + if (0 != left.column->compareAt(0, 0, *right.column, 1)) { - if (0 != left.getColumnIfFinite().compareAt(0, 0, right.getColumnIfFinite(), 1)) - { - one_element_range = false; - break; - } + one_element_range = false; + break; } } + else if ((left.isPositiveInfinity() && right.isPositiveInfinity()) || (left.isNegativeInfinity() && right.isNegativeInfinity())) + { + /// Special value equality. + } else { one_element_range = false; @@ -571,19 +553,40 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, /// Here we know that there is one element in range. 
/// The main difference with the normal case is that we can definitely say that /// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0). - if (left_lower != indices.end() && equals(*left_lower, left_point)) + + /// Check if it's an empty range + if (!left_included || !right_included) + return {false, true}; + else if (left_lower != indices.end() && equals(*left_lower, left_point)) return {true, false}; else return {false, true}; } - return + /// If there is more than one element in the range, it can always be false. Thus we only need to check if it may be true or not. + /// Given left_lower >= left_point, right_lower >= right_point, find if there may be a match in between left_lower and right_lower. + if (left_lower + 1 < right_lower) { - left_lower != right_lower - || (left_lower != indices.end() && equals(*left_lower, left_point)) - || (right_lower != indices.end() && equals(*right_lower, right_point)), - true - }; + /// There is a point in between: left_lower + 1 + return {true, true}; + } + else if (left_lower + 1 == right_lower) + { + /// Need to check if left_lower is a valid match, as left_point <= left_lower < right_point <= right_lower. + /// Note: left_lower is valid. + if (left_included || !equals(*left_lower, left_point)) + return {true, true}; + + /// We are unlucky that left_point fails to cover a point. Now we need to check if right_point can cover right_lower. + /// Check if there is a match at the right boundary. + return {right_included && right_lower != indices.end() && equals(*right_lower, right_point), true}; + } + else // left_lower == right_lower + { + /// Need to check if right_point is a valid match, as left_point < right_point <= left_lower = right_lower. + /// Check if there is a match at the left boundary. + return {right_included && right_lower != indices.end() && equals(*right_lower, right_point), true}; + } } bool MergeTreeSetIndex::hasMonotonicFunctionsChain() const @@ -594,23 +597,18 @@ bool MergeTreeSetIndex::hasMonotonicFunctionsChain() const return false; } -void ValueWithInfinity::update(const Field & x) +void FieldValue::update(const Field & x) { - /// Keep at most one element in column. - if (!column->empty()) - column->popBack(1); - column->insert(x); - type = NORMAL; -} - -const IColumn & ValueWithInfinity::getColumnIfFinite() const -{ -#ifndef NDEBUG - if (type != NORMAL) - throw Exception("Trying to get column of infinite type", ErrorCodes::LOGICAL_ERROR); -#endif - - return *column; + if (x.isNegativeInfinity() || x.isPositiveInfinity()) + value = x; + else + { + /// Keep at most one element in column. + if (!column->empty()) + column->popBack(1); + column->insert(x); + value = Field(); // Set back to normal value. + } } } diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index c9bfbf0625c..9bf6630b844 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -178,29 +178,19 @@ using FunctionPtr = std::shared_ptr; * Single field is stored in column for more optimal inplace comparisons with other regular columns. * Extracting fields from columns and further their comparison is suboptimal and requires extra copying.
*/ -class ValueWithInfinity +struct FieldValue { -public: - enum Type - { - MINUS_INFINITY = -1, - NORMAL = 0, - PLUS_INFINITY = 1 - }; - - ValueWithInfinity(MutableColumnPtr && column_) - : column(std::move(column_)), type(NORMAL) {} - + FieldValue(MutableColumnPtr && column_) : column(std::move(column_)) {} void update(const Field & x); - void update(Type type_) { type = type_; } - const IColumn & getColumnIfFinite() const; + bool isNormal() const { return !value.isPositiveInfinity() && !value.isNegativeInfinity(); } + bool isPositiveInfinity() const { return value.isPositiveInfinity(); } + bool isNegativeInfinity() const { return value.isNegativeInfinity(); } - Type getType() const { return type; } + Field value; // Null, -Inf, +Inf -private: + // If value is Null, uses the actual value in column MutableColumnPtr column; - Type type; }; @@ -230,7 +220,7 @@ private: Columns ordered_set; std::vector indexes_mapping; - using ColumnsWithInfinity = std::vector; + using FieldValues = std::vector; }; } diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index b12616dba1e..e56c355852f 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -6,6 +6,7 @@ #include #include + namespace DB { diff --git a/src/Interpreters/SortedBlocksWriter.h b/src/Interpreters/SortedBlocksWriter.h index 3c7bd8dc625..b0488ec90c9 100644 --- a/src/Interpreters/SortedBlocksWriter.h +++ b/src/Interpreters/SortedBlocksWriter.h @@ -9,6 +9,7 @@ #include #include + namespace DB { diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 2bdad8b698f..44a33d0eecf 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -951,8 +951,13 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( /// rewrite filters for select query, must go after getArrayJoinedColumns if (settings.optimize_respect_aliases && result.metadata_snapshot) { - replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext()); - result.collectUsedColumns(query, true); + /// If query is changed, we need to redo some work to correct name resolution. + if (replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.array_join_result_to_source, getContext())) + { + result.aggregates = getAggregates(query, *select_query); + result.window_function_asts = getWindowFunctions(query, *select_query); + result.collectUsedColumns(query, true); + } } result.ast_join = select_query->join(); diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index bbd39a6e8ec..650d15b723e 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -3,6 +3,8 @@ #include #include #include +#include + namespace zkutil { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 99c08c70b7c..b2b9ba8567d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -875,13 +876,6 @@ static std::tuple executeQueryImpl( res.finish_callback = std::move(finish_callback); res.exception_callback = std::move(exception_callback); - - if (!internal && res.in) - { - WriteBufferFromOwnString msg_buf; - res.in->dumpTree(msg_buf); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query pipeline:\n{}", msg_buf.str()); - } } } catch (...) 
@@ -949,6 +943,7 @@ void executeQuery( bool allow_into_outfile, ContextMutablePtr context, std::function set_result_details, + const std::optional & output_format_settings, std::function before_finalize_callback) { PODArray parse_buf; @@ -1020,7 +1015,7 @@ void executeQuery( ? getIdentifierName(ast_query_with_output->format) : context->getDefaultFormat(); - auto out = context->getOutputStreamParallelIfPossible(format_name, *out_buf, streams.in->getHeader()); + auto out = FormatFactory::instance().getOutputStreamParallelIfPossible(format_name, *out_buf, streams.in->getHeader(), context, {}, output_format_settings); /// Save previous progress callback if any. TODO Do it more conveniently. auto previous_progress_callback = context->getProgressCallback(); @@ -1066,7 +1061,7 @@ void executeQuery( return std::make_shared(header); }); - auto out = context->getOutputFormatParallelIfPossible(format_name, *out_buf, pipeline.getHeader()); + auto out = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, *out_buf, pipeline.getHeader(), context, {}, output_format_settings); out->setAutoFlush(); /// Save previous progress callback if any. TODO Do it more conveniently. diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 77f142de121..179016e8523 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -16,8 +16,9 @@ void executeQuery( ReadBuffer & istr, /// Where to read query from (and data for INSERT, if present). WriteBuffer & ostr, /// Where to write query output to. bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file. - ContextMutablePtr context, /// DB, tables, data types, storage engines, functions, aggregate functions... + ContextMutablePtr context, /// DB, tables, data types, storage engines, functions, aggregate functions... std::function set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone. + const std::optional & output_format_settings = std::nullopt, /// Format settings for output format, will be calculated from the context if not set. std::function before_finalize_callback = {} /// Will be set in output format to be called before finalize. 
); diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index 5548667e1a7..74f2c26a2ef 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -322,46 +322,26 @@ void createMissedColumns(Block & block) } /// Append totals from right to left block, correct types if needed -void joinTotals(const Block & totals, const Block & columns_to_add, const TableJoin & table_join, Block & block) +void joinTotals(Block left_totals, Block right_totals, const TableJoin & table_join, Block & out_block) { if (table_join.forceNullableLeft()) - convertColumnsToNullable(block); + JoinCommon::convertColumnsToNullable(left_totals); - if (Block totals_without_keys = totals) + if (table_join.forceNullableRight()) + JoinCommon::convertColumnsToNullable(right_totals); + + for (auto & col : out_block) { - for (const auto & name : table_join.keyNamesRight()) - totals_without_keys.erase(totals_without_keys.getPositionByName(name)); + if (const auto * left_col = left_totals.findByName(col.name)) + col = *left_col; + else if (const auto * right_col = right_totals.findByName(col.name)) + col = *right_col; + else + col.column = col.type->createColumnConstWithDefaultValue(1)->convertToFullColumnIfConst(); - for (auto & col : totals_without_keys) - { - if (table_join.rightBecomeNullable(col.type)) - JoinCommon::convertColumnToNullable(col); - - /// In case of arrayJoin it can be not one row - if (col.column->size() != 1) - col.column = col.column->cloneResized(1); - } - - for (size_t i = 0; i < totals_without_keys.columns(); ++i) - block.insert(totals_without_keys.safeGetByPosition(i)); - } - else - { - /// We will join empty `totals` - from one row with the default values. - - for (size_t i = 0; i < columns_to_add.columns(); ++i) - { - const auto & col = columns_to_add.getByPosition(i); - if (block.has(col.name)) - { - /// For StorageJoin we discarded table qualifiers, so some names may clash - continue; - } - block.insert({ - col.type->createColumnConstWithDefaultValue(1)->convertToFullColumnIfConst(), - col.type, - col.name}); - } + /// In case of using `arrayJoin` we can get more or less rows than one + if (col.column->size() != 1) + col.column = col.column->cloneResized(1); } } diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index 9334b9d672f..2da795d0d4c 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -35,7 +35,7 @@ ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_nam void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right, const Names & key_names_right); void createMissedColumns(Block & block); -void joinTotals(const Block & totals, const Block & columns_to_add, const TableJoin & table_join, Block & block); +void joinTotals(Block left_totals, Block right_totals, const TableJoin & table_join, Block & out_block); void addDefaultValues(IColumn & column, const DataTypePtr & type, size_t count); diff --git a/src/Interpreters/replaceAliasColumnsInQuery.cpp b/src/Interpreters/replaceAliasColumnsInQuery.cpp index 3f789ec3d4f..604ba3590ae 100644 --- a/src/Interpreters/replaceAliasColumnsInQuery.cpp +++ b/src/Interpreters/replaceAliasColumnsInQuery.cpp @@ -6,12 +6,13 @@ namespace DB { -void replaceAliasColumnsInQuery( +bool replaceAliasColumnsInQuery( ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context) { ColumnAliasesVisitor::Data aliases_column_data(columns, 
array_join_result_to_source, context); ColumnAliasesVisitor aliases_column_visitor(aliases_column_data); aliases_column_visitor.visit(ast); + return aliases_column_data.changed; } } diff --git a/src/Interpreters/replaceAliasColumnsInQuery.h b/src/Interpreters/replaceAliasColumnsInQuery.h index fadebe3c9e6..5d9207ad11b 100644 --- a/src/Interpreters/replaceAliasColumnsInQuery.h +++ b/src/Interpreters/replaceAliasColumnsInQuery.h @@ -10,7 +10,8 @@ namespace DB class ColumnsDescription; -void replaceAliasColumnsInQuery( +/// Replace storage alias columns in select query if possible. Return true if the query is changed. +bool replaceAliasColumnsInQuery( ASTPtr & ast, const ColumnsDescription & columns, const NameToNameMap & array_join_result_to_source, ContextPtr context); } diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index 95a3a362030..5c50a8cd82e 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -17,6 +17,7 @@ public: AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...' QueryPlan, /// 'EXPLAIN SELECT ...' QueryPipeline, /// 'EXPLAIN PIPELINE ...' + QueryEstimates, /// 'EXPLAIN ESTIMATE ...' }; explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {} @@ -76,6 +77,7 @@ private: case AnalyzedSyntax: return "EXPLAIN SYNTAX"; case QueryPlan: return "EXPLAIN"; case QueryPipeline: return "EXPLAIN PIPELINE"; + case QueryEstimates: return "EXPLAIN ESTIMATE"; } __builtin_unreachable(); diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 84a2e1070d6..7699d380623 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -438,4 +438,19 @@ ASTPtr & ASTSelectQuery::getExpression(Expression expr) return children[positions[expr]]; } +void ASTSelectQuery::setFinal() // NOLINT method can be made const +{ + auto & tables_in_select_query = tables()->as(); + + if (tables_in_select_query.children.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Tables list is empty, it's a bug"); + + auto & tables_element = tables_in_select_query.children[0]->as(); + + if (!tables_element.table_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no table expression, it's a bug"); + + tables_element.table_expression->as().final = true; +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 3fc8efb5311..db4d7e76320 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -93,6 +93,8 @@ public: void addTableFunction(ASTPtr & table_function_ptr); void updateTreeHashImpl(SipHash & hash_state) const override; + void setFinal(); + protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index dc548164157..b4ba0523239 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -19,6 +19,7 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_syntax("SYNTAX"); ParserKeyword s_pipeline("PIPELINE"); ParserKeyword s_plan("PLAN"); + ParserKeyword s_estimates("ESTIMATE"); if (s_explain.ignore(pos, expected)) { @@ -32,6 +33,8 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected kind = ASTExplainQuery::ExplainKind::QueryPipeline; else if (s_plan.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::QueryPlan; //-V1048 + else if (s_estimates.ignore(pos, expected)) + kind = 
ASTExplainQuery::ExplainKind::QueryEstimates; //-V1048 } else return false; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 81922bdde80..c4ec8736a81 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -41,6 +41,7 @@ #include #include +#include namespace DB { @@ -48,8 +49,34 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; + extern const int CANNOT_COMPILE_REGEXP; } +class AvroSerializerTraits +{ +public: + explicit AvroSerializerTraits(const FormatSettings & settings_) + : string_to_string_regexp(settings_.avro.string_column_pattern) + { + if (!string_to_string_regexp.ok()) + throw DB::Exception( + "Avro: cannot compile re2: " + settings_.avro.string_column_pattern + ", error: " + string_to_string_regexp.error() + + ". Look at https://github.com/google/re2/wiki/Syntax for reference.", + DB::ErrorCodes::CANNOT_COMPILE_REGEXP); + } + + bool isStringAsString(const String & column_name) + { + return RE2::FullMatch(column_name, string_to_string_regexp); + } + + ~AvroSerializerTraits() = default; + +private: + const RE2 string_to_string_regexp; +}; + + class OutputStreamWriteBufferAdapter : public avro::OutputStream { public: @@ -75,7 +102,7 @@ private: }; -AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment) +AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment, const String & column_name) { ++type_name_increment; @@ -161,11 +188,20 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF }}; } case TypeIndex::String: - return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) - { - const StringRef & s = assert_cast(column).getDataAt(row_num); - encoder.encodeBytes(reinterpret_cast(s.data), s.size); - }}; + if (traits->isStringAsString(column_name)) + return {avro::StringSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeString(s.toString()); + } + }; + else + return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeBytes(reinterpret_cast(s.data), s.size); + } + }; case TypeIndex::FixedString: { auto size = data_type->getSizeOfValueInMemory(); @@ -223,7 +259,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::Array: { const auto & array_type = assert_cast(*data_type); - auto nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment, column_name); auto schema = avro::ArraySchema(nested_mapping.schema); return {schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) { @@ -249,7 +285,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::Nullable: { auto nested_type = removeNullable(data_type); - auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment, column_name); if 
(nested_type->getTypeId() == TypeIndex::Nothing) { return nested_mapping; @@ -278,7 +314,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::LowCardinality: { const auto & nested_type = removeLowCardinality(data_type); - auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment, column_name); return {nested_mapping.schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const auto & col = assert_cast(column); @@ -294,7 +330,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF } -AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) +AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns, std::unique_ptr traits_) + : traits(std::move(traits_)) { avro::RecordSchema record_schema("row"); @@ -303,7 +340,7 @@ AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) { try { - auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment); + auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment, column.name); serialize_fns.push_back(field_mapping.serialize); //TODO: verify name starts with A-Za-z_ record_schema.addField(column.name, field_mapping.schema); @@ -314,7 +351,7 @@ AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) throw; } } - schema.setSchema(record_schema); + valid_schema.setSchema(record_schema); } void AvroSerializer::serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder) @@ -350,7 +387,7 @@ AvroRowOutputFormat::AvroRowOutputFormat( WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) : IRowOutputFormat(header_, out_, params_) , settings(settings_) - , serializer(header_.getColumnsWithTypeAndName()) + , serializer(header_.getColumnsWithTypeAndName(), std::make_unique(settings)) , file_writer( std::make_unique(out_), serializer.getSchema(), diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index 8d0581d3307..fa4cedf1cc2 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -18,11 +18,13 @@ namespace DB { class WriteBuffer; +class AvroSerializerTraits; + class AvroSerializer { public: - AvroSerializer(const ColumnsWithTypeAndName & columns); - const avro::ValidSchema & getSchema() const { return schema; } + AvroSerializer(const ColumnsWithTypeAndName & columns, std::unique_ptr); + const avro::ValidSchema & getSchema() const { return valid_schema; } void serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder); private: @@ -34,10 +36,11 @@ private: }; /// Type names for different complex types (e.g. enums, fixed strings) must be unique. We use simple incremental number to give them different names. 
- static SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment); + /*static*/ SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment, const String & column_name); std::vector serialize_fns; - avro::ValidSchema schema; + avro::ValidSchema valid_schema; + std::unique_ptr traits; }; class AvroRowOutputFormat : public IRowOutputFormat diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 0f73349c271..6fdcc544a18 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -1,7 +1,11 @@ #include -#include +#include +#include #include +#include #include +#include + namespace DB { @@ -13,8 +17,18 @@ using namespace MySQLProtocol::ProtocolText; MySQLOutputFormat::MySQLOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_) : IOutputFormat(header_, out_) - , format_settings(settings_) + , client_capabilities(settings_.mysql_wire.client_capabilities) { + /// MySQLWire is a special format that is usually used as output format for MySQL protocol connections. + /// In this case we have a correct `sequence_id` stored in `settings_.mysql_wire`. + /// But it's also possible to specify MySQLWire as output format for clickhouse-client or clickhouse-local. + /// There is no `sequence_id` stored in `settings_.mysql_wire` in this case, so we create a dummy one. + sequence_id = settings_.mysql_wire.sequence_id ? settings_.mysql_wire.sequence_id : &dummy_sequence_id; +} + +void MySQLOutputFormat::setContext(ContextPtr context_) +{ + context = context_; } void MySQLOutputFormat::initialize() @@ -23,6 +37,7 @@ void MySQLOutputFormat::initialize() return; initialized = true; + const auto & header = getPort(PortKind::Main).getHeader(); data_types = header.getDataTypes(); @@ -30,6 +45,8 @@ void MySQLOutputFormat::initialize() for (const auto & type : data_types) serializations.emplace_back(type->getDefaultSerialization()); + packet_endpoint = MySQLProtocol::PacketEndpoint::create(out, *sequence_id); + if (header.columns()) { packet_endpoint->sendPacket(LengthEncodedNumber(header.columns())); @@ -40,7 +57,7 @@ void MySQLOutputFormat::initialize() packet_endpoint->sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); } - if (!(getContext()->mysql.client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) + if (!(client_capabilities & Capability::CLIENT_DEPRECATE_EOF)) { packet_endpoint->sendPacket(EOFPacket(0, 0)); } @@ -50,7 +67,6 @@ void MySQLOutputFormat::initialize() void MySQLOutputFormat::consume(Chunk chunk) { - initialize(); for (size_t i = 0; i < chunk.getNumRows(); i++) @@ -78,11 +94,9 @@ void MySQLOutputFormat::finalize() const auto & header = getPort(PortKind::Main).getHeader(); if (header.columns() == 0) - packet_endpoint->sendPacket( - OKPacket(0x0, getContext()->mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); - else if (getContext()->mysql.client_capabilities & CLIENT_DEPRECATE_EOF) - packet_endpoint->sendPacket( - OKPacket(0xfe, getContext()->mysql.client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); + packet_endpoint->sendPacket(OKPacket(0x0, client_capabilities, affected_rows, 0, 0, "", human_readable_info), true); + else if (client_capabilities & CLIENT_DEPRECATE_EOF) + packet_endpoint->sendPacket(OKPacket(0xfe, client_capabilities, affected_rows, 0, 0, "", 
human_readable_info), true); else packet_endpoint->sendPacket(EOFPacket(0, 0), true); } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.h b/src/Processors/Formats/Impl/MySQLOutputFormat.h index 7d67df3015e..a8e1ada3d6a 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.h +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.h @@ -3,11 +3,9 @@ #include #include -#include -#include -#include -#include -#include +#include +#include + namespace DB { @@ -15,6 +13,7 @@ namespace DB class IColumn; class IDataType; class WriteBuffer; +struct FormatSettings; /** A stream for outputting data in a binary line-by-line format. */ @@ -25,24 +24,21 @@ public: String getName() const override { return "MySQLOutputFormat"; } - void setContext(ContextPtr context_) - { - context = context_; - packet_endpoint = std::make_unique(out, const_cast(getContext()->mysql.sequence_id)); /// TODO: fix it - } + void setContext(ContextPtr context_); void consume(Chunk) override; void finalize() override; void flush() override; void doWritePrefix() override { initialize(); } +private: void initialize(); -private: bool initialized = false; - - std::unique_ptr packet_endpoint; - FormatSettings format_settings; + uint32_t client_capabilities = 0; + uint8_t * sequence_id = nullptr; + uint8_t dummy_sequence_id = 0; + MySQLProtocol::PacketEndpointPtr packet_endpoint; DataTypes data_types; Serializations serializations; }; diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index dafaf9bed72..5cf83bd3bb3 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -13,6 +12,7 @@ #include #include + namespace DB { diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index 1585f2532ff..358d31a6dff 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index b06d6628dcb..736d7eb37c1 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -70,7 +70,7 @@ FilledJoinStep::FilledJoinStep(const DataStream & input_stream_, JoinPtr join_, void FilledJoinStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) { bool default_totals = false; - if (!pipeline.hasTotals() && join->hasTotals()) + if (!pipeline.hasTotals() && join->getTotals()) { pipeline.addDefaultTotals(); default_totals = true; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 44c5c48975c..bc3b8458531 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace DB @@ -434,4 +435,59 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett QueryPlanOptimizations::optimizeTree(optimization_settings, *root, nodes); } +void QueryPlan::explainEstimate(MutableColumns & columns) +{ + checkInitialized(); + + struct EstimateCounters + { + std::string database_name; + std::string table_name; + UInt64 parts = 0; + UInt64 rows = 0; + UInt64 marks = 0; + + EstimateCounters(const std::string & database, const std::string & table) : database_name(database), table_name(table) + { + } + }; + + using 
CountersPtr = std::shared_ptr; + std::unordered_map counters; + using processNodeFuncType = std::function; + processNodeFuncType process_node = [&counters, &process_node] (const Node * node) + { + if (!node) + return; + if (const auto * step = dynamic_cast(node->step.get())) + { + const auto & id = step->getStorageID(); + auto key = id.database_name + "." + id.table_name; + auto it = counters.find(key); + if (it == counters.end()) + { + it = counters.insert({key, std::make_shared(id.database_name, id.table_name)}).first; + } + it->second->parts += step->getSelectedParts(); + it->second->rows += step->getSelectedRows(); + it->second->marks += step->getSelectedMarks(); + } + for (const auto * child : node->children) + process_node(child); + }; + process_node(root); + + for (const auto & counter : counters) + { + size_t index = 0; + const auto & database_name = counter.second->database_name; + const auto & table_name = counter.second->table_name; + columns[index++]->insertData(database_name.c_str(), database_name.size()); + columns[index++]->insertData(table_name.c_str(), table_name.size()); + columns[index++]->insert(counter.second->parts); + columns[index++]->insert(counter.second->rows); + columns[index++]->insert(counter.second->marks); + } +} + } diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 4c75f00cf4d..95034d34c9c 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -85,6 +86,7 @@ public: JSONBuilder::ItemPtr explainPlan(const ExplainPlanOptions & options); void explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & options); void explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptions & options); + void explainEstimate(MutableColumns & columns); /// Set upper limit for the recommend number of threads. Will be applied to the newly-created pipelines. /// TODO: make it in a better way. 
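The new `QueryPlan::explainEstimate` above backs the `EXPLAIN ESTIMATE` statement: it walks the plan tree and sums the selected parts, rows and marks per table from every merge-tree reading step. A minimal, self-contained sketch of that traversal-and-aggregation idea follows; `Node`, `ReadStep` and `collectEstimates` are illustrative stand-ins, not the actual ClickHouse classes.

```cpp
// Simplified, standalone sketch of the per-table aggregation behind EXPLAIN ESTIMATE.
#include <cstdint>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

struct ReadStep            // stands in for ReadFromMergeTree
{
    std::string database;
    std::string table;
    uint64_t selected_parts = 0;
    uint64_t selected_rows = 0;
    uint64_t selected_marks = 0;
};

struct Node                // stands in for QueryPlan::Node
{
    std::shared_ptr<ReadStep> read;   // set only for leaf reading steps
    std::vector<Node *> children;
};

struct Estimate
{
    uint64_t parts = 0;
    uint64_t rows = 0;
    uint64_t marks = 0;
};

/// Walk the plan tree and sum the counters per "database.table" key.
void collectEstimates(const Node * node, std::unordered_map<std::string, Estimate> & out)
{
    if (!node)
        return;
    if (node->read)
    {
        auto & e = out[node->read->database + "." + node->read->table];
        e.parts += node->read->selected_parts;
        e.rows += node->read->selected_rows;
        e.marks += node->read->selected_marks;
    }
    for (const auto * child : node->children)
        collectEstimates(child, out);
}

int main()
{
    Node leaf{std::make_shared<ReadStep>(ReadStep{"db", "hits", 3, 1000, 12}), {}};
    Node root{nullptr, {&leaf}};

    std::unordered_map<std::string, Estimate> estimates;
    collectEstimates(&root, estimates);

    for (const auto & [key, e] : estimates)
        std::cout << key << ": parts=" << e.parts << " rows=" << e.rows << " marks=" << e.marks << "\n";
}
```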
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 2dc8246cde7..2983663d0ce 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -47,6 +47,9 @@ struct ReadFromMergeTree::AnalysisResult IndexStats index_stats; Names column_names_to_read; ReadFromMergeTree::ReadType read_type = ReadFromMergeTree::ReadType::Default; + UInt64 selected_rows = 0; + UInt64 selected_marks = 0; + UInt64 selected_parts = 0; }; static MergeTreeReaderSettings getMergeTreeReaderSettings(const ContextPtr & context) @@ -829,7 +832,8 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::selectRangesToRead(MergeTre log, requested_num_streams, result.index_stats, - true); + true /* use_skip_indexes */, + true /* check_limits */); size_t sum_marks_pk = total_marks_pk; for (const auto & stat : result.index_stats) @@ -838,13 +842,17 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::selectRangesToRead(MergeTre size_t sum_marks = 0; size_t sum_ranges = 0; + size_t sum_rows = 0; for (const auto & part : result.parts_with_ranges) { sum_ranges += part.ranges.size(); sum_marks += part.getMarksCount(); + sum_rows += part.getRowsCount(); } - + result.selected_parts = result.parts_with_ranges.size(); + result.selected_marks = sum_marks; + result.selected_rows = sum_rows; LOG_DEBUG( log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", @@ -882,6 +890,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build return; } + selected_marks = result.selected_marks; + selected_rows = result.selected_rows; + selected_parts = result.selected_parts; /// Projection, that needed to drop columns, which have appeared by execution /// of some extra expressions, and to allow execute the same expressions later. /// NOTE: It may lead to double computation of expressions. 
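The `ReadFromRemote` step added further down in this patch builds lazy pipes for shards that also have a local replica: the choice between a local plan and remote connections is deferred until connection attempts have actually been made, and the local replica is used only when no remote connection succeeded or every stale remote replica lags more than the local one. A hedged, standalone sketch of that decision rule, with illustrative names rather than the real ClickHouse types:

```cpp
// Standalone sketch of the fallback rule used by the lazily created stream in
// ReadFromRemote::addLazyPipe. TryResult here is an illustrative stand-in.
#include <algorithm>
#include <vector>

struct TryResult
{
    bool is_up_to_date = false;
    double staleness = 0.0;   // how far the remote replica lags, in seconds
};

/// Returns true if the query should be executed with a local plan instead of remotely.
bool shouldUseLocalReplica(const std::vector<TryResult> & try_results, double local_delay)
{
    double max_remote_delay = 0.0;
    for (const auto & r : try_results)
        if (!r.is_up_to_date)
            max_remote_delay = std::max(max_remote_delay, r.staleness);

    // No usable remote connection, or every stale remote replica lags more than the local one.
    return try_results.empty() || local_delay < max_remote_delay;
}
```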
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index a5184d28593..b82e027420b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -80,6 +80,10 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeIndexes(JSONBuilder::JSONMap & map) const override; + const StorageID getStorageID() const { return data.getStorageID(); } + UInt64 getSelectedParts() const { return selected_parts; } + UInt64 getSelectedRows() const { return selected_rows; } + UInt64 getSelectedMarks() const { return selected_marks; } private: const MergeTreeReaderSettings reader_settings; @@ -106,6 +110,9 @@ private: std::shared_ptr max_block_numbers_to_read; Poco::Logger * log; + UInt64 selected_parts = 0; + UInt64 selected_rows = 0; + UInt64 selected_marks = 0; Pipe read(RangesInDataParts parts_with_range, Names required_columns, ReadType read_type, size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache); Pipe readFromPool(RangesInDataParts parts_with_ranges, Names required_columns, size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp new file mode 100644 index 00000000000..8c0a7050397 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -0,0 +1,228 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ALL_CONNECTION_TRIES_FAILED; +} + +static ActionsDAGPtr getConvertingDAG(const Block & block, const Block & header) +{ + /// Convert header structure to expected. + /// Also we ignore constants from result and replace it with constants from header. + /// It is needed for functions like `now64()` or `randConstant()` because their values may be different. 
+ return ActionsDAG::makeConvertingActions( + block.getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name, + true); +} + +void addConvertingActions(QueryPlan & plan, const Block & header) +{ + if (blocksHaveEqualStructure(plan.getCurrentDataStream().header, header)) + return; + + auto convert_actions_dag = getConvertingDAG(plan.getCurrentDataStream().header, header); + auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); + plan.addStep(std::move(converting)); +} + +static void addConvertingActions(Pipe & pipe, const Block & header) +{ + if (blocksHaveEqualStructure(pipe.getHeader(), header)) + return; + + auto convert_actions = std::make_shared(getConvertingDAG(pipe.getHeader(), header)); + pipe.addSimpleTransform([&](const Block & cur_header, Pipe::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header, convert_actions); + }); +} + +static String formattedAST(const ASTPtr & ast) +{ + if (!ast) + return {}; + WriteBufferFromOwnString buf; + formatAST(*ast, buf, false, true); + return buf.str(); +} + +static std::unique_ptr createLocalPlan( + const ASTPtr & query_ast, + const Block & header, + ContextPtr context, + QueryProcessingStage::Enum processed_stage) +{ + checkStackSize(); + + auto query_plan = std::make_unique(); + + InterpreterSelectQuery interpreter(query_ast, context, SelectQueryOptions(processed_stage)); + interpreter.buildQueryPlan(*query_plan); + + addConvertingActions(*query_plan, header); + + return query_plan; +} + + +ReadFromRemote::ReadFromRemote( + ClusterProxy::IStreamFactory::Shards shards_, + Block header_, + QueryProcessingStage::Enum stage_, + StorageID main_table_, + ASTPtr table_func_ptr_, + ContextPtr context_, + ThrottlerPtr throttler_, + Scalars scalars_, + Tables external_tables_, + Poco::Logger * log_) + : ISourceStep(DataStream{.header = std::move(header_)}) + , shards(std::move(shards_)) + , stage(stage_) + , main_table(std::move(main_table_)) + , table_func_ptr(std::move(table_func_ptr_)) + , context(std::move(context_)) + , throttler(std::move(throttler_)) + , scalars(std::move(scalars_)) + , external_tables(std::move(external_tables_)) + , log(log_) +{ +} + +void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard) +{ + bool add_agg_info = stage == QueryProcessingStage::WithMergeableState; + bool add_totals = false; + bool add_extremes = false; + bool async_read = context->getSettingsRef().async_socket_for_remote; + if (stage == QueryProcessingStage::Complete) + { + add_totals = shard.query->as().group_by_with_totals; + add_extremes = context->getSettingsRef().extremes; + } + + auto lazily_create_stream = [ + pool = shard.pool, shard_num = shard.shard_num, query = shard.query, header = shard.header, + context = context, throttler = throttler, + main_table = main_table, table_func_ptr = table_func_ptr, + scalars = scalars, external_tables = external_tables, + stage = stage, local_delay = shard.local_delay, + add_agg_info, add_totals, add_extremes, async_read]() + -> Pipe + { + auto current_settings = context->getSettingsRef(); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover( + current_settings).getSaturated( + current_settings.max_execution_time); + std::vector try_results; + try + { + if (table_func_ptr) + try_results = pool->getManyForTableFunction(timeouts, &current_settings, PoolMode::GET_MANY); + else + try_results = pool->getManyChecked(timeouts, &current_settings, PoolMode::GET_MANY, 
main_table.getQualifiedName()); + } + catch (const Exception & ex) + { + if (ex.code() == ErrorCodes::ALL_CONNECTION_TRIES_FAILED) + LOG_WARNING(&Poco::Logger::get("ClusterProxy::SelectStreamFactory"), + "Connections to remote replicas of local shard {} failed, will use stale local replica", shard_num); + else + throw; + } + + double max_remote_delay = 0.0; + for (const auto & try_result : try_results) + { + if (!try_result.is_up_to_date) + max_remote_delay = std::max(try_result.staleness, max_remote_delay); + } + + if (try_results.empty() || local_delay < max_remote_delay) + { + auto plan = createLocalPlan(query, header, context, stage); + return QueryPipeline::getPipe(std::move(*plan->buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(context), + BuildQueryPipelineSettings::fromContext(context)))); + } + else + { + std::vector connections; + connections.reserve(try_results.size()); + for (auto & try_result : try_results) + connections.emplace_back(std::move(try_result.entry)); + + String query_string = formattedAST(query); + + auto remote_query_executor = std::make_shared( + std::move(connections), query_string, header, context, throttler, scalars, external_tables, stage); + + return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read); + } + }; + + pipes.emplace_back(createDelayedPipe(shard.header, lazily_create_stream, add_totals, add_extremes)); + pipes.back().addInterpreterContext(context); + addConvertingActions(pipes.back(), output_stream->header); +} + +void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard) +{ + bool add_agg_info = stage == QueryProcessingStage::WithMergeableState; + bool add_totals = false; + bool add_extremes = false; + bool async_read = context->getSettingsRef().async_socket_for_remote; + if (stage == QueryProcessingStage::Complete) + { + add_totals = shard.query->as().group_by_with_totals; + add_extremes = context->getSettingsRef().extremes; + } + + String query_string = formattedAST(shard.query); + + auto remote_query_executor = std::make_shared( + shard.pool, query_string, shard.header, context, throttler, scalars, external_tables, stage); + remote_query_executor->setLogger(log); + + remote_query_executor->setPoolMode(PoolMode::GET_MANY); + if (!table_func_ptr) + remote_query_executor->setMainTable(main_table); + + pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read)); + pipes.back().addInterpreterContext(context); + addConvertingActions(pipes.back(), output_stream->header); +} + +void ReadFromRemote::initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) +{ + Pipes pipes; + for (const auto & shard : shards) + { + if (shard.lazy) + addLazyPipe(pipes, shard); + else + addPipe(pipes, shard); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + pipeline.init(std::move(pipe)); +} + +} diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h new file mode 100644 index 00000000000..61099299c36 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -0,0 +1,57 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace DB +{ + +class ConnectionPoolWithFailover; +using ConnectionPoolWithFailoverPtr = std::shared_ptr; + +class Throttler; +using ThrottlerPtr = std::shared_ptr; + +/// Reading step from remote servers. +/// Unite query results from several shards. 
+class ReadFromRemote final : public ISourceStep +{ +public: + ReadFromRemote( + ClusterProxy::IStreamFactory::Shards shards_, + Block header_, + QueryProcessingStage::Enum stage_, + StorageID main_table_, + ASTPtr table_func_ptr_, + ContextPtr context_, + ThrottlerPtr throttler_, + Scalars scalars_, + Tables external_tables_, + Poco::Logger * log_); + + String getName() const override { return "ReadFromRemote"; } + + void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override; + +private: + ClusterProxy::IStreamFactory::Shards shards; + QueryProcessingStage::Enum stage; + + StorageID main_table; + ASTPtr table_func_ptr; + + ContextPtr context; + + ThrottlerPtr throttler; + Scalars scalars; + Tables external_tables; + + Poco::Logger * log; + + void addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard); + void addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard); +}; + +} diff --git a/src/Processors/Sources/SourceFromInputStream.h b/src/Processors/Sources/SourceFromInputStream.h index 2e8cf007623..9649385909c 100644 --- a/src/Processors/Sources/SourceFromInputStream.h +++ b/src/Processors/Sources/SourceFromInputStream.h @@ -1,6 +1,9 @@ #pragma once + #include #include +#include + namespace DB { diff --git a/src/Processors/Sources/SourceWithProgress.h b/src/Processors/Sources/SourceWithProgress.h index 78e56eafb52..49728be01e3 100644 --- a/src/Processors/Sources/SourceWithProgress.h +++ b/src/Processors/Sources/SourceWithProgress.h @@ -1,12 +1,16 @@ #pragma once #include -#include #include #include +#include + namespace DB { +class QueryStatus; +class EnabledQuota; + /// Adds progress to ISource. /// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread. class ISourceWithProgress : public ISource diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index 86051019235..15b725341c5 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -1,6 +1,5 @@ #include -#include #include #include @@ -10,6 +9,7 @@ #include #include + namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 31b2da46ab3..e402fd788bc 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -1,8 +1,9 @@ #include #include -#include -#include +#include #include +#include + namespace DB { @@ -159,19 +160,16 @@ void JoiningTransform::transform(Chunk & chunk) Block block; if (on_totals) { - /// We have to make chunk empty before return - /// In case of using `arrayJoin` we can get more or less rows than one - auto cols = chunk.detachColumns(); - for (auto & col : cols) - col = col->cloneResized(1); - block = inputs.front().getHeader().cloneWithColumns(std::move(cols)); + const auto & left_totals = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); + const auto & right_totals = join->getTotals(); /// Drop totals if both out stream and joined stream doesn't have ones. 
/// See comment in ExpressionTransform.h - if (default_totals && !join->hasTotals()) + if (default_totals && !right_totals) return; - join->joinTotals(block); + block = outputs.front().getHeader().cloneEmpty(); + JoinCommon::joinTotals(left_totals, right_totals, join->getTableJoin(), block); } else block = readExecute(chunk); @@ -183,11 +181,9 @@ void JoiningTransform::transform(Chunk & chunk) Block JoiningTransform::readExecute(Chunk & chunk) { Block res; - // std::cerr << "=== Chunk rows " << chunk.getNumRows() << " cols " << chunk.getNumColumns() << std::endl; if (!not_processed) { - // std::cerr << "!not_processed " << std::endl; if (chunk.hasColumns()) res = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -196,7 +192,6 @@ Block JoiningTransform::readExecute(Chunk & chunk) } else if (not_processed->empty()) /// There's not processed data inside expression. { - // std::cerr << "not_processed->empty() " << std::endl; if (chunk.hasColumns()) res = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -205,12 +200,10 @@ Block JoiningTransform::readExecute(Chunk & chunk) } else { - // std::cerr << "not not_processed->empty() " << std::endl; res = std::move(not_processed->block); join->joinBlock(res, not_processed); } - // std::cerr << "Res block rows " << res.rows() << " cols " << res.columns() << std::endl; return res; } diff --git a/src/Processors/Transforms/SortingTransform.h b/src/Processors/Transforms/SortingTransform.h index 9178991f324..0f7cb4347a4 100644 --- a/src/Processors/Transforms/SortingTransform.h +++ b/src/Processors/Transforms/SortingTransform.h @@ -3,7 +3,6 @@ #include #include #include -#include #include diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index 5809f382e0e..d42543d311a 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -70,7 +70,7 @@ private: /// They are added or not added to the current_totals, depending on the totals_mode. Chunk overflow_aggregates; - /// Here, total values are accumulated. After the work is finished, they will be placed in IBlockInputStream::totals. + /// Here, total values are accumulated. After the work is finished, they will be placed in totals. 
MutableColumns current_totals; }; diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 611b03ebf72..d7211f9edd7 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -139,7 +139,9 @@ public: } const Columns & inputAt(const RowNumber & x) const - { return const_cast(this)->inputAt(x); } + { + return const_cast(this)->inputAt(x); + } auto & blockAt(const uint64_t block_number) { @@ -149,13 +151,19 @@ public: } const auto & blockAt(const uint64_t block_number) const - { return const_cast(this)->blockAt(block_number); } + { + return const_cast(this)->blockAt(block_number); + } auto & blockAt(const RowNumber & x) - { return blockAt(x.block); } + { + return blockAt(x.block); + } const auto & blockAt(const RowNumber & x) const - { return const_cast(this)->blockAt(x); } + { + return const_cast(this)->blockAt(x); + } size_t blockRowsNumber(const RowNumber & x) const { @@ -225,10 +233,14 @@ public: } RowNumber blocksEnd() const - { return RowNumber{first_block_number + blocks.size(), 0}; } + { + return RowNumber{first_block_number + blocks.size(), 0}; + } RowNumber blocksBegin() const - { return RowNumber{first_block_number, 0}; } + { + return RowNumber{first_block_number, 0}; + } public: /* diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 86a40685d1f..3e51d9a77af 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -126,6 +126,7 @@ SRCS( QueryPlan/QueryPlan.cpp QueryPlan/ReadFromMergeTree.cpp QueryPlan/ReadFromPreparedSource.cpp + QueryPlan/ReadFromRemote.cpp QueryPlan/ReadNothingStep.cpp QueryPlan/RollupStep.cpp QueryPlan/SettingQuotaAndLimitsStep.cpp diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 82e5ed4d0db..1f776ddb6bc 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1150,7 +1150,7 @@ namespace { io.onException(); - LOG_ERROR(log, "Code: {}, e.displayText() = {}, Stack trace:\n\n{}", exception.code(), exception.displayText(), exception.getStackTraceString()); + LOG_ERROR(log, getExceptionMessage(exception, true)); if (responder && !responder_finished) { diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index ad38cfb341a..8e0bed4b4c2 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index beace5dd576..375f248d939 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -73,13 +73,13 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so : Poco::Net::TCPServerConnection(socket_) , server(server_) , log(&Poco::Logger::get("MySQLHandler")) - , connection_context(Context::createCopy(server.context())) , connection_id(connection_id_) + , connection_context(Context::createCopy(server.context())) , auth_plugin(new MySQLProtocol::Authentication::Native41()) { - server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; + server_capabilities = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; if (ssl_enabled) - server_capability_flags |= CLIENT_SSL; + server_capabilities |= CLIENT_SSL; replacements.emplace("KILL QUERY", 
killConnectionIdReplacementQuery); replacements.emplace("SHOW TABLE STATUS LIKE", showTableStatusReplacementQuery); @@ -95,14 +95,15 @@ void MySQLHandler::run() connection_context->getClientInfo().interface = ClientInfo::Interface::MYSQL; connection_context->setDefaultFormat("MySQLWire"); connection_context->getClientInfo().connection_id = connection_id; + connection_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY; in = std::make_shared(socket()); out = std::make_shared(socket()); - packet_endpoint = std::make_shared(*in, *out, connection_context->mysql.sequence_id); + packet_endpoint = MySQLProtocol::PacketEndpoint::create(*in, *out, sequence_id); try { - Handshake handshake(server_capability_flags, connection_id, VERSION_STRING + String("-") + VERSION_NAME, + Handshake handshake(server_capabilities, connection_id, VERSION_STRING + String("-") + VERSION_NAME, auth_plugin->getName(), auth_plugin->getAuthPluginData(), CharacterSet::utf8_general_ci); packet_endpoint->sendPacket(handshake, true); @@ -110,11 +111,8 @@ void MySQLHandler::run() HandshakeResponse handshake_response; finishHandshake(handshake_response); - connection_context->mysql.client_capabilities = handshake_response.capability_flags; - if (handshake_response.max_packet_size) - connection_context->mysql.max_packet_size = handshake_response.max_packet_size; - if (!connection_context->mysql.max_packet_size) - connection_context->mysql.max_packet_size = MAX_PACKET_LENGTH; + client_capabilities = handshake_response.capability_flags; + max_packet_size = handshake_response.max_packet_size ? handshake_response.max_packet_size : MAX_PACKET_LENGTH; LOG_TRACE(log, "Capabilities: {}, max_packet_size: {}, character_set: {}, user: {}, auth_response length: {}, database: {}, auth_plugin_name: {}", @@ -126,8 +124,7 @@ void MySQLHandler::run() handshake_response.database, handshake_response.auth_plugin_name); - client_capability_flags = handshake_response.capability_flags; - if (!(client_capability_flags & CLIENT_PROTOCOL_41)) + if (!(client_capabilities & CLIENT_PROTOCOL_41)) throw Exception("Required capability: CLIENT_PROTOCOL_41.", ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES); authenticate(handshake_response.username, handshake_response.auth_plugin_name, handshake_response.auth_response); @@ -281,7 +278,7 @@ void MySQLHandler::comInitDB(ReadBuffer & payload) readStringUntilEOF(database, payload); LOG_DEBUG(log, "Setting current database to {}", database); connection_context->setCurrentDatabase(database); - packet_endpoint->sendPacket(OKPacket(0, client_capability_flags, 0, 0, 1), true); + packet_endpoint->sendPacket(OKPacket(0, client_capabilities, 0, 0, 1), true); } void MySQLHandler::comFieldList(ReadBuffer & payload) @@ -298,12 +295,12 @@ void MySQLHandler::comFieldList(ReadBuffer & payload) ); packet_endpoint->sendPacket(column_definition); } - packet_endpoint->sendPacket(OKPacket(0xfe, client_capability_flags, 0, 0, 0), true); + packet_endpoint->sendPacket(OKPacket(0xfe, client_capabilities, 0, 0, 0), true); } void MySQLHandler::comPing() { - packet_endpoint->sendPacket(OKPacket(0x0, client_capability_flags, 0, 0, 0), true); + packet_endpoint->sendPacket(OKPacket(0x0, client_capabilities, 0, 0, 0), true); } static bool isFederatedServerSetupSetCommand(const String & query); @@ -316,7 +313,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload) // As Clickhouse doesn't support these statements, we just send OK packet in response. 
if (isFederatedServerSetupSetCommand(query)) { - packet_endpoint->sendPacket(OKPacket(0x00, client_capability_flags, 0, 0, 0), true); + packet_endpoint->sendPacket(OKPacket(0x00, client_capabilities, 0, 0, 0), true); } else { @@ -350,15 +347,20 @@ void MySQLHandler::comQuery(ReadBuffer & payload) CurrentThread::QueryScope query_scope{query_context}; - executeQuery(should_replace ? replacement : payload, *out, false, query_context, - [&with_output](const String &, const String &, const String &, const String &) - { - with_output = true; - } - ); + FormatSettings format_settings; + format_settings.mysql_wire.client_capabilities = client_capabilities; + format_settings.mysql_wire.max_packet_size = max_packet_size; + format_settings.mysql_wire.sequence_id = &sequence_id; + + auto set_result_details = [&with_output](const String &, const String &, const String &, const String &) + { + with_output = true; + }; + + executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, format_settings); if (!with_output) - packet_endpoint->sendPacket(OKPacket(0x00, client_capability_flags, affected_rows, 0, 0), true); + packet_endpoint->sendPacket(OKPacket(0x00, client_capabilities, affected_rows, 0, 0), true); } } @@ -395,14 +397,14 @@ void MySQLHandlerSSL::finishHandshakeSSL( ReadBufferFromMemory payload(buf, pos); payload.ignore(PACKET_HEADER_SIZE); ssl_request.readPayloadWithUnpacked(payload); - connection_context->mysql.client_capabilities = ssl_request.capability_flags; - connection_context->mysql.max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH; + client_capabilities = ssl_request.capability_flags; + max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH; secure_connection = true; ss = std::make_shared(SecureStreamSocket::attach(socket(), SSLManager::instance().defaultServerContext())); in = std::make_shared(*ss); out = std::make_shared(*ss); - connection_context->mysql.sequence_id = 2; - packet_endpoint = std::make_shared(*in, *out, connection_context->mysql.sequence_id); + sequence_id = 2; + packet_endpoint = MySQLProtocol::PacketEndpoint::create(*in, *out, sequence_id); packet_endpoint->receivePacket(packet); /// Reading HandshakeResponse from secure socket. } diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index e681ad2e6f6..96467797105 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -32,7 +32,7 @@ public: void run() final; -private: +protected: CurrentMetrics::Increment metric_increment{CurrentMetrics::MySQLConnection}; /// Enables SSL, if client requested. 
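For context only (not part of the patch): MySQL capability flags are a 32-bit bitmask on the wire, which is presumably why the handler members above are narrowed from size_t to uint32_t. A minimal sketch of building and testing such a mask follows; the constants here are hypothetical stand-ins, while the real handler uses CLIENT_PROTOCOL_41, CLIENT_SSL, etc. from the MySQLProtocol headers.

#include <cstdint>

// Hypothetical stand-in values for illustration only.
constexpr uint32_t kClientProtocol41 = 1u << 9;
constexpr uint32_t kClientSsl        = 1u << 11;

// Build the advertised server capability mask, mirroring the logic in the
// MySQLHandler constructor above (base capabilities, plus SSL when enabled).
uint32_t buildServerCapabilities(bool ssl_enabled)
{
    uint32_t capabilities = kClientProtocol41;
    if (ssl_enabled)
        capabilities |= kClientSsl;
    return capabilities;
}

// Test a single required bit, as MySQLHandler::run() does for CLIENT_PROTOCOL_41.
bool clientIsAcceptable(uint32_t client_capabilities)
{
    return (client_capabilities & kClientProtocol41) != 0;
}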
@@ -52,32 +52,25 @@ private: virtual void finishHandshakeSSL(size_t packet_size, char * buf, size_t pos, std::function read_bytes, MySQLProtocol::ConnectionPhase::HandshakeResponse & packet); IServer & server; - -protected: Poco::Logger * log; - - ContextMutablePtr connection_context; - - std::shared_ptr packet_endpoint; - -private: UInt64 connection_id = 0; - size_t server_capability_flags = 0; - size_t client_capability_flags = 0; + uint32_t server_capabilities = 0; + uint32_t client_capabilities = 0; + size_t max_packet_size = 0; + uint8_t sequence_id = 0; -protected: - std::unique_ptr auth_plugin; + MySQLProtocol::PacketEndpointPtr packet_endpoint; + ContextMutablePtr connection_context; - std::shared_ptr in; - std::shared_ptr out; - - bool secure_connection = false; - -private: using ReplacementFn = std::function; using Replacements = std::unordered_map; Replacements replacements; + + std::unique_ptr auth_plugin; + std::shared_ptr in; + std::shared_ptr out; + bool secure_connection = false; }; #if USE_SSL diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 01887444c65..1e98ed2e134 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -55,6 +55,7 @@ void PostgreSQLHandler::run() connection_context->makeSessionContext(); connection_context->getClientInfo().interface = ClientInfo::Interface::POSTGRESQL; connection_context->setDefaultFormat("PostgreSQLWire"); + connection_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY; try { diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 108b7b8070a..c6319620899 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -149,7 +149,7 @@ void TCPHandler::runImpl() if (!DatabaseCatalog::instance().isDatabaseExist(default_database)) { Exception e("Database " + backQuote(default_database) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE); - LOG_ERROR(log, "Code: {}, e.displayText() = {}, Stack trace:\n\n{}", e.code(), e.displayText(), e.getStackTraceString()); + LOG_ERROR(log, getExceptionMessage(e, true)); sendException(e, connection_context->getSettingsRef().calculate_text_stack_trace); return; } @@ -422,7 +422,7 @@ void TCPHandler::runImpl() } const auto & e = *exception; - LOG_ERROR(log, "Code: {}, e.displayText() = {}, Stack trace:\n\n{}", e.code(), e.displayText(), e.getStackTraceString()); + LOG_ERROR(log, getExceptionMessage(e, true)); sendException(*exception, send_exception_with_stack_trace); } } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 179204a1a0b..81e0a912274 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -168,7 +168,7 @@ ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, NamesAndAlias /// We are trying to find first column from end with name `column_name` or with a name beginning with `column_name` and ".". 
/// For example "fruits.bananas" /// names are considered the same if they completely match or `name_without_dot` matches the part of the name to the point -static auto getNameRange(const ColumnsDescription::Container & columns, const String & name_without_dot) +static auto getNameRange(const ColumnsDescription::ColumnsContainer & columns, const String & name_without_dot) { String name_with_dot = name_without_dot + "."; @@ -228,7 +228,7 @@ void ColumnsDescription::remove(const String & column_name) for (auto list_it = range.first; list_it != range.second;) { - removeSubcolumns(list_it->name, list_it->type); + removeSubcolumns(list_it->name); list_it = columns.get<0>().erase(list_it); } } @@ -303,7 +303,7 @@ void ColumnsDescription::flattenNested() } ColumnDescription column = std::move(*it); - removeSubcolumns(column.name, column.type); + removeSubcolumns(column.name); it = columns.get<0>().erase(it); const DataTypes & elements = type_tuple->getElements(); @@ -372,12 +372,7 @@ bool ColumnsDescription::hasNested(const String & column_name) const bool ColumnsDescription::hasSubcolumn(const String & column_name) const { - return subcolumns.find(column_name) != subcolumns.end(); -} - -bool ColumnsDescription::hasInStorageOrSubcolumn(const String & column_name) const -{ - return has(column_name) || hasSubcolumn(column_name); + return subcolumns.get<0>().count(column_name); } const ColumnDescription & ColumnsDescription::get(const String & column_name) const @@ -390,6 +385,50 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co return *it; } +static ColumnsDescription::GetFlags defaultKindToGetFlag(ColumnDefaultKind kind) +{ + switch (kind) + { + case ColumnDefaultKind::Default: + return ColumnsDescription::Ordinary; + case ColumnDefaultKind::Materialized: + return ColumnsDescription::Materialized; + case ColumnDefaultKind::Alias: + return ColumnsDescription::Aliases; + } + __builtin_unreachable(); +} + +NamesAndTypesList ColumnsDescription::getByNames(GetFlags flags, const Names & names, bool with_subcolumns) const +{ + NamesAndTypesList res; + for (const auto & name : names) + { + if (auto it = columns.get<1>().find(name); it != columns.get<1>().end()) + { + auto kind = defaultKindToGetFlag(it->default_desc.kind); + if (flags & kind) + { + res.emplace_back(name, it->type); + continue; + } + } + else if (with_subcolumns) + { + auto jt = subcolumns.get<0>().find(name); + if (jt != subcolumns.get<0>().end()) + { + res.push_back(*jt); + continue; + } + } + + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", name); + } + + return res; +} + NamesAndTypesList ColumnsDescription::getAllPhysical() const { @@ -409,29 +448,46 @@ Names ColumnsDescription::getNamesOfPhysical() const return ret; } -NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) const +std::optional ColumnsDescription::tryGetColumnOrSubcolumn(GetFlags flags, const String & column_name) const +{ + auto it = columns.get<1>().find(column_name); + if (it != columns.get<1>().end() && (defaultKindToGetFlag(it->default_desc.kind) & flags)) + return NameAndTypePair(it->name, it->type); + + auto jt = subcolumns.get<0>().find(column_name); + if (jt != subcolumns.get<0>().end()) + return *jt; + + return {}; +} + +NameAndTypePair ColumnsDescription::getColumnOrSubcolumn(GetFlags flags, const String & column_name) const +{ + auto column = tryGetColumnOrSubcolumn(flags, column_name); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + 
"There is no column or subcolumn {} in table.", column_name); + + return *column; +} + +std::optional ColumnsDescription::tryGetPhysical(const String & column_name) const { auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias) - throw Exception("There is no physical column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + return {}; + return NameAndTypePair(it->name, it->type); } -NameAndTypePair ColumnsDescription::getPhysicalOrSubcolumn(const String & column_name) const +NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) const { - if (auto it = columns.get<1>().find(column_name); it != columns.get<1>().end() - && it->default_desc.kind != ColumnDefaultKind::Alias) - { - return NameAndTypePair(it->name, it->type); - } + auto column = tryGetPhysical(column_name); + if (!column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no physical column {} in table.", column_name); - if (auto it = subcolumns.find(column_name); it != subcolumns.end()) - { - return it->second; - } - - throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, - "There is no physical column or subcolumn {} in table.", column_name); + return *column; } bool ColumnsDescription::hasPhysical(const String & column_name) const @@ -440,32 +496,36 @@ bool ColumnsDescription::hasPhysical(const String & column_name) const return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias; } -bool ColumnsDescription::hasPhysicalOrSubcolumn(const String & column_name) const +bool ColumnsDescription::hasColumnOrSubcolumn(GetFlags flags, const String & column_name) const { - return hasPhysical(column_name) || subcolumns.find(column_name) != subcolumns.end(); + auto it = columns.get<1>().find(column_name); + return (it != columns.get<1>().end() + && (defaultKindToGetFlag(it->default_desc.kind) & flags)) + || hasSubcolumn(column_name); } -static NamesAndTypesList getWithSubcolumns(NamesAndTypesList && source_list) +void ColumnsDescription::addSubcolumnsToList(NamesAndTypesList & source_list) const { - NamesAndTypesList ret; for (const auto & col : source_list) { - ret.emplace_back(col.name, col.type); - for (const auto & subcolumn : col.type->getSubcolumnNames()) - ret.emplace_back(col.name, subcolumn, col.type, col.type->getSubcolumnType(subcolumn)); + auto range = subcolumns.get<1>().equal_range(col.name); + if (range.first != range.second) + source_list.insert(source_list.end(), range.first, range.second); } - - return ret; } NamesAndTypesList ColumnsDescription::getAllWithSubcolumns() const { - return getWithSubcolumns(getAll()); + auto columns_list = getAll(); + addSubcolumnsToList(columns_list); + return columns_list; } NamesAndTypesList ColumnsDescription::getAllPhysicalWithSubcolumns() const { - return getWithSubcolumns(getAllPhysical()); + auto columns_list = getAllPhysical(); + addSubcolumnsToList(columns_list); + return columns_list; } bool ColumnsDescription::hasDefaults() const @@ -591,14 +651,15 @@ void ColumnsDescription::addSubcolumns(const String & name_in_storage, const Dat throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add subcolumn {}: column with this name already exists", subcolumn.name); - subcolumns[subcolumn.name] = subcolumn; + subcolumns.get<0>().insert(std::move(subcolumn)); } } -void ColumnsDescription::removeSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage) +void ColumnsDescription::removeSubcolumns(const String & 
name_in_storage) { - for (const auto & subcolumn_name : type_in_storage->getSubcolumnNames()) - subcolumns.erase(name_in_storage + "." + subcolumn_name); + auto range = subcolumns.get<1>().equal_range(name_in_storage); + if (range.first != range.second) + subcolumns.get<1>().erase(range.first, range.second); } Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context) diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index f1887d772ca..44f895c89ce 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include #include @@ -77,6 +79,18 @@ public: auto begin() const { return columns.begin(); } auto end() const { return columns.end(); } + enum GetFlags : UInt8 + { + Ordinary = 1, + Materialized = 2, + Aliases = 4, + + AllPhysical = Ordinary | Materialized, + All = AllPhysical | Aliases, + }; + + NamesAndTypesList getByNames(GetFlags flags, const Names & names, bool with_subcolumns) const; + NamesAndTypesList getOrdinary() const; NamesAndTypesList getMaterialized() const; NamesAndTypesList getAliases() const; @@ -91,7 +105,6 @@ public: bool has(const String & column_name) const; bool hasNested(const String & column_name) const; bool hasSubcolumn(const String & column_name) const; - bool hasInStorageOrSubcolumn(const String & column_name) const; const ColumnDescription & get(const String & column_name) const; template @@ -113,10 +126,15 @@ public: } Names getNamesOfPhysical() const; + bool hasPhysical(const String & column_name) const; - bool hasPhysicalOrSubcolumn(const String & column_name) const; + bool hasColumnOrSubcolumn(GetFlags flags, const String & column_name) const; + NameAndTypePair getPhysical(const String & column_name) const; - NameAndTypePair getPhysicalOrSubcolumn(const String & column_name) const; + NameAndTypePair getColumnOrSubcolumn(GetFlags flags, const String & column_name) const; + + std::optional tryGetPhysical(const String & column_name) const; + std::optional tryGetColumnOrSubcolumn(GetFlags flags, const String & column_name) const; ColumnDefaults getDefaults() const; /// TODO: remove bool hasDefault(const String & column_name) const; @@ -143,21 +161,27 @@ public: } /// Keep the sequence of columns and allow to lookup by name. - using Container = boost::multi_index_container< + using ColumnsContainer = boost::multi_index_container< ColumnDescription, boost::multi_index::indexed_by< boost::multi_index::sequenced<>, boost::multi_index::ordered_unique>>>; -private: - Container columns; + using SubcolumnsContainter = boost::multi_index_container< + NameAndTypePair, + boost::multi_index::indexed_by< + boost::multi_index::hashed_unique>, + boost::multi_index::hashed_non_unique>>>; - using SubcolumnsContainer = std::unordered_map; - SubcolumnsContainer subcolumns; +private: + ColumnsContainer columns; + SubcolumnsContainter subcolumns; void modifyColumnOrder(const String & column_name, const String & after_column, bool first); + void addSubcolumnsToList(NamesAndTypesList & source_list) const; + void addSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage); - void removeSubcolumns(const String & name_in_storage, const DataTypePtr & type_in_storage); + void removeSubcolumns(const String & name_in_storage); }; /// Validate default expressions and corresponding types compatibility, i.e. 
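For context, a minimal usage sketch of the flag-based column lookup API introduced above. This is illustrative only and not part of the patch: the helper function names are hypothetical, while the ColumnsDescription methods and GetFlags values are the ones declared in the header diff.

#include <optional>
#include <Storages/ColumnsDescription.h>

using namespace DB;

/// Resolve requested names to physical columns (Ordinary | Materialized),
/// also accepting subcolumns such as "nested.size0". Per the implementation
/// above, any name that matches neither throws NO_SUCH_COLUMN_IN_TABLE.
NamesAndTypesList resolvePhysicalColumns(const ColumnsDescription & columns, const Names & requested)
{
    return columns.getByNames(ColumnsDescription::AllPhysical, requested, /* with_subcolumns = */ true);
}

/// Point lookup that may return a column of any kind or a subcolumn,
/// or std::nullopt when the name is unknown.
std::optional<NameAndTypePair> findColumnOrSubcolumn(const ColumnsDescription & columns, const String & name)
{
    return columns.tryGetColumnOrSubcolumn(ColumnsDescription::All, name);
}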
diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 17c0eec5c49..be20a2d3db6 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -330,6 +331,13 @@ namespace CheckingCompressedReadBuffer checking_in(in); remote.writePrepared(checking_in); } + + uint64_t doubleToUInt64(double d) + { + if (d >= std::numeric_limits::max()) + return std::numeric_limits::max(); + return static_cast(d); + } } @@ -345,15 +353,15 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( , disk(disk_) , relative_path(relative_path_) , path(fs::path(disk->getPath()) / relative_path / "") - , should_batch_inserts(storage.getContext()->getSettingsRef().distributed_directory_monitor_batch_inserts) - , split_batch_on_failure(storage.getContext()->getSettingsRef().distributed_directory_monitor_split_batch_on_failure) + , should_batch_inserts(storage.getDistributedSettingsRef().monitor_batch_inserts) + , split_batch_on_failure(storage.getDistributedSettingsRef().monitor_split_batch_on_failure) , dir_fsync(storage.getDistributedSettingsRef().fsync_directories) , min_batched_block_size_rows(storage.getContext()->getSettingsRef().min_insert_block_size_rows) , min_batched_block_size_bytes(storage.getContext()->getSettingsRef().min_insert_block_size_bytes) , current_batch_file_path(path + "current_batch.txt") - , default_sleep_time(storage.getContext()->getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()) + , default_sleep_time(storage.getDistributedSettingsRef().monitor_sleep_time_ms.totalMilliseconds()) , sleep_time(default_sleep_time) - , max_sleep_time(storage.getContext()->getSettingsRef().distributed_directory_monitor_max_sleep_time_ms.totalMilliseconds()) + , max_sleep_time(storage.getDistributedSettingsRef().monitor_max_sleep_time_ms.totalMilliseconds()) , log(&Poco::Logger::get(getLoggerName())) , monitor_blocker(monitor_blocker_) , metric_pending_files(CurrentMetrics::DistributedFilesToInsert, 0) @@ -431,9 +439,14 @@ void StorageDistributedDirectoryMonitor::run() do_sleep = true; ++status.error_count; - sleep_time = std::min( - std::chrono::milliseconds{Int64(default_sleep_time.count() * std::exp2(status.error_count))}, - max_sleep_time); + + UInt64 q = doubleToUInt64(std::exp2(status.error_count)); + std::chrono::milliseconds new_sleep_time(default_sleep_time.count() * q); + if (new_sleep_time.count() < 0) + sleep_time = max_sleep_time; + else + sleep_time = std::min(new_sleep_time, max_sleep_time); + tryLogCurrentException(getLoggerName().data()); status.last_exception = std::current_exception(); } @@ -763,8 +776,8 @@ struct StorageDistributedDirectoryMonitor::Batch else { std::vector files(file_index_to_path.size()); - for (const auto & [index, name] : file_index_to_path) - files.push_back(name); + for (const auto && file_info : file_index_to_path | boost::adaptors::indexed()) + files[file_info.index()] = file_info.value().second; e.addMessage(fmt::format("While sending batch {}", fmt::join(files, "\n"))); throw; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 9a50cec5986..c0d7541eacc 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -111,6 +111,7 @@ DistributedBlockOutputStream::DistributedBlockOutputStream( if 
(settings.max_distributed_depth && context->getClientInfo().distributed_depth > settings.max_distributed_depth) throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); context->getClientInfo().distributed_depth += 1; + random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; } @@ -156,9 +157,6 @@ void DistributedBlockOutputStream::write(const Block & block) void DistributedBlockOutputStream::writeAsync(const Block & block) { - const Settings & settings = context->getSettingsRef(); - bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; - if (random_shard_insert) { writeAsyncImpl(block, storage.getRandomShardIndex(cluster->getShardsInfo())); @@ -264,11 +262,19 @@ void DistributedBlockOutputStream::waitForJobs() } } - size_t jobs_count = remote_jobs_count + local_jobs_count; size_t num_finished_jobs = finished_jobs_count; + if (random_shard_insert) + { + if (finished_jobs_count != 1) + LOG_WARNING(log, "Expected 1 writing jobs when doing random shard insert, but finished {}", num_finished_jobs); + } + else + { + size_t jobs_count = remote_jobs_count + local_jobs_count; - if (num_finished_jobs < jobs_count) - LOG_WARNING(log, "Expected {} writing jobs, but finished only {}", jobs_count, num_finished_jobs); + if (num_finished_jobs < jobs_count) + LOG_WARNING(log, "Expected {} writing jobs, but finished only {}", jobs_count, num_finished_jobs); + } } @@ -401,7 +407,6 @@ void DistributedBlockOutputStream::writeSync(const Block & block) { const Settings & settings = context->getSettingsRef(); const auto & shards_info = cluster->getShardsInfo(); - bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; size_t start = 0; size_t end = shards_info.size(); @@ -410,20 +415,13 @@ void DistributedBlockOutputStream::writeSync(const Block & block) start = settings.insert_shard_id - 1; end = settings.insert_shard_id; } - else if (random_shard_insert) - { - start = storage.getRandomShardIndex(shards_info); - end = start + 1; - } - - size_t num_shards = end - start; if (!pool) { /// Deferred initialization. Only for sync insertion. initWritingJobs(block, start, end); - size_t jobs_count = remote_jobs_count + local_jobs_count; + size_t jobs_count = random_shard_insert ? 
1 : (remote_jobs_count + local_jobs_count); size_t max_threads = std::min(settings.max_distributed_connections, jobs_count); pool.emplace(/* max_threads_= */ max_threads, /* max_free_threads_= */ max_threads, @@ -440,12 +438,20 @@ void DistributedBlockOutputStream::writeSync(const Block & block) watch_current_block.restart(); + if (random_shard_insert) + { + start = storage.getRandomShardIndex(shards_info); + end = start + 1; + } + + size_t num_shards = end - start; + if (num_shards > 1) { auto current_selector = createSelector(block); - /// Prepare row numbers for each shard - for (size_t shard_index : collections::range(0, num_shards)) + /// Prepare row numbers for needed shards + for (size_t shard_index : collections::range(start, end)) per_shard_jobs[shard_index].shard_current_block_permutation.resize(0); for (size_t i = 0; i < block.rows(); ++i) @@ -456,7 +462,7 @@ void DistributedBlockOutputStream::writeSync(const Block & block) { /// Run jobs in parallel for each block and wait them finished_jobs_count = 0; - for (size_t shard_index : collections::range(0, shards_info.size())) + for (size_t shard_index : collections::range(start, end)) for (JobReplica & job : per_shard_jobs[shard_index].replicas_jobs) pool->scheduleOrThrowOnError(runWritingJob(job, block, num_shards)); } diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.h b/src/Storages/Distributed/DistributedBlockOutputStream.h index 0ae57ce053d..8e6e914cb29 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -94,6 +94,7 @@ private: size_t inserted_rows = 0; bool insert_sync; + bool random_shard_insert; bool allow_materialized; /// Sync-related stuff diff --git a/src/Storages/Distributed/DistributedSettings.h b/src/Storages/Distributed/DistributedSettings.h index 7296fa11ffd..8cc942cab02 100644 --- a/src/Storages/Distributed/DistributedSettings.h +++ b/src/Storages/Distributed/DistributedSettings.h @@ -21,6 +21,11 @@ class ASTStorage; M(UInt64, bytes_to_throw_insert, 0, "If more than this number of compressed bytes will be pending for async INSERT, an exception will be thrown. 0 - do not throw.", 0) \ M(UInt64, bytes_to_delay_insert, 0, "If more than this number of compressed bytes will be pending for async INSERT, the query will be delayed. 
0 - do not delay.", 0) \ M(UInt64, max_delay_to_insert, 60, "Max delay of inserting data into Distributed table in seconds, if there are a lot of pending bytes for async send.", 0) \ + /** Directory monitor settings */ \ + M(UInt64, monitor_batch_inserts, 0, "Default - distributed_directory_monitor_batch_inserts", 0) \ + M(UInt64, monitor_split_batch_on_failure, 0, "Default - distributed_directory_monitor_split_batch_on_failure", 0) \ + M(Milliseconds, monitor_sleep_time_ms, 0, "Default - distributed_directory_monitor_sleep_time_ms", 0) \ + M(Milliseconds, monitor_max_sleep_time_ms, 0, "Default - distributed_directory_monitor_max_sleep_time_ms", 0) \ DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 578da239c20..d04462a6750 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -27,6 +26,7 @@ #include #include + namespace fs = std::filesystem; namespace DB diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 2d6109bd7af..5d1771f48b7 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -272,6 +272,10 @@ public: throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + /// Returns true if FINAL modifier must be added to SELECT query depending on required columns. + /// It's needed for ReplacingMergeTree wrappers such as MaterializedMySQL and MaterializedPostrgeSQL + virtual bool needRewriteQueryWithFinal(const Names & /*column_names*/) const { return false; } + /** Read a set of columns from the table. * Accepts a list of columns to read, as well as a description of the query, * from which information can be extracted about how to retrieve data diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 15dd5b553b0..ed232be640a 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index ff6101fce07..e30da82416d 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,8 @@ #include #include #include - +#include +#include namespace fs = std::filesystem; @@ -40,6 +42,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int S3_ERROR; extern const int INCORRECT_PART_TYPE; + extern const int ZERO_COPY_REPLICATION_ERROR; } namespace DataPartsExchange @@ -52,7 +55,7 @@ constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS = 2; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE = 3; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION = 4; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID = 5; -constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_S3_COPY = 6; +constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY = 6; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION = 7; @@ -168,27 +171,27 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) writeUUIDText(part->uuid, out); - bool try_use_s3_copy = 
false; + String remote_fs_metadata = parse(params.get("remote_fs_metadata", "")); + std::regex re("\\s*,\\s*"); + Strings capability( + std::sregex_token_iterator(remote_fs_metadata.begin(), remote_fs_metadata.end(), re, -1), + std::sregex_token_iterator()); - if (data_settings->allow_s3_zero_copy_replication - && client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_S3_COPY) - { /// if source and destination are in the same S3 storage we try to use S3 CopyObject request first - int send_s3_metadata = parse(params.get("send_s3_metadata", "0")); - if (send_s3_metadata == 1) + if (data_settings->allow_remote_fs_zero_copy_replication && + client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) + { + auto disk = part->volume->getDisk(); + auto disk_type = DiskType::toString(disk->getType()); + if (disk->supportZeroCopyReplication() && std::find(capability.begin(), capability.end(), disk_type) != capability.end()) { - auto disk = part->volume->getDisk(); - if (disk->getType() == DB::DiskType::Type::S3) - { - try_use_s3_copy = true; - } + /// Send metadata if the receiver's capability covers the source disk type. + response.addCookie({"remote_fs_metadata", disk_type}); + sendPartFromDiskRemoteMeta(part, out); + return; } } - if (try_use_s3_copy) - { - response.addCookie({"send_s3_metadata", "1"}); - sendPartS3Metadata(part, out); - } - else if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) + + if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) { const auto & projections = part->getProjectionParts(); writeBinary(projections.size(), out); @@ -323,7 +326,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( return data_checksums; } -void Service::sendPartS3Metadata(const MergeTreeData::DataPartPtr & part, WriteBuffer & out) +void Service::sendPartFromDiskRemoteMeta(const MergeTreeData::DataPartPtr & part, WriteBuffer & out) { /// We'll take a list of files from the list of checksums. 
MergeTreeData::DataPart::Checksums checksums = part->checksums; @@ -333,8 +336,8 @@ void Service::sendPartS3Metadata(const MergeTreeData::DataPartPtr & part, WriteB checksums.files[file_name] = {}; auto disk = part->volume->getDisk(); - if (disk->getType() != DB::DiskType::Type::S3) - throw Exception("S3 disk is not S3 anymore", ErrorCodes::LOGICAL_ERROR); + if (!disk->supportZeroCopyReplication()) + throw Exception(fmt::format("disk {} doesn't support zero-copy replication", disk->getName()), ErrorCodes::LOGICAL_ERROR); part->storage.lockSharedData(*part); @@ -351,9 +354,9 @@ void Service::sendPartS3Metadata(const MergeTreeData::DataPartPtr & part, WriteB fs::path metadata(metadata_file); if (!fs::exists(metadata)) - throw Exception("S3 metadata '" + file_name + "' is not exists", ErrorCodes::CORRUPTED_DATA); + throw Exception("Remote metadata '" + file_name + "' is not exists", ErrorCodes::CORRUPTED_DATA); if (!fs::is_regular_file(metadata)) - throw Exception("S3 metadata '" + file_name + "' is not a file", ErrorCodes::CORRUPTED_DATA); + throw Exception("Remote metadata '" + file_name + "' is not a file", ErrorCodes::CORRUPTED_DATA); UInt64 file_size = fs::file_size(metadata); writeStringBinary(it.first, out); @@ -399,8 +402,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( bool to_detached, const String & tmp_prefix_, std::optional * tagger_ptr, - bool try_use_s3_copy, - const DiskPtr disk_s3) + bool try_zero_copy, + DiskPtr disk) { if (blocker.isCancelled()) throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); @@ -421,30 +424,34 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( {"compress", "false"} }); - if (try_use_s3_copy && disk_s3 && disk_s3->getType() != DB::DiskType::Type::S3) - throw Exception("Try to fetch shared s3 part on non-s3 disk", ErrorCodes::LOGICAL_ERROR); - - Disks disks_s3; - - if (!data_settings->allow_s3_zero_copy_replication) - try_use_s3_copy = false; - - if (try_use_s3_copy) + Strings capability; + if (try_zero_copy && data_settings->allow_remote_fs_zero_copy_replication) { - if (disk_s3) - disks_s3.push_back(disk_s3); - else + if (!disk) { - disks_s3 = data.getDisksByType(DiskType::Type::S3); - - if (disks_s3.empty()) - try_use_s3_copy = false; + DiskType::Type zero_copy_disk_types[] = {DiskType::Type::S3, DiskType::Type::HDFS}; + for (auto disk_type: zero_copy_disk_types) + { + Disks disks = data.getDisksByType(disk_type); + if (!disks.empty()) + { + capability.push_back(DiskType::toString(disk_type)); + } + } + } + else if (disk->supportZeroCopyReplication()) + { + capability.push_back(DiskType::toString(disk->getType())); } } - - if (try_use_s3_copy) + if (!capability.empty()) { - uri.addQueryParameter("send_s3_metadata", "1"); + const String & remote_fs_metadata = boost::algorithm::join(capability, ", "); + uri.addQueryParameter("remote_fs_metadata", remote_fs_metadata); + } + else + { + try_zero_copy = false; } Poco::Net::HTTPBasicCredentials creds{}; @@ -467,73 +474,6 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( int server_protocol_version = parse(in.getResponseCookie("server_protocol_version", "0")); - int send_s3 = parse(in.getResponseCookie("send_s3_metadata", "0")); - - if (send_s3 == 1) - { - if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_S3_COPY) - throw Exception("Got 'send_s3_metadata' cookie with old protocol version", ErrorCodes::LOGICAL_ERROR); - if (!try_use_s3_copy) - throw Exception("Got 'send_s3_metadata' cookie when was not requested", ErrorCodes::LOGICAL_ERROR); - - 
size_t sum_files_size = 0; - readBinary(sum_files_size, in); - IMergeTreeDataPart::TTLInfos ttl_infos; - String ttl_infos_string; - readBinary(ttl_infos_string, in); - ReadBufferFromString ttl_infos_buffer(ttl_infos_string); - assertString("ttl format version: 1\n", ttl_infos_buffer); - ttl_infos.read(ttl_infos_buffer); - - ReservationPtr reservation - = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); - if (!reservation) - reservation - = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); - if (reservation) - { - /// When we have multi-volume storage, one of them was chosen, depends on TTL, free space, etc. - /// Chosen one may be S3 or not. - DiskPtr disk = reservation->getDisk(); - if (disk && disk->getType() == DiskType::Type::S3) - { - for (const auto & d : disks_s3) - { - if (d->getPath() == disk->getPath()) - { - Disks disks_tmp = { disk }; - disks_s3.swap(disks_tmp); - break; - } - } - } - } - - String part_type = "Wide"; - readStringBinary(part_type, in); - if (part_type == "InMemory") - throw Exception("Got 'send_s3_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); - - UUID part_uuid = UUIDHelpers::Nil; - - /// Always true due to values of constants. But we keep this condition just in case. - if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) //-V547 - readUUIDText(part_uuid, in); - - try - { - return downloadPartToS3(part_name, replica_path, to_detached, tmp_prefix_, std::move(disks_s3), in, throttler); - } - catch (const Exception & e) - { - if (e.code() != ErrorCodes::S3_ERROR) - throw; - /// Try again but without S3 copy - return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, - user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false); - } - } - ReservationPtr reservation; size_t sum_files_size = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) @@ -547,24 +487,29 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( ReadBufferFromString ttl_infos_buffer(ttl_infos_string); assertString("ttl format version: 1\n", ttl_infos_buffer); ttl_infos.read(ttl_infos_buffer); - reservation - = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); - if (!reservation) + if (!disk) + { reservation - = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); + = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, &ttl_infos, true); + if (!reservation) + reservation + = data.reserveSpacePreferringTTLRules(metadata_snapshot, sum_files_size, ttl_infos, std::time(nullptr), 0, true); + } } - else + else if (!disk) { reservation = data.balancedReservation(metadata_snapshot, sum_files_size, 0, part_name, part_info, {}, tagger_ptr, nullptr); if (!reservation) reservation = data.reserveSpace(sum_files_size); } } - else + else if (!disk) { /// We don't know real size of part because sender server version is too old reservation = data.makeEmptyReservationOnLargestDisk(); } + if (!disk) + disk = reservation->getDisk(); bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); @@ -577,8 +522,35 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if 
(server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) readUUIDText(part_uuid, in); + String remote_fs_metadata = parse(in.getResponseCookie("remote_fs_metadata", "")); + if (!remote_fs_metadata.empty()) + { + if (!try_zero_copy) + throw Exception("Got unexpected 'remote_fs_metadata' cookie", ErrorCodes::LOGICAL_ERROR); + if (std::find(capability.begin(), capability.end(), remote_fs_metadata) == capability.end()) + throw Exception(fmt::format("Got 'remote_fs_metadata' cookie {}, expect one from {}", remote_fs_metadata, fmt::join(capability, ", ")), ErrorCodes::LOGICAL_ERROR); + if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) + throw Exception(fmt::format("Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version), ErrorCodes::LOGICAL_ERROR); + if (part_type == "InMemory") + throw Exception("Got 'remote_fs_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); + + try + { + return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix_, disk, in, throttler); + } + catch (const Exception & e) + { + if (e.code() != ErrorCodes::S3_ERROR && e.code() != ErrorCodes::ZERO_COPY_REPLICATION_ERROR) + throw; + LOG_WARNING(log, e.message() + " Will retry fetching part without zero-copy."); + /// Try again but without zero-copy + return fetchPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, + user, password, interserver_scheme, throttler, to_detached, tmp_prefix_, nullptr, false, disk); + } + } + auto storage_id = data.getStorageID(); - String new_part_path = part_type == "InMemory" ? "memory" : fs::path(data.getFullPathOnDisk(reservation->getDisk())) / part_name / ""; + String new_part_path = part_type == "InMemory" ? "memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; auto entry = data.getContext()->getReplicatedFetchList().insert( storage_id.getDatabaseName(), storage_id.getTableName(), part_info.partition_id, part_name, new_part_path, @@ -586,15 +558,14 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( in.setNextCallback(ReplicatedFetchReadCallback(*entry)); - size_t projections = 0; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) readBinary(projections, in); MergeTreeData::DataPart::Checksums checksums; return part_type == "InMemory" - ? downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, std::move(reservation), in, projections, throttler) - : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, reservation->getDisk(), in, projections, checksums, throttler); + ? 
downloadPartToMemory(part_name, part_uuid, metadata_snapshot, context, disk, in, projections, throttler) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, sync, disk, in, projections, checksums, throttler); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( @@ -602,12 +573,12 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( const UUID & part_uuid, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - ReservationPtr reservation, + DiskPtr disk, PooledReadWriteBufferFromHTTP & in, size_t projections, ThrottlerPtr throttler) { - auto volume = std::make_shared("volume_" + part_name, reservation->getDisk(), 0); + auto volume = std::make_shared("volume_" + part_name, disk, 0); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, part_name, volume); @@ -794,31 +765,24 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( return new_data_part; } -MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToS3( +MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( const String & part_name, const String & replica_path, bool to_detached, const String & tmp_prefix_, - const Disks & disks_s3, + DiskPtr disk, PooledReadWriteBufferFromHTTP & in, ThrottlerPtr throttler) { - if (disks_s3.empty()) - throw Exception("No S3 disks anymore", ErrorCodes::LOGICAL_ERROR); - String part_id; readStringBinary(part_id, in); - DiskPtr disk = disks_s3[0]; - - for (const auto & disk_s3 : disks_s3) + if (!disk->supportZeroCopyReplication() || !disk->checkUniqueId(part_id)) { - if (disk_s3->checkUniqueId(part_id)) - { - disk = disk_s3; - break; - } + throw Exception(fmt::format("Part {} unique id {} doesn't exist on {}.", part_name, part_id, disk->getName()), ErrorCodes::ZERO_COPY_REPLICATION_ERROR); } + LOG_DEBUG(log, "Downloading Part {} unique id {} metadata onto disk {}.", + part_name, part_id, disk->getName()); static const String TMP_PREFIX = "tmp_fetch_"; String tmp_prefix = tmp_prefix_.empty() ? TMP_PREFIX : tmp_prefix_; diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index eb776c33f0f..0c12cc51cc7 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -50,7 +50,7 @@ private: int client_protocol_version, const std::map> & projections = {}); - void sendPartS3Metadata(const MergeTreeData::DataPartPtr & part, WriteBuffer & out); + void sendPartFromDiskRemoteMeta(const MergeTreeData::DataPartPtr & part, WriteBuffer & out); /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, /// so Service will never access dangling reference to storage @@ -81,8 +81,8 @@ public: bool to_detached = false, const String & tmp_prefix_ = "", std::optional * tagger_ptr = nullptr, - bool try_use_s3_copy = true, - const DiskPtr disk_s3 = nullptr); + bool try_zero_copy = true, + DiskPtr dest_disk = nullptr); /// You need to stop the data transfer. 
ActionBlocker blocker; @@ -115,17 +115,17 @@ private: const UUID & part_uuid, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, - ReservationPtr reservation, + DiskPtr disk, PooledReadWriteBufferFromHTTP & in, size_t projections, ThrottlerPtr throttler); - MergeTreeData::MutableDataPartPtr downloadPartToS3( + MergeTreeData::MutableDataPartPtr downloadPartToDiskRemoteMeta( const String & part_name, const String & replica_path, bool to_detached, const String & tmp_prefix_, - const Disks & disks_s3, + DiskPtr disk, PooledReadWriteBufferFromHTTP & in, ThrottlerPtr throttler); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 8fe6a0a484b..ea965f10b23 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -78,6 +79,12 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Dis Field max_val; serialization->deserializeBinary(max_val, *file); + // NULL_LAST + if (min_val.isNull()) + min_val = PositiveInfinity(); + if (max_val.isNull()) + max_val = PositiveInfinity(); + hyperrectangle.emplace_back(min_val, true, max_val, true); } initialized = true; @@ -132,14 +139,19 @@ void IMergeTreeDataPart::MinMaxIndex::update(const Block & block, const Names & FieldRef min_value; FieldRef max_value; const ColumnWithTypeAndName & column = block.getByName(column_names[i]); - column.column->getExtremes(min_value, max_value); + if (const auto * column_nullable = typeid_cast(column.column.get())) + column_nullable->getExtremesNullLast(min_value, max_value); + else + column.column->getExtremes(min_value, max_value); if (!initialized) hyperrectangle.emplace_back(min_value, true, max_value, true); else { - hyperrectangle[i].left = std::min(hyperrectangle[i].left, min_value); - hyperrectangle[i].right = std::max(hyperrectangle[i].right, max_value); + hyperrectangle[i].left + = applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].left, min_value) ? hyperrectangle[i].left : min_value; + hyperrectangle[i].right + = applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].right, max_value) ? 
max_value : hyperrectangle[i].right; } } @@ -1103,7 +1115,7 @@ void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_ std::optional IMergeTreeDataPart::keepSharedDataInDecoupledStorage() const { - /// NOTE: It's needed for S3 zero-copy replication + /// NOTE: It's needed for zero-copy replication if (force_keep_shared_data) return true; @@ -1501,16 +1513,11 @@ SerializationPtr IMergeTreeDataPart::getSerializationForColumn(const NameAndType String IMergeTreeDataPart::getUniqueId() const { - String id; - auto disk = volume->getDisk(); + if (!disk->supportZeroCopyReplication()) + throw Exception(fmt::format("Disk {} doesn't support zero-copy replication", disk->getName()), ErrorCodes::LOGICAL_ERROR); - if (disk->getType() == DB::DiskType::Type::S3) - id = disk->getUniqueId(fs::path(getFullRelativePath()) / "checksums.txt"); - - if (id.empty()) - throw Exception("Can't get unique S3 object", ErrorCodes::LOGICAL_ERROR); - + String id = disk->getUniqueId(fs::path(getFullRelativePath()) / "checksums.txt"); return id; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 54fcfc1adc9..8b7a15e5da0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -1,7 +1,5 @@ #pragma once -#include - #include #include #include @@ -19,6 +17,7 @@ #include + namespace zkutil { class ZooKeeper; @@ -374,7 +373,7 @@ public: void loadProjections(bool require_columns_checksums, bool check_consistency); - /// Return set of metadat file names without checksums. For example, + /// Return set of metadata file names without checksums. For example, /// columns.txt or checksums.txt itself. NameSet getFileNamesWithoutChecksums() const; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 14187564536..4efd3d669eb 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -33,6 +33,7 @@ IMergeTreeReader::IMergeTreeReader( : data_part(data_part_) , avg_value_size_hints(avg_value_size_hints_) , columns(columns_) + , part_columns(data_part->getColumns()) , uncompressed_cache(uncompressed_cache_) , mark_cache(mark_cache_) , settings(settings_) @@ -41,15 +42,15 @@ IMergeTreeReader::IMergeTreeReader( , all_mark_ranges(all_mark_ranges_) , alter_conversions(storage.getAlterConversionsForPart(data_part)) { - auto part_columns = data_part->getColumns(); if (settings.convert_nested_to_subcolumns) { columns = Nested::convertToSubcolumns(columns); part_columns = Nested::collect(part_columns); } - for (const NameAndTypePair & column_from_part : part_columns) - columns_from_part[column_from_part.name] = column_from_part.type; + columns_from_part.set_empty_key(StringRef()); + for (const auto & column_from_part : part_columns) + columns_from_part.emplace(column_from_part.name, &column_from_part.type); } IMergeTreeReader::~IMergeTreeReader() = default; @@ -226,18 +227,19 @@ NameAndTypePair IMergeTreeReader::getColumnFromPart(const NameAndTypePair & requ if (it == columns_from_part.end()) return required_column; + const auto & type = *it->second; if (required_column.isSubcolumn()) { auto subcolumn_name = required_column.getSubcolumnName(); - auto subcolumn_type = it->second->tryGetSubcolumnType(subcolumn_name); + auto subcolumn_type = type->tryGetSubcolumnType(subcolumn_name); if (!subcolumn_type) return required_column; - return {it->first, subcolumn_name, it->second, subcolumn_type}; + return 
{String(it->first), subcolumn_name, type, subcolumn_type}; } - return {it->first, it->second}; + return {String(it->first), type}; } void IMergeTreeReader::performRequiredConversions(Columns & res_columns) diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 0771bc3d5cb..ab412e48822 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -72,6 +73,7 @@ protected: /// Columns that are read. NamesAndTypesList columns; + NamesAndTypesList part_columns; UncompressedCache * uncompressed_cache; MarkCache * mark_cache; @@ -92,7 +94,12 @@ private: MergeTreeData::AlterConversions alter_conversions; /// Actual data type of columns in part - std::unordered_map columns_from_part; + +#if !defined(ARCADIA_BUILD) + google::dense_hash_map columns_from_part; +#else + google::sparsehash::dense_hash_map columns_from_part; +#endif }; } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 476032e66aa..e58d4ecfc07 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -43,15 +43,8 @@ String Range::toString() const { WriteBufferFromOwnString str; - if (!left_bounded) - str << "(-inf, "; - else - str << (left_included ? '[' : '(') << applyVisitor(FieldVisitorToString(), left) << ", "; - - if (!right_bounded) - str << "+inf)"; - else - str << applyVisitor(FieldVisitorToString(), right) << (right_included ? ']' : ')'); + str << (left_included ? '[' : '(') << applyVisitor(FieldVisitorToString(), left) << ", "; + str << applyVisitor(FieldVisitorToString(), right) << (right_included ? ']' : ')'); return str.str(); } @@ -205,6 +198,38 @@ const KeyCondition::AtomMap KeyCondition::atom_map return true; } }, + { + "nullIn", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_IN_SET; + return true; + } + }, + { + "notNullIn", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_NOT_IN_SET; + return true; + } + }, + { + "globalNullIn", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_IN_SET; + return true; + } + }, + { + "globalNotNullIn", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_NOT_IN_SET; + return true; + } + }, { "empty", [] (RPNElement & out, const Field & value) @@ -291,6 +316,26 @@ const KeyCondition::AtomMap KeyCondition::atom_map return true; } + }, + { + "isNotNull", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_IS_NOT_NULL; + // isNotNull means (-Inf, +Inf), which is the default Range + out.range = Range(); + return true; + } + }, + { + "isNull", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_IS_NULL; + // When using NULL_LAST, isNull means [+Inf, +Inf] + out.range = Range(Field(PositiveInfinity{})); + return true; + } } }; @@ -304,6 +349,14 @@ static const std::map inverse_relations = { {"lessOrEquals", "greater"}, {"in", "notIn"}, {"notIn", "in"}, + {"globalIn", "globalNotIn"}, + {"globalNotIn", "globalIn"}, + {"nullIn", "notNullIn"}, + {"notNullIn", "nullIn"}, + {"globalNullIn", "globalNotNullIn"}, + {"globalNullNotIn", "globalNullIn"}, + {"isNull", "isNotNull"}, + {"isNotNull", "isNull"}, {"like", "notLike"}, {"notLike", "like"}, {"empty", "notEmpty"}, @@ -620,7 +673,6 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if 
(key_subexpr_names.count(expr_name) == 0) return false; - /// TODO Nullable index is not yet landed. if (out_value.isNull()) return false; @@ -745,7 +797,6 @@ bool KeyCondition::canConstantBeWrappedByFunctions( const auto & sample_block = key_expr->getSampleBlock(); - /// TODO Nullable index is not yet landed. if (out_value.isNull()) return false; @@ -1147,7 +1198,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context, Block & block_with_constants, RPNElement & out) { - /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key, + /** Functions < > = != <= >= in `notIn` isNull isNotNull, where one argument is a constant, and the other is one of columns of key, * or itself, wrapped in a chain of possibly-monotonic functions, * or constant expression - number. */ @@ -1192,8 +1243,8 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context, /// If we use this key condition to prune partitions by single value, we cannot relax conditions for NOT. if (single_point - && (func_name == "notLike" || func_name == "notIn" || func_name == "globalNotIn" || func_name == "notEquals" - || func_name == "notEmpty")) + && (func_name == "notLike" || func_name == "notIn" || func_name == "globalNotIn" || func_name == "notNullIn" + || func_name == "globalNotNullIn" || func_name == "notEquals" || func_name == "notEmpty")) strict_condition = true; if (functionIsInOrGlobalInOperator(func_name)) @@ -1504,6 +1555,8 @@ KeyCondition::Description KeyCondition::getDescription() const else if ( element.function == RPNElement::FUNCTION_IN_RANGE || element.function == RPNElement::FUNCTION_NOT_IN_RANGE + || element.function == RPNElement::FUNCTION_IS_NULL + || element.function == RPNElement::FUNCTION_IS_NOT_NULL || element.function == RPNElement::FUNCTION_IN_SET || element.function == RPNElement::FUNCTION_NOT_IN_SET) { @@ -1668,11 +1721,13 @@ KeyCondition::Description KeyCondition::getDescription() const * over at least one hyperrectangle from which this range consists. */ +FieldRef negativeInfinity(NegativeInfinity{}), positiveInfinity(PositiveInfinity{}); + template static BoolMask forAnyHyperrectangle( size_t key_size, - const FieldRef * key_left, - const FieldRef * key_right, + const FieldRef * left_keys, + const FieldRef * right_keys, bool left_bounded, bool right_bounded, std::vector & hyperrectangle, @@ -1688,10 +1743,10 @@ static BoolMask forAnyHyperrectangle( /// Let's go through the matching elements of the key. while (prefix_size < key_size) { - if (key_left[prefix_size] == key_right[prefix_size]) + if (left_keys[prefix_size] == right_keys[prefix_size]) { /// Point ranges. 
- hyperrectangle[prefix_size] = Range(key_left[prefix_size]); + hyperrectangle[prefix_size] = Range(left_keys[prefix_size]); ++prefix_size; } else @@ -1705,11 +1760,11 @@ static BoolMask forAnyHyperrectangle( if (prefix_size + 1 == key_size) { if (left_bounded && right_bounded) - hyperrectangle[prefix_size] = Range(key_left[prefix_size], true, key_right[prefix_size], true); + hyperrectangle[prefix_size] = Range(left_keys[prefix_size], true, right_keys[prefix_size], true); else if (left_bounded) - hyperrectangle[prefix_size] = Range::createLeftBounded(key_left[prefix_size], true); + hyperrectangle[prefix_size] = Range::createLeftBounded(left_keys[prefix_size], true); else if (right_bounded) - hyperrectangle[prefix_size] = Range::createRightBounded(key_right[prefix_size], true); + hyperrectangle[prefix_size] = Range::createRightBounded(right_keys[prefix_size], true); return callback(hyperrectangle); } @@ -1717,11 +1772,11 @@ static BoolMask forAnyHyperrectangle( /// (x1 .. x2) x (-inf .. +inf) if (left_bounded && right_bounded) - hyperrectangle[prefix_size] = Range(key_left[prefix_size], false, key_right[prefix_size], false); + hyperrectangle[prefix_size] = Range(left_keys[prefix_size], false, right_keys[prefix_size], false); else if (left_bounded) - hyperrectangle[prefix_size] = Range::createLeftBounded(key_left[prefix_size], false); + hyperrectangle[prefix_size] = Range::createLeftBounded(left_keys[prefix_size], false); else if (right_bounded) - hyperrectangle[prefix_size] = Range::createRightBounded(key_right[prefix_size], false); + hyperrectangle[prefix_size] = Range::createRightBounded(right_keys[prefix_size], false); for (size_t i = prefix_size + 1; i < key_size; ++i) hyperrectangle[i] = Range(); @@ -1741,8 +1796,8 @@ static BoolMask forAnyHyperrectangle( if (left_bounded) { - hyperrectangle[prefix_size] = Range(key_left[prefix_size]); - result = result | forAnyHyperrectangle(key_size, key_left, key_right, true, false, hyperrectangle, prefix_size + 1, initial_mask, callback); + hyperrectangle[prefix_size] = Range(left_keys[prefix_size]); + result = result | forAnyHyperrectangle(key_size, left_keys, right_keys, true, false, hyperrectangle, prefix_size + 1, initial_mask, callback); if (result.isComplete()) return result; } @@ -1751,8 +1806,8 @@ static BoolMask forAnyHyperrectangle( if (right_bounded) { - hyperrectangle[prefix_size] = Range(key_right[prefix_size]); - result = result | forAnyHyperrectangle(key_size, key_left, key_right, false, true, hyperrectangle, prefix_size + 1, initial_mask, callback); + hyperrectangle[prefix_size] = Range(right_keys[prefix_size]); + result = result | forAnyHyperrectangle(key_size, left_keys, right_keys, false, true, hyperrectangle, prefix_size + 1, initial_mask, callback); if (result.isComplete()) return result; } @@ -1763,37 +1818,31 @@ static BoolMask forAnyHyperrectangle( BoolMask KeyCondition::checkInRange( size_t used_key_size, - const FieldRef * left_key, - const FieldRef * right_key, + const FieldRef * left_keys, + const FieldRef * right_keys, const DataTypes & data_types, - bool right_bounded, BoolMask initial_mask) const { std::vector key_ranges(used_key_size, Range()); -/* std::cerr << "Checking for: ["; - for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), left_key[i]); - std::cerr << " ... "; + // std::cerr << "Checking for: ["; + // for (size_t i = 0; i != used_key_size; ++i) + // std::cerr << (i != 0 ? 
", " : "") << applyVisitor(FieldVisitorToString(), left_keys[i]); + // std::cerr << " ... "; - if (right_bounded) - { - for (size_t i = 0; i != used_key_size; ++i) - std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_key[i]); - std::cerr << "]\n"; - } - else - std::cerr << "+inf)\n";*/ + // for (size_t i = 0; i != used_key_size; ++i) + // std::cerr << (i != 0 ? ", " : "") << applyVisitor(FieldVisitorToString(), right_keys[i]); + // std::cerr << "]\n"; - return forAnyHyperrectangle(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, initial_mask, + return forAnyHyperrectangle(used_key_size, left_keys, right_keys, true, true, key_ranges, 0, initial_mask, [&] (const std::vector & key_ranges_hyperrectangle) { auto res = checkInHyperrectangle(key_ranges_hyperrectangle, data_types); -/* std::cerr << "Hyperrectangle: "; - for (size_t i = 0, size = key_ranges.size(); i != size; ++i) - std::cerr << (i != 0 ? " x " : "") << key_ranges[i].toString(); - std::cerr << ": " << res.can_be_true << "\n";*/ + // std::cerr << "Hyperrectangle: "; + // for (size_t i = 0, size = key_ranges.size(); i != size; ++i) + // std::cerr << (i != 0 ? " x " : "") << key_ranges[i].toString(); + // std::cerr << ": " << res.can_be_true << "\n"; return res; }); @@ -1821,6 +1870,8 @@ std::optional KeyCondition::applyMonotonicFunctionsChainToRange( /// If we apply function to open interval, we can get empty intervals in result. /// E.g. for ('2020-01-03', '2020-01-20') after applying 'toYYYYMM' we will get ('202001', '202001'). /// To avoid this we make range left and right included. + /// Any function that treats NULL specially is not monotonic. + /// Thus we can safely use isNull() as an -Inf/+Inf indicator here. if (!key_range.left.isNull()) { key_range.left = applyFunction(func, current_type, key_range.left); @@ -1836,7 +1887,7 @@ std::optional KeyCondition::applyMonotonicFunctionsChainToRange( current_type = func->getResultType(); if (!monotonicity.is_positive) - key_range.swapLeftAndRight(); + key_range.invert(); } return key_range; } @@ -1961,6 +2012,17 @@ BoolMask KeyCondition::checkInHyperrectangle( if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE) rpn_stack.back() = !rpn_stack.back(); } + else if ( + element.function == RPNElement::FUNCTION_IS_NULL + || element.function == RPNElement::FUNCTION_IS_NOT_NULL) + { + const Range * key_range = &hyperrectangle[element.key_column]; + + /// No need to apply monotonic functions as nulls are kept. 
+ bool intersects = element.range.intersectsRange(*key_range); + bool contains = element.range.containsRange(*key_range); + rpn_stack.emplace_back(intersects, !contains); + } else if ( element.function == RPNElement::FUNCTION_IN_SET || element.function == RPNElement::FUNCTION_NOT_IN_SET) @@ -2015,43 +2077,13 @@ BoolMask KeyCondition::checkInHyperrectangle( } -BoolMask KeyCondition::checkInRange( - size_t used_key_size, - const FieldRef * left_key, - const FieldRef * right_key, - const DataTypes & data_types, - BoolMask initial_mask) const -{ - return checkInRange(used_key_size, left_key, right_key, data_types, true, initial_mask); -} - - bool KeyCondition::mayBeTrueInRange( size_t used_key_size, - const FieldRef * left_key, - const FieldRef * right_key, + const FieldRef * left_keys, + const FieldRef * right_keys, const DataTypes & data_types) const { - return checkInRange(used_key_size, left_key, right_key, data_types, true, BoolMask::consider_only_can_be_true).can_be_true; -} - - -BoolMask KeyCondition::checkAfter( - size_t used_key_size, - const FieldRef * left_key, - const DataTypes & data_types, - BoolMask initial_mask) const -{ - return checkInRange(used_key_size, left_key, nullptr, data_types, false, initial_mask); -} - - -bool KeyCondition::mayBeTrueAfter( - size_t used_key_size, - const FieldRef * left_key, - const DataTypes & data_types) const -{ - return checkInRange(used_key_size, left_key, nullptr, data_types, false, BoolMask::consider_only_can_be_true).can_be_true; + return checkInRange(used_key_size, left_keys, right_keys, data_types, BoolMask::consider_only_can_be_true).can_be_true; } String KeyCondition::RPNElement::toString() const { return toString("column " + std::to_string(key_column), false); } @@ -2121,6 +2153,15 @@ String KeyCondition::RPNElement::toString(const std::string_view & column_name, buf << ")"; return buf.str(); } + case FUNCTION_IS_NULL: + case FUNCTION_IS_NOT_NULL: + { + buf << "("; + print_wrapped_column(buf); + buf << (function == FUNCTION_IS_NULL ? 
" isNull" : " isNotNull"); + buf << ")"; + return buf.str(); + } case ALWAYS_FALSE: return "false"; case ALWAYS_TRUE: @@ -2162,6 +2203,8 @@ bool KeyCondition::unknownOrAlwaysTrue(bool unknown_any) const || element.function == RPNElement::FUNCTION_IN_RANGE || element.function == RPNElement::FUNCTION_IN_SET || element.function == RPNElement::FUNCTION_NOT_IN_SET + || element.function == RPNElement::FUNCTION_IS_NULL + || element.function == RPNElement::FUNCTION_IS_NOT_NULL || element.function == RPNElement::ALWAYS_FALSE) { rpn_stack.push_back(false); @@ -2205,6 +2248,8 @@ size_t KeyCondition::getMaxKeyColumn() const { if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE || element.function == RPNElement::FUNCTION_IN_RANGE + || element.function == RPNElement::FUNCTION_IS_NULL + || element.function == RPNElement::FUNCTION_IS_NOT_NULL || element.function == RPNElement::FUNCTION_IN_SET || element.function == RPNElement::FUNCTION_NOT_IN_SET) { diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index c957c65fc40..edae921bfda 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -55,25 +55,24 @@ private: static bool less(const Field & lhs, const Field & rhs); public: - FieldRef left; /// the left border, if any - FieldRef right; /// the right border, if any - bool left_bounded = false; /// bounded at the left - bool right_bounded = false; /// bounded at the right - bool left_included = false; /// includes the left border, if any - bool right_included = false; /// includes the right border, if any + FieldRef left = NegativeInfinity{}; /// the left border + FieldRef right = PositiveInfinity{}; /// the right border + bool left_included = false; /// includes the left border + bool right_included = false; /// includes the right border - /// The whole unversum. + /// The whole universe (not null). Range() {} /// One point. Range(const FieldRef & point) - : left(point), right(point), left_bounded(true), right_bounded(true), left_included(true), right_included(true) {} + : left(point), right(point), left_included(true), right_included(true) {} /// A bounded two-sided range. 
Range(const FieldRef & left_, bool left_included_, const FieldRef & right_, bool right_included_) - : left(left_), right(right_), - left_bounded(true), right_bounded(true), - left_included(left_included_), right_included(right_included_) + : left(left_) + , right(right_) + , left_included(left_included_) + , right_included(right_included_) { shrinkToIncludedIfPossible(); } @@ -82,9 +81,11 @@ public: { Range r; r.right = right_point; - r.right_bounded = true; r.right_included = right_included; r.shrinkToIncludedIfPossible(); + // Special case for [-Inf, -Inf] + if (r.right.isNegativeInfinity() && right_included) + r.left_included = true; return r; } @@ -92,9 +93,11 @@ public: { Range r; r.left = left_point; - r.left_bounded = true; r.left_included = left_included; r.shrinkToIncludedIfPossible(); + // Special case for [+Inf, +Inf] + if (r.left.isPositiveInfinity() && left_included) + r.right_included = true; return r; } @@ -104,7 +107,7 @@ public: */ void shrinkToIncludedIfPossible() { - if (left.isExplicit() && left_bounded && !left_included) + if (left.isExplicit() && !left_included) { if (left.getType() == Field::Types::UInt64 && left.get() != std::numeric_limits::max()) { @@ -117,7 +120,7 @@ public: left_included = true; } } - if (right.isExplicit() && right_bounded && !right_included) + if (right.isExplicit() && !right_included) { if (right.getType() == Field::Types::UInt64 && right.get() != std::numeric_limits::min()) { @@ -132,12 +135,7 @@ public: } } - bool empty() const - { - return left_bounded && right_bounded - && (less(right, left) - || ((!left_included || !right_included) && !less(left, right))); - } + bool empty() const { return less(right, left) || ((!left_included || !right_included) && !less(left, right)); } /// x contained in the range bool contains(const FieldRef & x) const @@ -148,35 +146,23 @@ public: /// x is to the left bool rightThan(const FieldRef & x) const { - return (left_bounded - ? !(less(left, x) || (left_included && equals(x, left))) - : false); + return less(left, x) || (left_included && equals(x, left)); } /// x is to the right bool leftThan(const FieldRef & x) const { - return (right_bounded - ? !(less(x, right) || (right_included && equals(x, right))) - : false); + return less(x, right) || (right_included && equals(x, right)); } bool intersectsRange(const Range & r) const { /// r to the left of me. - if (r.right_bounded - && left_bounded - && (less(r.right, left) - || ((!left_included || !r.right_included) - && equals(r.right, left)))) + if (less(r.right, left) || ((!left_included || !r.right_included) && equals(r.right, left))) return false; /// r to the right of me. - if (r.left_bounded - && right_bounded - && (less(right, r.left) /// ...} {... - || ((!right_included || !r.left_included) /// ...) [... or ...] (... - && equals(r.left, right)))) + if (less(right, r.left) || ((!right_included || !r.left_included) && equals(r.left, right))) return false; return true; @@ -185,30 +171,23 @@ public: bool containsRange(const Range & r) const { /// r starts to the left of me. - if (left_bounded - && (!r.left_bounded - || less(r.left, left) - || (r.left_included - && !left_included - && equals(r.left, left)))) + if (less(r.left, left) || (r.left_included && !left_included && equals(r.left, left))) return false; /// r ends right of me. 
- if (right_bounded - && (!r.right_bounded - || less(right, r.right) - || (r.right_included - && !right_included - && equals(r.right, right)))) + if (less(right, r.right) || (r.right_included && !right_included && equals(r.right, right))) return false; return true; } - void swapLeftAndRight() + void invert() { std::swap(left, right); - std::swap(left_bounded, right_bounded); + if (left.isPositiveInfinity()) + left = NegativeInfinity{}; + if (right.isNegativeInfinity()) + right = PositiveInfinity{}; std::swap(left_included, right_included); } @@ -247,16 +226,8 @@ public: /// one of the resulting mask components (see BoolMask::consider_only_can_be_XXX). BoolMask checkInRange( size_t used_key_size, - const FieldRef * left_key, - const FieldRef* right_key, - const DataTypes & data_types, - BoolMask initial_mask = BoolMask(false, false)) const; - - /// Are the condition and its negation valid in a semi-infinite (not limited to the right) key range. - /// left_key must contain all the fields in the sort_descr in the appropriate order. - BoolMask checkAfter( - size_t used_key_size, - const FieldRef * left_key, + const FieldRef * left_keys, + const FieldRef * right_keys, const DataTypes & data_types, BoolMask initial_mask = BoolMask(false, false)) const; @@ -264,15 +235,8 @@ public: /// This is more efficient than checkInRange(...).can_be_true. bool mayBeTrueInRange( size_t used_key_size, - const FieldRef * left_key, - const FieldRef * right_key, - const DataTypes & data_types) const; - - /// Same as checkAfter, but calculate only may_be_true component of a result. - /// This is more efficient than checkAfter(...).can_be_true. - bool mayBeTrueAfter( - size_t used_key_size, - const FieldRef * left_key, + const FieldRef * left_keys, + const FieldRef * right_keys, const DataTypes & data_types) const; /// Checks that the index can not be used @@ -338,6 +302,8 @@ private: FUNCTION_NOT_IN_RANGE, FUNCTION_IN_SET, FUNCTION_NOT_IN_SET, + FUNCTION_IS_NULL, + FUNCTION_IS_NOT_NULL, FUNCTION_UNKNOWN, /// Can take any value. /// Operators of the logical expression. FUNCTION_NOT, diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 8da9b002e16..532dc48ec1e 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -1,12 +1,12 @@ #pragma once -#include #include #include #include #include + namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index b8698ae3e01..93594dd4357 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -35,16 +35,16 @@ bool injectRequiredColumnsRecursively( /// stages. 
checkStackSize(); - if (storage_columns.hasPhysicalOrSubcolumn(column_name)) + auto column_in_storage = storage_columns.tryGetColumnOrSubcolumn(ColumnsDescription::AllPhysical, column_name); + if (column_in_storage) { - auto column_in_storage = storage_columns.getPhysicalOrSubcolumn(column_name); - auto column_name_in_part = column_in_storage.getNameInStorage(); + auto column_name_in_part = column_in_storage->getNameInStorage(); if (alter_conversions.isColumnRenamed(column_name_in_part)) column_name_in_part = alter_conversions.getColumnOldName(column_name_in_part); auto column_in_part = NameAndTypePair( - column_name_in_part, column_in_storage.getSubcolumnName(), - column_in_storage.getTypeInStorage(), column_in_storage.type); + column_name_in_part, column_in_storage->getSubcolumnName(), + column_in_storage->getTypeInStorage(), column_in_storage->type); /// column has files and hence does not require evaluation if (part->hasColumnFiles(column_in_part)) @@ -93,7 +93,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada for (size_t i = 0; i < columns.size(); ++i) { /// We are going to fetch only physical columns - if (!storage_columns.hasPhysicalOrSubcolumn(columns[i])) + if (!storage_columns.hasColumnOrSubcolumn(ColumnsDescription::AllPhysical, columns[i])) throw Exception("There is no physical column or subcolumn " + columns[i] + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); have_at_least_one_physical_column |= injectRequiredColumnsRecursively( @@ -310,9 +310,9 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (check_columns) { - const NamesAndTypesList & physical_columns = metadata_snapshot->getColumns().getAllWithSubcolumns(); - result.pre_columns = physical_columns.addTypes(pre_column_names); - result.columns = physical_columns.addTypes(column_names); + const auto & columns = metadata_snapshot->getColumns(); + result.pre_columns = columns.getByNames(ColumnsDescription::All, pre_column_names, true); + result.columns = columns.getByNames(ColumnsDescription::All, column_names, true); } else { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 073b05d35c2..cfe62fefb06 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -104,6 +104,7 @@ namespace ErrorCodes extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; extern const int CORRUPTED_DATA; extern const int BAD_TYPE_OF_FIELD; extern const int BAD_ARGUMENTS; @@ -125,12 +126,36 @@ namespace ErrorCodes extern const int TOO_MANY_SIMULTANEOUS_QUERIES; } - -static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key) +static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key, bool check_sample_column_is_correct) { const auto & pk_sample_block = metadata.getPrimaryKey().sample_block; if (!pk_sample_block.has(metadata.sampling_key.column_names[0]) && !allow_sampling_expression_not_in_primary_key) throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); + + if (!check_sample_column_is_correct) + return; + + const auto & sampling_key = metadata.getSamplingKey(); + DataTypePtr sampling_column_type = sampling_key.data_types[0]; + + bool is_correct_sample_condition = false; + if (sampling_key.data_types.size() == 1) + { + if 
(typeid_cast(sampling_column_type.get())) + is_correct_sample_condition = true; + else if (typeid_cast(sampling_column_type.get())) + is_correct_sample_condition = true; + else if (typeid_cast(sampling_column_type.get())) + is_correct_sample_condition = true; + else if (typeid_cast(sampling_column_type.get())) + is_correct_sample_condition = true; + } + + if (!is_correct_sample_condition) + throw Exception( + "Invalid sampling column type in storage parameters: " + sampling_column_type->getName() + + ". Must be one unsigned integer type", + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); } MergeTreeData::MergeTreeData( @@ -200,7 +225,8 @@ MergeTreeData::MergeTreeData( if (metadata_.sampling_key.definition_ast != nullptr) { /// This is for backward compatibility. - checkSampleExpression(metadata_, attach || settings->compatibility_allow_sampling_expression_not_in_primary_key); + checkSampleExpression(metadata_, attach || settings->compatibility_allow_sampling_expression_not_in_primary_key, + settings->check_sample_column_is_correct); } checkTTLExpressions(metadata_, metadata_); @@ -270,19 +296,17 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name, bool allow_nullable_key) { - for (const auto & action : expr.getActions()) - { - if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); + if (expr.hasArrayJoin()) + throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); - if (action.node->type == ActionsDAG::ActionType::FUNCTION) - { - IFunctionBase & func = *action.node->function_base; - if (!func.isDeterministic()) - throw Exception(key_name + " key cannot contain non-deterministic functions, " - "but contains function " + func.getName(), - ErrorCodes::BAD_ARGUMENTS); - } + try + { + expr.assertDeterministic(); + } + catch (Exception & e) + { + e.addMessage(fmt::format("for {} key", key_name)); + throw; } for (const ColumnWithTypeAndName & element : sample_block) @@ -1673,7 +1697,8 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context "ALTER MODIFY SAMPLE BY is not supported for default-partitioned tables created with the old syntax", ErrorCodes::BAD_ARGUMENTS); - checkSampleExpression(new_metadata, getSettings()->compatibility_allow_sampling_expression_not_in_primary_key); + checkSampleExpression(new_metadata, getSettings()->compatibility_allow_sampling_expression_not_in_primary_key, + getSettings()->check_sample_column_is_correct); } if (command.type == AlterCommand::ADD_INDEX && !is_custom_partitioned) { @@ -2748,19 +2773,17 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) if (active_part_it == data_parts_by_info.end()) throw Exception("Cannot swap part '" + part_copy->name + "', no such active part.", ErrorCodes::NO_SUCH_DATA_PART); - /// We do not check allow_s3_zero_copy_replication here because data may be shared - /// when allow_s3_zero_copy_replication turned on and off again + /// We do not check allow_remote_fs_zero_copy_replication here because data may be shared + /// when allow_remote_fs_zero_copy_replication turned on and off again original_active_part->force_keep_shared_data = false; - if (original_active_part->volume->getDisk()->getType() == DiskType::Type::S3) + if (original_active_part->volume->getDisk()->supportZeroCopyReplication() && + 
part_copy->volume->getDisk()->supportZeroCopyReplication() && + original_active_part->getUniqueId() == part_copy->getUniqueId()) { - if (part_copy->volume->getDisk()->getType() == DiskType::Type::S3 - && original_active_part->getUniqueId() == part_copy->getUniqueId()) - { - /// May be when several volumes use the same S3 storage - original_active_part->force_keep_shared_data = true; - } + /// May be when several volumes use the same S3/HDFS storage + original_active_part->force_keep_shared_data = true; } modifyPartState(original_active_part, DataPartState::DeleteOnDestroy); @@ -3831,16 +3854,20 @@ bool MergeTreeData::mayBenefitFromIndexForIn( for (const auto & index : metadata_snapshot->getSecondaryIndices()) if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(item)) return true; - if (metadata_snapshot->selected_projection - && metadata_snapshot->selected_projection->isPrimaryKeyColumnPossiblyWrappedInFunctions(item)) - return true; + for (const auto & projection : metadata_snapshot->getProjections()) + { + if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(item)) + return true; + } } /// The tuple itself may be part of the primary key, so check that as a last resort. if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot)) return true; - if (metadata_snapshot->selected_projection - && metadata_snapshot->selected_projection->isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand)) - return true; + for (const auto & projection : metadata_snapshot->getProjections()) + { + if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand)) + return true; + } return false; } else @@ -3849,10 +3876,11 @@ bool MergeTreeData::mayBenefitFromIndexForIn( if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(left_in_operand)) return true; - if (metadata_snapshot->selected_projection - && metadata_snapshot->selected_projection->isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand)) - return true; - + for (const auto & projection : metadata_snapshot->getProjections()) + { + if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(left_in_operand)) + return true; + } return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot); } } @@ -3892,7 +3920,7 @@ static void selectBestProjection( candidate.required_columns, metadata_snapshot, candidate.desc->metadata, - query_info, // TODO syntax_analysis_result set in index + query_info, query_context, settings.max_threads, max_added_blocks); @@ -3910,7 +3938,7 @@ static void selectBestProjection( required_columns, metadata_snapshot, metadata_snapshot, - query_info, // TODO syntax_analysis_result set in index + query_info, query_context, settings.max_threads, max_added_blocks); @@ -3929,7 +3957,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info) const { const auto & settings = query_context->getSettingsRef(); - if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections) + if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return false; const auto & query_ptr = query_info.query; @@ -4168,7 +4196,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( analysis_result.required_columns, metadata_snapshot, metadata_snapshot, - query_info, // TODO syntax_analysis_result set in index + query_info, 
query_context, settings.max_threads, max_added_blocks); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 55739a4d009..02d1f5e264e 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -813,11 +813,11 @@ public: bool scheduleDataMovingJob(IBackgroundJobExecutor & executor); bool areBackgroundMovesNeeded() const; - /// Lock part in zookeeper for use common S3 data in several nodes + /// Lock part in zookeeper for shared data in several nodes /// Overridden in StorageReplicatedMergeTree virtual void lockSharedData(const IMergeTreeDataPart &) const {} - /// Unlock common S3 data part in zookeeper + /// Unlock shared data part in zookeeper /// Overridden in StorageReplicatedMergeTree virtual bool unlockSharedData(const IMergeTreeDataPart &) const { return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 2666ba1518f..8fccfbb1f90 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -395,10 +395,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, auto disk = data_part->volume->getDisk(); String escaped_name = escapeForFileName(name); - String mrk_path = fullPath(disk, part_path + escaped_name + marks_file_extension); - String bin_path = fullPath(disk, part_path + escaped_name + DATA_FILE_EXTENSION); - DB::ReadBufferFromFile mrk_in(mrk_path); - DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0, 0, nullptr); + String mrk_path = part_path + escaped_name + marks_file_extension; + String bin_path = part_path + escaped_name + DATA_FILE_EXTENSION; + auto mrk_in = disk->readFile(mrk_path); + DB::CompressedReadBufferFromFile bin_in(disk->readFile(bin_path)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; @@ -407,15 +407,15 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, size_t mark_num; const auto & serialization = serializations[name]; - for (mark_num = 0; !mrk_in.eof(); ++mark_num) + for (mark_num = 0; !mrk_in->eof(); ++mark_num) { if (mark_num > index_granularity.getMarksCount()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect number of marks in memory {}, on disk (at least) {}", index_granularity.getMarksCount(), mark_num + 1); - DB::readBinary(offset_in_compressed_file, mrk_in); - DB::readBinary(offset_in_decompressed_block, mrk_in); + DB::readBinary(offset_in_compressed_file, *mrk_in); + DB::readBinary(offset_in_decompressed_block, *mrk_in); if (settings.can_use_adaptive_granularity) - DB::readBinary(index_granularity_rows, mrk_in); + DB::readBinary(index_granularity_rows, *mrk_in); else index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; @@ -424,7 +424,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, if (index_granularity_rows != 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "We ran out of binary data but still have non empty mark #{} with rows number {}", mark_num, index_granularity_rows); - if (!mrk_in.eof()) + if (!mrk_in->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark #{} must be last, but we still have some to read", mark_num); break; @@ -486,7 +486,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, } } - if (!mrk_in.eof()) + if (!mrk_in->eof()) throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Still have something in marks stream, last mark #{} index granularity size {}, last rows {}", mark_num, index_granularity.getMarksCount(), index_granularity_rows); if (!bin_in.eof()) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 0a05eeb966e..0b5351dcf01 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -178,7 +178,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( Pipe projection_pipe; Pipe ordinary_pipe; - const auto & given_select = query_info.query->as(); if (!projection_parts.empty()) { LOG_DEBUG(log, "projection required columns: {}", fmt::join(query_info.projection->required_columns, ", ")); @@ -226,22 +225,28 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( if (!normal_parts.empty()) { auto storage_from_base_parts_of_projection = StorageFromMergeTreeDataPart::create(std::move(normal_parts)); - auto ast = query_info.projection->desc->query_ast->clone(); - auto & select = ast->as(); - if (given_select.where()) - select.setExpression(ASTSelectQuery::Expression::WHERE, given_select.where()->clone()); - if (given_select.prewhere()) - select.setExpression(ASTSelectQuery::Expression::WHERE, given_select.prewhere()->clone()); - - // After overriding the group by clause, we finish the possible aggregations directly - if (processed_stage >= QueryProcessingStage::Enum::WithMergeableState && given_select.groupBy()) - select.setExpression(ASTSelectQuery::Expression::GROUP_BY, given_select.groupBy()->clone()); auto interpreter = InterpreterSelectQuery( - ast, + query_info.query, context, storage_from_base_parts_of_projection, nullptr, - SelectQueryOptions{processed_stage}.ignoreAggregation().ignoreProjections()); + SelectQueryOptions{processed_stage}.projectionQuery()); + + QueryPlan ordinary_query_plan; + interpreter.buildQueryPlan(ordinary_query_plan); + + const auto & expressions = interpreter.getAnalysisResult(); + if (processed_stage == QueryProcessingStage::Enum::FetchColumns && expressions.before_where) + { + auto where_step = std::make_unique( + ordinary_query_plan.getCurrentDataStream(), + expressions.before_where, + expressions.where_column_name, + expressions.remove_where_filter); + where_step->setStepDescription("WHERE"); + ordinary_query_plan.addStep(std::move(where_step)); + } + ordinary_pipe = QueryPipeline::getPipe(interpreter.execute().pipeline); } @@ -757,7 +762,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Poco::Logger * log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, - bool use_skip_indexes) + bool use_skip_indexes, + bool check_limits) { RangesInDataParts parts_with_ranges(parts.size()); const Settings & settings = context->getSettingsRef(); @@ -885,7 +891,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd if (!ranges.ranges.empty()) { - if (limits.max_rows || leaf_limits.max_rows) + if (check_limits && (limits.max_rows || leaf_limits.max_rows)) { /// Fail fast if estimated number of rows to read exceeds the limit auto current_rows_estimate = ranges.getRowsCount(); @@ -1150,7 +1156,8 @@ size_t MergeTreeDataSelectExecutor::estimateNumMarksToRead( log, num_streams, index_stats, - false); + true /* use_skip_indexes */, + false /* check_limits */); return index_stats.back().num_granules_after; } @@ -1290,6 +1297,9 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( 
create_field_ref = [index_columns](size_t row, size_t column, FieldRef & field) { field = {index_columns.get(), row, column}; + // NULL_LAST + if (field.isNull()) + field = PositiveInfinity{}; }; } else @@ -1297,6 +1307,9 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( create_field_ref = [&index](size_t row, size_t column, FieldRef & field) { index[column]->get(row, field); + // NULL_LAST + if (field.isNull()) + field = PositiveInfinity{}; }; } @@ -1309,21 +1322,22 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( if (range.end == marks_count && !has_final_mark) { for (size_t i = 0; i < used_key_size; ++i) + { create_field_ref(range.begin, i, index_left[i]); - - return key_condition.mayBeTrueAfter( - used_key_size, index_left.data(), primary_key.data_types); + index_right[i] = PositiveInfinity{}; + } } - - if (has_final_mark && range.end == marks_count) - range.end -= 1; /// Remove final empty mark. It's useful only for primary key condition. - - for (size_t i = 0; i < used_key_size; ++i) + else { - create_field_ref(range.begin, i, index_left[i]); - create_field_ref(range.end, i, index_right[i]); - } + if (has_final_mark && range.end == marks_count) + range.end -= 1; /// Remove final empty mark. It's useful only for primary key condition. + for (size_t i = 0; i < used_key_size; ++i) + { + create_field_ref(range.begin, i, index_left[i]); + create_field_ref(range.end, i, index_right[i]); + } + } return key_condition.mayBeTrueInRange( used_key_size, index_left.data(), index_right.data(), primary_key.data_types); }; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index bd2a79f0aee..de5ca1f0138 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -174,6 +174,7 @@ public: /// Filter parts using primary key and secondary indexes. /// For every part, select mark ranges to read. + /// If 'check_limits = true' it will throw exception if the amount of data exceed the limits from settings. static RangesInDataParts filterPartsByPrimaryKeyAndSkipIndexes( MergeTreeData::DataPartsVector && parts, StorageMetadataPtr metadata_snapshot, @@ -184,7 +185,8 @@ public: Poco::Logger * log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, - bool use_skip_indexes); + bool use_skip_indexes, + bool check_limits); /// Create expression for sampling. /// Also, calculate _sample_factor if needed. 
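For context on how the KeyCondition and MergeTreeDataSelectExecutor changes above fit together: Range is now always two-sided, with borders defaulting to NegativeInfinity/PositiveInfinity; the new FUNCTION_IS_NULL / FUNCTION_IS_NOT_NULL atoms map isNotNull to the default (-Inf, +Inf) range and isNull to the point [+Inf, +Inf]; and markRangesFromPKRange() substitutes PositiveInfinity for NULL index values (NULL_LAST) before evaluating the condition. The snippet below is a minimal, self-contained model of that convention, using simplified stand-in types (Value and a Range over double) rather than the actual ClickHouse classes:

    // Illustrative model only, not part of the patch. Real types live in
    // src/Storages/MergeTree/KeyCondition.h; everything here is simplified.
    #include <iostream>
    #include <limits>
    #include <optional>

    struct Value
    {
        // nullopt models a SQL NULL read from the primary-key index.
        std::optional<double> v;

        // NULL_LAST: normalize NULL to +Inf, mirroring what
        // markRangesFromPKRange() now does with PositiveInfinity{}.
        double normalized() const
        {
            return v ? *v : std::numeric_limits<double>::infinity();
        }
    };

    struct Range
    {
        // Both borders always exist and default to -Inf / +Inf,
        // like the patched Range without left_bounded/right_bounded flags.
        double left  = -std::numeric_limits<double>::infinity();
        double right =  std::numeric_limits<double>::infinity();
        bool left_included = false;
        bool right_included = false;

        bool contains(double x) const
        {
            if (x < left  || (!left_included  && x == left))  return false;
            if (x > right || (!right_included && x == right)) return false;
            return true;
        }
    };

    int main()
    {
        // isNotNull -> the whole "not null" universe (-Inf, +Inf), the default Range.
        Range is_not_null;

        // isNull -> the single point [+Inf, +Inf], where NULL sorts under NULL_LAST.
        Range is_null;
        is_null.left = is_null.right = std::numeric_limits<double>::infinity();
        is_null.left_included = is_null.right_included = true;

        Value a{42.0};          // ordinary value
        Value b{std::nullopt};  // NULL

        std::cout << is_not_null.contains(a.normalized()) << ' '   // 1: 42 is not null
                  << is_not_null.contains(b.normalized()) << ' '   // 0: NULL (+Inf) excluded
                  << is_null.contains(b.normalized()) << '\n';     // 1: NULL matches isNull
    }

Under this model, the open right border of the default range naturally excludes the +Inf point that NULL maps to, which is why the patch can drop the separate left_bounded/right_bounded flags and the semi-infinite checkAfter()/mayBeTrueAfter() overloads in favor of checkInRange() with explicit +Inf right keys.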
diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 099d561cf80..e29a6198252 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace DB { @@ -38,22 +39,8 @@ void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const { const DataTypePtr & type = index_sample_block.getByPosition(i).type; auto serialization = type->getDefaultSerialization(); - - if (!type->isNullable()) - { - serialization->serializeBinary(hyperrectangle[i].left, ostr); - serialization->serializeBinary(hyperrectangle[i].right, ostr); - } - else - { - bool is_null = hyperrectangle[i].left.isNull() || hyperrectangle[i].right.isNull(); // one is enough - writeBinary(is_null, ostr); - if (!is_null) - { - serialization->serializeBinary(hyperrectangle[i].left, ostr); - serialization->serializeBinary(hyperrectangle[i].right, ostr); - } - } + serialization->serializeBinary(hyperrectangle[i].left, ostr); + serialization->serializeBinary(hyperrectangle[i].right, ostr); } } @@ -63,32 +50,18 @@ void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr) Field min_val; Field max_val; - for (size_t i = 0; i < index_sample_block.columns(); ++i) { const DataTypePtr & type = index_sample_block.getByPosition(i).type; auto serialization = type->getDefaultSerialization(); + serialization->deserializeBinary(min_val, istr); + serialization->deserializeBinary(max_val, istr); - if (!type->isNullable()) - { - serialization->deserializeBinary(min_val, istr); - serialization->deserializeBinary(max_val, istr); - } - else - { - bool is_null; - readBinary(is_null, istr); - if (!is_null) - { - serialization->deserializeBinary(min_val, istr); - serialization->deserializeBinary(max_val, istr); - } - else - { - min_val = Null(); - max_val = Null(); - } - } + // NULL_LAST + if (min_val.isNull()) + min_val = PositiveInfinity(); + if (max_val.isNull()) + max_val = PositiveInfinity(); hyperrectangle.emplace_back(min_val, true, max_val, true); } } @@ -117,8 +90,11 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s for (size_t i = 0; i < index_sample_block.columns(); ++i) { auto index_column_name = index_sample_block.getByPosition(i).name; - const auto & column = block.getByName(index_column_name).column; - column->cut(*pos, rows_read)->getExtremes(field_min, field_max); + const auto & column = block.getByName(index_column_name).column->cut(*pos, rows_read); + if (const auto * column_nullable = typeid_cast(column.get())) + column_nullable->getExtremesNullLast(field_min, field_max); + else + column->getExtremes(field_min, field_max); if (hyperrectangle.size() <= i) { @@ -126,8 +102,10 @@ void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, s } else { - hyperrectangle[i].left = std::min(hyperrectangle[i].left, field_min); - hyperrectangle[i].right = std::max(hyperrectangle[i].right, field_max); + hyperrectangle[i].left + = applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].left, field_min) ? hyperrectangle[i].left : field_min; + hyperrectangle[i].right + = applyVisitor(FieldVisitorAccurateLess(), hyperrectangle[i].right, field_max) ? 
field_max : hyperrectangle[i].right; } } @@ -156,9 +134,6 @@ bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr if (!granule) throw Exception( "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); - for (const auto & range : granule->hyperrectangle) - if (range.left.isNull() || range.right.isNull()) - return true; return condition.checkInHyperrectangle(granule->hyperrectangle, index_data_types).can_be_true; } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 8c027eb2089..0d457971dc6 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -43,6 +43,16 @@ namespace UInt8 type = Field::Types::Null; hash.update(type); } + void operator() (const NegativeInfinity &) const + { + UInt8 type = Field::Types::NegativeInfinity; + hash.update(type); + } + void operator() (const PositiveInfinity &) const + { + UInt8 type = Field::Types::PositiveInfinity; + hash.update(type); + } void operator() (const UInt64 & x) const { UInt8 type = Field::Types::UInt64; diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 5b77ac9ec4a..15e7ed4c1d0 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -195,16 +195,14 @@ MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEnt throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED); auto settings = data->getSettings(); - auto part = moving_part.part; - LOG_TRACE(log, "Cloning part {}", part->name); - auto disk = moving_part.reserved_space->getDisk(); + LOG_DEBUG(log, "Cloning part {} from {} to {}", part->name, part->volume->getDisk()->getName(), disk->getName()); + const String directory_to_move = "moving"; - if (settings->allow_s3_zero_copy_replication) + if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) { - /// Try to fetch part from S3 without copy and fallback to default copy - /// if it's not possible + /// Try zero-copy replication and fallback to default copy if it's not possible moving_part.part->assertOnDisk(); String path_to_clone = fs::path(data->getRelativeDataPath()) / directory_to_move / ""; String relative_path = part->relative_path; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index e8ea56b6531..f5ae5162676 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -231,6 +231,19 @@ std::vector MergeTreeReadPool::fillPerPartInfo( auto [required_columns, required_pre_columns, should_reorder] = getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns); + if (predict_block_size_bytes) + { + const auto & required_column_names = required_columns.getNames(); + const auto & required_pre_column_names = required_pre_columns.getNames(); + NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); + complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + + per_part_size_predictor.emplace_back(std::make_unique( + part.data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block)); + } + else + per_part_size_predictor.emplace_back(nullptr); + /// will be used to distinguish between PREWHERE and WHERE columns when applying filter 
const auto & required_column_names = required_columns.getNames(); per_part_column_name_set.emplace_back(required_column_names.begin(), required_column_names.end()); @@ -240,14 +253,6 @@ std::vector MergeTreeReadPool::fillPerPartInfo( per_part_should_reorder.push_back(should_reorder); parts_with_idx.push_back({ part.data_part, part.part_index_in_query }); - - if (predict_block_size_bytes) - { - per_part_size_predictor.emplace_back(std::make_unique( - part.data_part, column_names, sample_block)); - } - else - per_part_size_predictor.emplace_back(nullptr); } return per_part_sum_marks; diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 81833b76735..d546b2a95af 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -94,9 +94,17 @@ try MarkRanges mark_ranges_for_task = { all_mark_ranges.back() }; all_mark_ranges.pop_back(); - auto size_predictor = (preferred_block_size_bytes == 0) - ? nullptr - : std::make_unique(data_part, ordered_names, metadata_snapshot->getSampleBlock()); + std::unique_ptr size_predictor; + if (preferred_block_size_bytes) + { + const auto & required_column_names = task_columns.columns.getNames(); + const auto & required_pre_column_names = task_columns.pre_columns.getNames(); + NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); + complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + + size_predictor = std::make_unique( + data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock()); + } task = std::make_unique( data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set, diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h index b807c2d912c..4a6bd0c5577 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h @@ -1,11 +1,12 @@ #pragma once -#include + #include #include #include #include #include + namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index ce342a69fe0..1e4b61e13d9 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -72,9 +72,17 @@ try storage, metadata_snapshot, data_part, required_columns, prewhere_info, check_columns); - auto size_predictor = (preferred_block_size_bytes == 0) - ? 
nullptr - : std::make_unique(data_part, ordered_names, metadata_snapshot->getSampleBlock()); + std::unique_ptr size_predictor; + if (preferred_block_size_bytes) + { + const auto & required_column_names = task_columns.columns.getNames(); + const auto & required_pre_column_names = task_columns.pre_columns.getNames(); + NameSet complete_column_names(required_column_names.begin(), required_column_names.end()); + complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end()); + + size_predictor = std::make_unique( + data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock()); + } /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & column_names = task_columns.columns.getNames(); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index b63107b6dbf..05efd4bc342 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -1,11 +1,11 @@ #pragma once -#include #include #include #include #include #include + namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 2a3c7ed00a1..c854ca4e305 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -43,8 +43,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( NamesAndTypesList columns_for_reader; if (take_column_types_from_storage) { - const NamesAndTypesList & physical_columns = metadata_snapshot->getColumns().getAllPhysical(); - columns_for_reader = physical_columns.addTypes(columns_to_read); + columns_for_reader = metadata_snapshot->getColumns().getByNames(ColumnsDescription::AllPhysical, columns_to_read, false); } else { diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 888ca80e015..d018059c248 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -57,6 +57,7 @@ struct Settings; M(Bool, in_memory_parts_insert_sync, false, "If true insert of part with in-memory format will wait for fsync of WAL", 0) \ M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \ + M(UInt64, merge_selecting_sleep_ms, 5000, "Sleep time for merge selecting when no part selected, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ @@ -74,7 +75,7 @@ struct Settings; M(Seconds, prefer_fetch_merged_part_time_threshold, 3600, "If time passed after replication log entry creation exceeds this threshold and sum size of parts is greater than \"prefer_fetch_merged_part_size_threshold\", prefer fetching merged part from replica instead of doing merge locally. 
To speed up very long merges.", 0) \ M(UInt64, prefer_fetch_merged_part_size_threshold, 10ULL * 1024 * 1024 * 1024, "If sum size of parts exceeds this threshold and time passed after replication log entry creation is greater than \"prefer_fetch_merged_part_time_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \ M(Seconds, execute_merges_on_single_replica_time_threshold, 0, "When greater than zero only a single replica starts the merge immediately, others wait up to that amount of time to download the result instead of doing merges locally. If the chosen replica doesn't finish the merge during that amount of time, fallback to standard behavior happens.", 0) \ - M(Seconds, s3_execute_merges_on_single_replica_time_threshold, 3 * 60 * 60, "When greater than zero only a single replica starts the merge immediatelys when merged part on S3 storage and 'allow_s3_zero_copy_replication' is enabled.", 0) \ + M(Seconds, remote_fs_execute_merges_on_single_replica_time_threshold, 3 * 60 * 60, "When greater than zero only a single replica starts the merge immediatelys when merged part on shared storage and 'allow_remote_fs_zero_copy_replication' is enabled.", 0) \ M(Seconds, try_fetch_recompressed_part_timeout, 7200, "Recompression works slow in most cases, so we don't start merge with recompression until this timeout and trying to fetch recompressed part from replica which assigned this merge with recompression.", 0) \ M(Bool, always_fetch_merged_part, 0, "If true, replica never merge parts and always download merged parts from other replicas.", 0) \ M(UInt64, max_suspicious_broken_parts, 10, "Max broken parts, if more - deny automatic deletion.", 0) \ @@ -123,13 +124,14 @@ struct Settings; M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \ M(String, storage_policy, "default", "Name of storage disk policy", 0) \ M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \ - M(Bool, allow_s3_zero_copy_replication, false, "Allow Zero-copy replication over S3", 0) \ + M(Bool, allow_remote_fs_zero_copy_replication, false, "Allow Zero-copy replication over remote fs", 0) \ M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \ M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \ M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \ M(UInt64, max_concurrent_queries, 0, "Max number of concurrently executed queries related to the MergeTree table (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \ M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). 
Queries will still be limited by other max_concurrent_queries settings.", 0) \ M(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \ + M(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \ \ /** Experimental/work in progress feature. Unsafe for production. */ \ M(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. Does not take into account sharding expressions.", 0) \ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp index 65da6080e86..13e05681fd9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp @@ -56,9 +56,9 @@ bool ReplicatedMergeTreeMergeStrategyPicker::shouldMergeOnSingleReplica(const Re } -bool ReplicatedMergeTreeMergeStrategyPicker::shouldMergeOnSingleReplicaS3Shared(const ReplicatedMergeTreeLogEntryData & entry) const +bool ReplicatedMergeTreeMergeStrategyPicker::shouldMergeOnSingleReplicaShared(const ReplicatedMergeTreeLogEntryData & entry) const { - time_t threshold = s3_execute_merges_on_single_replica_time_threshold; + time_t threshold = remote_fs_execute_merges_on_single_replica_time_threshold; return ( threshold > 0 /// feature turned on && entry.type == ReplicatedMergeTreeLogEntry::MERGE_PARTS /// it is a merge log entry @@ -100,24 +100,25 @@ std::optional ReplicatedMergeTreeMergeStrategyPicker::pickReplicaToExecu void ReplicatedMergeTreeMergeStrategyPicker::refreshState() { - auto threshold = storage.getSettings()->execute_merges_on_single_replica_time_threshold.totalSeconds(); - auto threshold_s3 = 0; - if (storage.getSettings()->allow_s3_zero_copy_replication) - threshold_s3 = storage.getSettings()->s3_execute_merges_on_single_replica_time_threshold.totalSeconds(); + const auto settings = storage.getSettings(); + auto threshold = settings->execute_merges_on_single_replica_time_threshold.totalSeconds(); + auto threshold_init = 0; + if (settings->allow_remote_fs_zero_copy_replication) + threshold_init = settings->remote_fs_execute_merges_on_single_replica_time_threshold.totalSeconds(); if (threshold == 0) /// we can reset the settings w/o lock (it's atomic) execute_merges_on_single_replica_time_threshold = threshold; - if (threshold_s3 == 0) - s3_execute_merges_on_single_replica_time_threshold = threshold_s3; - if (threshold == 0 && threshold_s3 == 0) + if (threshold_init == 0) + remote_fs_execute_merges_on_single_replica_time_threshold = threshold_init; + if (threshold == 0 && threshold_init == 0) return; auto now = time(nullptr); /// the setting was already enabled, and last state refresh was done recently if (((threshold != 0 && execute_merges_on_single_replica_time_threshold != 0) - || (threshold_s3 != 0 && s3_execute_merges_on_single_replica_time_threshold != 0)) + || (threshold_init != 0 && remote_fs_execute_merges_on_single_replica_time_threshold != 0)) && now - last_refresh_time < REFRESH_STATE_MINIMUM_INTERVAL_SECONDS) return; @@ -146,15 +147,15 @@ void ReplicatedMergeTreeMergeStrategyPicker::refreshState() LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use execute_merges_on_single_replica_time_threshold!"); /// we can reset the settings w/o lock (it's atomic) 
execute_merges_on_single_replica_time_threshold = 0; - s3_execute_merges_on_single_replica_time_threshold = 0; + remote_fs_execute_merges_on_single_replica_time_threshold = 0; return; } std::lock_guard lock(mutex); if (threshold != 0) /// Zeros already reset execute_merges_on_single_replica_time_threshold = threshold; - if (threshold_s3 != 0) - s3_execute_merges_on_single_replica_time_threshold = threshold_s3; + if (threshold_init != 0) + remote_fs_execute_merges_on_single_replica_time_threshold = threshold_init; last_refresh_time = now; current_replica_index = current_replica_index_tmp; active_replicas = active_replicas_tmp; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h index 8adf206676a..f6a19173f77 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h @@ -52,9 +52,9 @@ public: /// and we may need to do a fetch (or postpone) instead of merge bool shouldMergeOnSingleReplica(const ReplicatedMergeTreeLogEntryData & entry) const; - /// return true if s3_execute_merges_on_single_replica_time_threshold feature is active + /// return true if remote_fs_execute_merges_on_single_replica_time_threshold feature is active /// and we may need to do a fetch (or postpone) instead of merge - bool shouldMergeOnSingleReplicaS3Shared(const ReplicatedMergeTreeLogEntryData & entry) const; + bool shouldMergeOnSingleReplicaShared(const ReplicatedMergeTreeLogEntryData & entry) const; /// returns the replica name /// and it's not current replica should do the merge @@ -72,7 +72,7 @@ private: uint64_t getEntryHash(const ReplicatedMergeTreeLogEntryData & entry) const; std::atomic execute_merges_on_single_replica_time_threshold = 0; - std::atomic s3_execute_merges_on_single_replica_time_threshold = 0; + std::atomic remote_fs_execute_merges_on_single_replica_time_threshold = 0; std::atomic last_refresh_time = 0; std::mutex mutex; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index aaa76009d74..aa30a469062 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -7,8 +7,14 @@ #include #include #include +#include +namespace CurrentMetrics +{ + extern const Metric BackgroundPoolTask; +} + namespace DB { @@ -886,7 +892,6 @@ bool ReplicatedMergeTreeQueue::checkReplaceRangeCanBeRemoved(const MergeTreePart if (entry_ptr->replace_range_entry == current.replace_range_entry) /// same partition, don't want to drop ourselves return false; - if (!part_info.contains(MergeTreePartInfo::fromPartName(entry_ptr->replace_range_entry->drop_range_part_name, format_version))) return false; @@ -1140,16 +1145,18 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (!ignore_max_size && sum_parts_size_in_bytes > max_source_parts_size) { - const char * format_str = "Not executing log entry {} of type {} for part {}" - " because source parts size ({}) is greater than the current maximum ({})."; + size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundPoolTask].load(std::memory_order_relaxed); + size_t thread_pool_size = data.getContext()->getSettingsRef().background_pool_size; + size_t free_threads = thread_pool_size - busy_threads_in_pool; + size_t required_threads = data_settings->number_of_free_entries_in_pool_to_execute_mutation; + out_postpone_reason = fmt::format("Not executing log 
entry {} of type {} for part {}" + " because source parts size ({}) is greater than the current maximum ({})." + " {} free of {} threads, required {} free threads.", + entry.znode_name, entry.typeToString(), entry.new_part_name, + ReadableSize(sum_parts_size_in_bytes), ReadableSize(max_source_parts_size), + free_threads, thread_pool_size, required_threads); - LOG_DEBUG(log, format_str, entry.znode_name, - entry.typeToString(), entry.new_part_name, - ReadableSize(sum_parts_size_in_bytes), ReadableSize(max_source_parts_size)); - - out_postpone_reason = fmt::format(format_str, entry.znode_name, - entry.typeToString(), entry.new_part_name, - ReadableSize(sum_parts_size_in_bytes), ReadableSize(max_source_parts_size)); + LOG_DEBUG(log, out_postpone_reason); return false; } diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 9cc2787697d..15beb94404b 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -47,6 +47,7 @@ public: QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); } + bool supportsPrewhere() const override { return true; } bool supportsIndexForIn() const override { return true; } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 70251a940cc..e24e252bf01 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -256,6 +256,12 @@ NamesAndTypesList StorageMaterializedPostgreSQL::getVirtuals() const } +bool StorageMaterializedPostgreSQL::needRewriteQueryWithFinal(const Names & column_names) const +{ + return needRewriteQueryWithFinalForStorage(column_names, getNested()); +} + + Pipe StorageMaterializedPostgreSQL::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -327,6 +333,16 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d return make_decimal_expression("Decimal256"); } + if (which.isDateTime64()) + { + auto ast_expression = std::make_shared(); + + ast_expression->name = "DateTime64"; + ast_expression->arguments = std::make_shared(); + ast_expression->arguments->children.emplace_back(std::make_shared(UInt32(6))); + return ast_expression; + } + return std::make_shared(data_type->getName()); } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 5d18a0b16b7..becb4f6ba10 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -82,6 +82,8 @@ public: NamesAndTypesList getVirtuals() const override; + bool needRewriteQueryWithFinal(const Names & column_names) const override; + Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -119,6 +121,8 @@ public: /// for current table, set has_nested = true. 
StoragePtr prepare(); + bool supportsFinal() const override { return true; } + protected: StorageMaterializedPostgreSQL( const StorageID & table_id_, diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 369f4e9eca9..fbbb4a64689 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index fb96bb01936..36a40beca36 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -16,6 +16,14 @@ namespace DB { +bool needRewriteQueryWithFinalForStorage(const Names & column_names, const StoragePtr & storage) +{ + const StorageMetadataPtr & metadata = storage->getInMemoryMetadataPtr(); + Block header = metadata->getSampleBlock(); + ColumnWithTypeAndName & version_column = header.getByPosition(header.columns() - 1); + return std::find(column_names.begin(), column_names.end(), version_column.name) == column_names.end(); +} + Pipe readFinalFromNestedStorage( StoragePtr nested_storage, const Names & column_names, @@ -32,20 +40,6 @@ Pipe readFinalFromNestedStorage( Block nested_header = nested_metadata->getSampleBlock(); ColumnWithTypeAndName & sign_column = nested_header.getByPosition(nested_header.columns() - 2); - ColumnWithTypeAndName & version_column = nested_header.getByPosition(nested_header.columns() - 1); - - if (ASTSelectQuery * select_query = query_info.query->as(); select_query && !column_names_set.count(version_column.name)) - { - auto & tables_in_select_query = select_query->tables()->as(); - - if (!tables_in_select_query.children.empty()) - { - auto & tables_element = tables_in_select_query.children[0]->as(); - - if (tables_element.table_expression) - tables_element.table_expression->as().final = true; - } - } String filter_column_name; Names require_columns_name = column_names; @@ -59,9 +53,6 @@ Pipe readFinalFromNestedStorage( expressions->children.emplace_back(makeASTFunction("equals", sign_column_name, fetch_sign_value)); filter_column_name = expressions->children.back()->getColumnName(); - - for (const auto & column_name : column_names) - expressions->children.emplace_back(std::make_shared(column_name)); } Pipe pipe = nested_storage->read(require_columns_name, nested_metadata, query_info, context, processed_stage, max_block_size, num_streams); diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.h b/src/Storages/ReadFinalForExternalReplicaStorage.h index b54592159ef..f09a115919d 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.h +++ b/src/Storages/ReadFinalForExternalReplicaStorage.h @@ -13,6 +13,8 @@ namespace DB { +bool needRewriteQueryWithFinalForStorage(const Names & column_names, const StoragePtr & storage); + Pipe readFinalFromNestedStorage( StoragePtr nested_storage, const Names & column_names, diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index fc308667db9..cf2c4d72f59 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -156,6 +156,7 @@ struct SelectQueryInfo /// If not null, it means we choose a projection to execute current query. 
std::optional projection; bool ignore_projections = false; + bool is_projection_query = false; }; } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index a433cd248c7..0b1956a7e17 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -32,6 +31,7 @@ #include #include + namespace ProfileEvents { extern const Event StorageBufferFlush; @@ -137,7 +137,7 @@ public: BufferSource(const Names & column_names_, StorageBuffer::Buffer & buffer_, const StorageBuffer & storage, const StorageMetadataPtr & metadata_snapshot) : SourceWithProgress( metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID())) - , column_names_and_types(metadata_snapshot->getColumns().getAllWithSubcolumns().addTypes(column_names_)) + , column_names_and_types(metadata_snapshot->getColumns().getByNames(ColumnsDescription::All, column_names_, true)) , buffer(buffer_) {} String getName() const override { return "Buffer"; } @@ -242,8 +242,8 @@ void StorageBuffer::read( { const auto & dest_columns = destination_metadata_snapshot->getColumns(); const auto & our_columns = metadata_snapshot->getColumns(); - return dest_columns.hasPhysicalOrSubcolumn(column_name) && - dest_columns.getPhysicalOrSubcolumn(column_name).type->equals(*our_columns.getPhysicalOrSubcolumn(column_name).type); + auto dest_columm = dest_columns.tryGetColumnOrSubcolumn(ColumnsDescription::AllPhysical, column_name); + return dest_columm && dest_columm->type->equals(*our_columns.getColumnOrSubcolumn(ColumnsDescription::AllPhysical, column_name).type); }); if (dst_has_same_structure) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index f4d6ec5c6f7..5d87fd25f93 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,26 +290,27 @@ void replaceConstantExpressions( /// - QueryProcessingStage::WithMergeableStateAfterAggregation /// - QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit /// - none (in this case regular WithMergeableState should be used) -std::optional getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, bool extremes, const Block & sharding_key_block) +std::optional getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, bool extremes, const Names & sharding_key_columns) { const auto & select = query_info.query->as(); - auto sharding_block_has = [&](const auto & exprs, size_t limit = SIZE_MAX) -> bool + auto sharding_block_has = [&](const auto & exprs) -> bool { - size_t i = 0; + std::unordered_set expr_columns; for (auto & expr : exprs) { - ++i; - if (i > limit) - break; - auto id = expr->template as(); if (!id) - return false; - /// TODO: if GROUP BY contains multiIf()/if() it should contain only columns from sharding_key - if (!sharding_key_block.has(id->name())) + continue; + expr_columns.emplace(id->name()); + } + + for (const auto & column : sharding_key_columns) + { + if (!expr_columns.contains(column)) return false; } + return true; }; @@ -343,7 +344,7 @@ std::optional getOptimizedQueryProcessingStage(const } else { - if (!sharding_block_has(group_by->children, 1)) + if (!sharding_block_has(group_by->children)) return {}; } @@ -547,8 +548,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( has_sharding_key && (settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic)) { - Block 
sharding_key_block = sharding_key_expr->getSampleBlock(); - auto stage = getOptimizedQueryProcessingStage(query_info, settings.extremes, sharding_key_block); + auto stage = getOptimizedQueryProcessingStage(query_info, settings.extremes, sharding_key_expr->getRequiredColumns()); if (stage) { LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage)); @@ -602,25 +602,25 @@ void StorageDistributed::read( return; } - const Scalars & scalars = local_context->hasQueryContext() ? local_context->getQueryContext()->getScalars() : Scalars{}; - bool has_virtual_shard_num_column = std::find(column_names.begin(), column_names.end(), "_shard_num") != column_names.end(); if (has_virtual_shard_num_column && !isVirtualColumn("_shard_num", metadata_snapshot)) has_virtual_shard_num_column = false; - ClusterProxy::SelectStreamFactory select_stream_factory = remote_table_function_ptr - ? ClusterProxy::SelectStreamFactory( - header, processed_stage, remote_table_function_ptr, scalars, has_virtual_shard_num_column, local_context->getExternalTables()) - : ClusterProxy::SelectStreamFactory( + StorageID main_table = StorageID::createEmpty(); + if (!remote_table_function_ptr) + main_table = StorageID{remote_database, remote_table}; + + ClusterProxy::SelectStreamFactory select_stream_factory = + ClusterProxy::SelectStreamFactory( header, processed_stage, - StorageID{remote_database, remote_table}, - scalars, - has_virtual_shard_num_column, - local_context->getExternalTables()); + has_virtual_shard_num_column); - ClusterProxy::executeQuery(query_plan, select_stream_factory, log, - modified_query_ast, local_context, query_info, + ClusterProxy::executeQuery( + query_plan, header, processed_stage, + main_table, remote_table_function_ptr, + select_stream_factory, log, modified_query_ast, + local_context, query_info, sharding_key_expr, sharding_key_column_name, query_info.cluster); @@ -1093,7 +1093,7 @@ ClusterPtr StorageDistributed::skipUnusedShards( size_t limit = local_context->getSettingsRef().optimize_skip_unused_shards_limit; if (!limit || limit > SSIZE_MAX) { - throw Exception("optimize_skip_unused_shards_limit out of range (0, {}]", ErrorCodes::ARGUMENT_OUT_OF_BOUND, SSIZE_MAX); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "optimize_skip_unused_shards_limit out of range (0, {}]", SSIZE_MAX); } // To interpret limit==0 as limit is reached ++limit; @@ -1292,8 +1292,11 @@ void registerStorageDistributed(StorageFactory & factory) String cluster_name = getClusterNameAndMakeLiteral(engine_args[0]); - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); + const ContextPtr & context = args.getContext(); + const ContextPtr & local_context = args.getLocalContext(); + + engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], local_context); + engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], local_context); String remote_database = engine_args[1]->as().value.safeGet(); String remote_table = engine_args[2]->as().value.safeGet(); @@ -1304,7 +1307,7 @@ void registerStorageDistributed(StorageFactory & factory) /// Check that sharding_key exists in the table and has numeric type. 
if (sharding_key) { - auto sharding_expr = buildShardingKeyExpression(sharding_key, args.getContext(), args.columns.getAllPhysical(), true); + auto sharding_expr = buildShardingKeyExpression(sharding_key, context, args.columns.getAllPhysical(), true); const Block & block = sharding_expr->getSampleBlock(); if (block.columns() != 1) @@ -1335,6 +1338,16 @@ void registerStorageDistributed(StorageFactory & factory) "bytes_to_throw_insert cannot be less or equal to bytes_to_delay_insert (since it is handled first)"); } + /// Set default values from the distributed_directory_monitor_* global context settings. + if (!distributed_settings.monitor_batch_inserts.changed) + distributed_settings.monitor_batch_inserts = context->getSettingsRef().distributed_directory_monitor_batch_inserts; + if (!distributed_settings.monitor_split_batch_on_failure.changed) + distributed_settings.monitor_split_batch_on_failure = context->getSettingsRef().distributed_directory_monitor_split_batch_on_failure; + if (!distributed_settings.monitor_sleep_time_ms.changed) + distributed_settings.monitor_sleep_time_ms = Poco::Timespan(context->getSettingsRef().distributed_directory_monitor_sleep_time_ms); + if (!distributed_settings.monitor_max_sleep_time_ms.changed) + distributed_settings.monitor_max_sleep_time_ms = Poco::Timespan(context->getSettingsRef().distributed_directory_monitor_max_sleep_time_ms); + return StorageDistributed::create( args.table_id, args.columns, @@ -1343,7 +1356,7 @@ void registerStorageDistributed(StorageFactory & factory) remote_database, remote_table, cluster_name, - args.getContext(), + context, sharding_key, storage_policy, args.relative_data_path, diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index efd59255c9e..73c734500df 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include @@ -363,7 +362,7 @@ public: } else { - nested_buffer = std::make_unique(current_path); + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); method = chooseCompressionMethod(current_path, storage->compression_method); } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 28574d6fdf1..dad83f64c70 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -320,23 +320,31 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns( { Block res; - auto all_columns = getColumns().getAllWithSubcolumns(); - std::unordered_map columns_map; - columns_map.reserve(all_columns.size()); +#if !defined(ARCADIA_BUILD) + google::dense_hash_map virtuals_map; +#else + google::sparsehash::dense_hash_map virtuals_map; +#endif - for (const auto & elem : all_columns) - columns_map.emplace(elem.name, elem.type); + virtuals_map.set_empty_key(StringRef()); /// Virtual columns must be appended after ordinary, because user can /// override them. 
for (const auto & column : virtuals) - columns_map.emplace(column.name, column.type); + virtuals_map.emplace(column.name, &column.type); for (const auto & name : column_names) { - auto it = columns_map.find(name); - if (it != columns_map.end()) - res.insert({it->second->createColumn(), it->second, it->first}); + auto column = getColumns().tryGetColumnOrSubcolumn(ColumnsDescription::All, name); + if (column) + { + res.insert({column->type->createColumn(), column->type, column->name}); + } + else if (auto it = virtuals_map.find(name); it != virtuals_map.end()) + { + const auto & type = *it->second; + res.insert({type->createColumn(), type, name}); + } else throw Exception( "Column " + backQuote(name) + " not found in table " + (storage_id.empty() ? "" : storage_id.getNameForLogs()), @@ -508,26 +516,31 @@ namespace void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const { - NamesAndTypesList available_columns = getColumns().getAllPhysicalWithSubcolumns(); - available_columns.insert(available_columns.end(), virtuals.begin(), virtuals.end()); - - const String list_of_columns = listOfColumns(available_columns); - if (column_names.empty()) - throw Exception("Empty list of columns queried. There are columns: " + list_of_columns, ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); - - const auto columns_map = getColumnsMap(available_columns); + { + auto list_of_columns = listOfColumns(getColumns().getAllPhysicalWithSubcolumns()); + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, + "Empty list of columns queried. There are columns: {}", list_of_columns); + } + const auto virtuals_map = getColumnsMap(virtuals); auto unique_names = initUniqueStrings(); + for (const auto & name : column_names) { - if (columns_map.end() == columns_map.find(name)) - throw Exception( - "There is no column with name " + backQuote(name) + " in table " + storage_id.getNameForLogs() + ". There are columns: " + list_of_columns, - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + bool has_column = getColumns().hasColumnOrSubcolumn(ColumnsDescription::AllPhysical, name) || virtuals_map.count(name); + + if (!has_column) + { + auto list_of_columns = listOfColumns(getColumns().getAllPhysicalWithSubcolumns()); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column with name {} in table {}. There are columns: {}", + backQuote(name), storage_id.getNameForLogs(), list_of_columns); + } if (unique_names.end() != unique_names.find(name)) - throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); + throw Exception(ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE, "Column {} queried more than once", name); + unique_names.insert(name); } } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 861cb5866ee..d0d60f608d7 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -28,7 +28,6 @@ struct StorageInMemoryMetadata ConstraintsDescription constraints; /// Table projections. Currently supported for MergeTree only. ProjectionsDescription projections; - mutable const ProjectionDescription * selected_projection{}; /// PARTITION BY expression. Currently supported for MergeTree only. KeyDescription partition_key; /// PRIMARY KEY expression. If absent, than equal to order_by_ast. 
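The StorageInMemoryMetadata changes above drop the combined map of all columns-with-subcolumns: ordinary columns and subcolumns are now resolved directly via ColumnsDescription::tryGetColumnOrSubcolumn(), and only the handful of virtual columns go through a hash map, consulted as a fallback so that table columns can override virtuals. A minimal sketch of that lookup order (hypothetical helper name, std::unordered_map instead of google::dense_hash_map, simplified types):

```
#include <functional>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Simplified stand-in for NameAndTypePair.
struct ColumnInfo { std::string name; std::string type; };

// Hypothetical illustration of the lookup order used by getSampleBlockForColumns():
// 1) real columns/subcolumns first, 2) virtual columns only as a fallback.
ColumnInfo resolveColumn(
    const std::string & name,
    const std::function<const ColumnInfo * (const std::string &)> & try_get_column_or_subcolumn,
    const std::unordered_map<std::string, ColumnInfo> & virtuals_map)
{
    if (const auto * column = try_get_column_or_subcolumn(name))
        return *column;

    if (auto it = virtuals_map.find(name); it != virtuals_map.end())
        return it->second;

    throw std::runtime_error("Column " + name + " not found in table");
}
```

The same precedence is reused in check(): a queried name is accepted if it is a physical column, a subcolumn, or a known virtual column; otherwise NO_SUCH_COLUMN_IN_TABLE is thrown.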
diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 63b440aff08..f25d356c722 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -3,7 +3,6 @@ #include -#include #include #include #include diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index c3061ce9c51..ce7993a1e3a 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 61fbbbc3086..d3eb4bff6d1 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -16,7 +16,6 @@ #include -#include #include #include @@ -28,6 +27,7 @@ #include #include +#include #define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin" @@ -659,7 +659,7 @@ Pipe StorageLog::read( auto lock_timeout = getLockTimeout(context); loadMarks(lock_timeout); - auto all_columns = metadata_snapshot->getColumns().getAllWithSubcolumns().addTypes(column_names); + auto all_columns = metadata_snapshot->getColumns().getByNames(ColumnsDescription::All, column_names, true); all_columns = Nested::convertToSubcolumns(all_columns); std::shared_lock lock(rwlock, lock_timeout); @@ -719,6 +719,34 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr contex } +IStorage::ColumnSizeByName StorageLog::getColumnSizes() const +{ + std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + ColumnSizeByName column_sizes; + FileChecker::Map file_sizes = file_checker.getFileSizes(); + + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) + { + ISerialization::StreamCallback stream_callback = [&, this] (const ISerialization::SubstreamPath & substream_path) + { + String stream_name = ISerialization::getFileNameForStream(column, substream_path); + ColumnSize & size = column_sizes[column.name]; + auto it = files.find(stream_name); + if (it != files.end()) + size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; + }; + + ISerialization::SubstreamPath substream_path; + auto serialization = column.type->getDefaultSerialization(); + serialization->enumerateStreams(stream_callback, substream_path); + } + + return column_sizes; +} + void registerStorageLog(StorageFactory & factory) { StorageFactory::StorageFeatures features{ diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 6fea00edefd..799bad26c7c 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -45,6 +45,7 @@ public: bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } bool supportsSubcolumns() const override { return true; } + ColumnSizeByName getColumnSizes() const override; protected: /** Attach the table with the appropriate name, along the appropriate path (with / at the end), @@ -87,7 +88,7 @@ private: DiskPtr disk; String table_path; - std::shared_timed_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Files files; diff --git a/src/Storages/StorageMaterializeMySQL.cpp b/src/Storages/StorageMaterializeMySQL.cpp index 5b371fe3fb8..220d77b104a 100644 --- a/src/Storages/StorageMaterializeMySQL.cpp +++ b/src/Storages/StorageMaterializeMySQL.cpp @@ -36,6 +36,11 @@ StorageMaterializeMySQL::StorageMaterializeMySQL(const StoragePtr & nested_stora 
setInMemoryMetadata(in_memory_metadata); } +bool StorageMaterializeMySQL::needRewriteQueryWithFinal(const Names & column_names) const +{ + return needRewriteQueryWithFinalForStorage(column_names, nested_storage); +} + Pipe StorageMaterializeMySQL::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -47,6 +52,7 @@ Pipe StorageMaterializeMySQL::read( { /// If the background synchronization thread has exception. rethrowSyncExceptionIfNeed(database); + return readFinalFromNestedStorage(nested_storage, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); } diff --git a/src/Storages/StorageMaterializeMySQL.h b/src/Storages/StorageMaterializeMySQL.h index 45221ed5b76..b0b7a877630 100644 --- a/src/Storages/StorageMaterializeMySQL.h +++ b/src/Storages/StorageMaterializeMySQL.h @@ -24,6 +24,8 @@ public: StorageMaterializeMySQL(const StoragePtr & nested_storage_, const IDatabase * database_); + bool needRewriteQueryWithFinal(const Names & column_names) const override; + Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 76fa4b8e20b..477708e77b1 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 9e1ae24fc75..91b19ccc844 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -1,8 +1,6 @@ #include #include -#include - #include #include #include @@ -35,7 +33,7 @@ public: std::shared_ptr> parallel_execution_index_, InitializerFunc initializer_func_ = {}) : SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID())) - , column_names_and_types(metadata_snapshot->getColumns().getAllWithSubcolumns().addTypes(std::move(column_names_))) + , column_names_and_types(metadata_snapshot->getColumns().getByNames(ColumnsDescription::All, column_names_, true)) , data(data_) , parallel_execution_index(parallel_execution_index_) , initializer_func(std::move(initializer_func_)) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 2d5bbfc712d..43838b1d8c5 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -388,6 +388,13 @@ Pipe StorageMerge::createSources( return pipe; } + if (!modified_select.final() && storage->needRewriteQueryWithFinal(real_column_names)) + { + /// NOTE: It may not work correctly in some cases, because query was analyzed without final. + /// However, it's needed for MaterializeMySQL and it's unlikely that someone will use it with Merge tables. 
+ modified_select.setFinal(); + } + auto storage_stage = storage->getQueryProcessingStage(modified_context, QueryProcessingStage::Complete, metadata_snapshot, modified_query_info); if (processed_stage <= storage_stage) @@ -676,14 +683,16 @@ void StorageMerge::convertingSourceStream( auto convert_actions_dag = ActionsDAG::makeConvertingActions(pipe.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); - auto actions = std::make_shared(convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + auto actions = std::make_shared( + convert_actions_dag, + ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + pipe.addSimpleTransform([&](const Block & stream_header) { return std::make_shared(stream_header, actions); }); } - auto where_expression = query->as()->where(); if (!where_expression) diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 211a626e8d4..1f28f4f19f9 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -234,6 +234,10 @@ public: else if (which.isFloat64()) nested_column = ColumnFloat64::create(); else if (which.isDate()) nested_column = ColumnUInt16::create(); else if (which.isDateTime()) nested_column = ColumnUInt32::create(); + else if (which.isDateTime64()) + { + nested_column = ColumnDecimal::create(0, 6); + } else if (which.isDecimal32()) { const auto & type = typeid_cast *>(nested.get()); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index beb987ae605..d44b86fe9bb 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -155,7 +155,6 @@ namespace ActionLocks static const auto QUEUE_UPDATE_ERROR_SLEEP_MS = 1 * 1000; -static const auto MERGE_SELECTING_SLEEP_MS = 5 * 1000; static const auto MUTATIONS_FINALIZING_SLEEP_MS = 1 * 1000; static const auto MUTATIONS_FINALIZING_IDLE_SLEEP_MS = 5 * 1000; @@ -607,11 +606,14 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() zookeeper->createIfNotExists(zookeeper_path + "/mutations", String()); zookeeper->createIfNotExists(replica_path + "/mutation_pointer", String()); - /// Nodes for zero-copy S3 replication - if (storage_settings.get()->allow_s3_zero_copy_replication) + /// Nodes for remote fs zero-copy replication + const auto settings = getSettings(); + if (settings->allow_remote_fs_zero_copy_replication) { zookeeper->createIfNotExists(zookeeper_path + "/zero_copy_s3", String()); zookeeper->createIfNotExists(zookeeper_path + "/zero_copy_s3/shared", String()); + zookeeper->createIfNotExists(zookeeper_path + "/zero_copy_hdfs", String()); + zookeeper->createIfNotExists(zookeeper_path + "/zero_copy_hdfs/shared", String()); } /// Part movement. 
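The zero-copy machinery is no longer S3-specific: createNewZooKeeperNodes() now provisions one node subtree per disk type (zero_copy_s3, zero_copy_hdfs), and the lock/unlock code further down builds the node name from the part's disk type. A rough sketch of how such a lock path is composed (plain string concatenation instead of fs::path; the helper name is illustrative only):

```
#include <string>

// Illustrative only: mirrors the path built in lockSharedData()/unlockSharedData(), i.e.
// <zookeeper_path>/zero_copy_<disk_type>/shared/<part_name>/<unique_id>/<replica_name>.
std::string zeroCopyLockPath(
    const std::string & zookeeper_path,   // the table's replication path in ZooKeeper
    const std::string & disk_type,        // "s3" or "hdfs"
    const std::string & part_name,
    const std::string & part_unique_id,   // part.getUniqueId() with '/' replaced by '_'
    const std::string & replica_name)
{
    return zookeeper_path + "/zero_copy_" + disk_type
        + "/shared/" + part_name + "/" + part_unique_id + "/" + replica_name;
}
```

Keeping separate subtrees per disk type means getSharedDataReplica() only considers replicas that hold the part on the same kind of remote disk when deciding whether a part can be fetched instead of merged.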
@@ -1728,23 +1730,19 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) future_merged_part.updatePath(*this, reserved_space); future_merged_part.merge_type = entry.merge_type; - if (storage_settings_ptr->allow_s3_zero_copy_replication) + if (reserved_space->getDisk()->supportZeroCopyReplication() + && storage_settings_ptr->allow_remote_fs_zero_copy_replication + && merge_strategy_picker.shouldMergeOnSingleReplicaShared(entry)) { - if (auto disk = reserved_space->getDisk(); disk->getType() == DB::DiskType::Type::S3) - { - if (merge_strategy_picker.shouldMergeOnSingleReplicaS3Shared(entry)) - { - if (!replica_to_execute_merge_picked) - replica_to_execute_merge = merge_strategy_picker.pickReplicaToExecuteMerge(entry); + if (!replica_to_execute_merge_picked) + replica_to_execute_merge = merge_strategy_picker.pickReplicaToExecuteMerge(entry); - if (replica_to_execute_merge) - { - LOG_DEBUG(log, - "Prefer fetching part {} from replica {} due s3_execute_merges_on_single_replica_time_threshold", - entry.new_part_name, replica_to_execute_merge.value()); - return false; - } - } + if (replica_to_execute_merge) + { + LOG_DEBUG(log, + "Prefer fetching part {} from replica {} due to remote_fs_execute_merges_on_single_replica_time_threshold", + entry.new_part_name, replica_to_execute_merge.value()); + return false; } } @@ -2168,7 +2166,7 @@ bool StorageReplicatedMergeTree::executeFetchShared( { if (source_replica.empty()) { - LOG_INFO(log, "No active replica has part {} on S3.", new_part_name); + LOG_INFO(log, "No active replica has part {} on shared storage.", new_part_name); return false; } @@ -2783,6 +2781,16 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo } } + { + /// Check "is_lost" version after retrieving queue and parts. + /// If version has changed, then replica most likely has been dropped and parts set is inconsistent, + /// so throw exception and retry cloning. 
+ Coordination::Stat is_lost_stat_new; + zookeeper->get(fs::path(source_path) / "is_lost", &is_lost_stat_new); + if (is_lost_stat_new.version != source_is_lost_stat.version) + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Cannot clone {}, because it suddenly become lost", source_replica); + } + tryRemovePartsFromZooKeeperWithRetries(parts_to_remove_from_zk); auto local_active_parts = getDataParts(); @@ -3347,7 +3355,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() if (create_result != CreateMergeEntryResult::Ok && create_result != CreateMergeEntryResult::LogUpdated) { - merge_selecting_task->scheduleAfter(MERGE_SELECTING_SLEEP_MS); + merge_selecting_task->scheduleAfter(storage_settings_ptr->merge_selecting_sleep_ms); } else { @@ -7201,10 +7209,9 @@ void StorageReplicatedMergeTree::lockSharedData(const IMergeTreeDataPart & part) if (!part.volume) return; DiskPtr disk = part.volume->getDisk(); - if (!disk) - return; - if (disk->getType() != DB::DiskType::Type::S3) + if (!disk || !disk->supportZeroCopyReplication()) return; + String zero_copy = fmt::format("zero_copy_{}", DiskType::toString(disk->getType())); zkutil::ZooKeeperPtr zookeeper = tryGetZooKeeper(); if (!zookeeper) @@ -7213,7 +7220,7 @@ void StorageReplicatedMergeTree::lockSharedData(const IMergeTreeDataPart & part) String id = part.getUniqueId(); boost::replace_all(id, "/", "_"); - String zookeeper_node = fs::path(zookeeper_path) / "zero_copy_s3" / "shared" / part.name / id / replica_name; + String zookeeper_node = fs::path(zookeeper_path) / zero_copy / "shared" / part.name / id / replica_name; LOG_TRACE(log, "Set zookeeper lock {}", zookeeper_node); @@ -7242,10 +7249,9 @@ bool StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & par if (!part.volume) return true; DiskPtr disk = part.volume->getDisk(); - if (!disk) - return true; - if (disk->getType() != DB::DiskType::Type::S3) + if (!disk || !disk->supportZeroCopyReplication()) return true; + String zero_copy = fmt::format("zero_copy_{}", DiskType::toString(disk->getType())); zkutil::ZooKeeperPtr zookeeper = tryGetZooKeeper(); if (!zookeeper) @@ -7254,7 +7260,7 @@ bool StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & par String id = part.getUniqueId(); boost::replace_all(id, "/", "_"); - String zookeeper_part_node = fs::path(zookeeper_path) / "zero_copy_s3" / "shared" / part.name; + String zookeeper_part_node = fs::path(zookeeper_path) / zero_copy / "shared" / part.name; String zookeeper_part_uniq_node = fs::path(zookeeper_part_node) / id; String zookeeper_node = fs::path(zookeeper_part_uniq_node) / replica_name; @@ -7289,16 +7295,14 @@ bool StorageReplicatedMergeTree::tryToFetchIfShared( const DiskPtr & disk, const String & path) { - const auto data_settings = getSettings(); - if (!data_settings->allow_s3_zero_copy_replication) + const auto settings = getSettings(); + auto disk_type = disk->getType(); + if (!(disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)) return false; - if (disk->getType() != DB::DiskType::Type::S3) - return false; + String replica = getSharedDataReplica(part, disk_type); - String replica = getSharedDataReplica(part); - - /// We can't fetch part when none replicas have this part on S3 + /// We can't fetch part when none replicas have this part on a same type remote disk if (replica.empty()) return false; @@ -7307,7 +7311,7 @@ bool StorageReplicatedMergeTree::tryToFetchIfShared( String StorageReplicatedMergeTree::getSharedDataReplica( - const IMergeTreeDataPart & 
part) const + const IMergeTreeDataPart & part, DiskType::Type disk_type) const { String best_replica; @@ -7315,7 +7319,8 @@ String StorageReplicatedMergeTree::getSharedDataReplica( if (!zookeeper) return best_replica; - String zookeeper_part_node = fs::path(zookeeper_path) / "zero_copy_s3" / "shared" / part.name; + String zero_copy = fmt::format("zero_copy_{}", DiskType::toString(disk_type)); + String zookeeper_part_node = fs::path(zookeeper_path) / zero_copy / "shared" / part.name; Strings ids; zookeeper->tryGetChildren(zookeeper_part_node, ids); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 28dd3c760a8..800f419cb76 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -225,10 +225,10 @@ public: /// Fetch part only when it stored on shared storage like S3 bool executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); - /// Lock part in zookeeper for use common S3 data in several nodes + /// Lock part in zookeeper for use shared data in several nodes void lockSharedData(const IMergeTreeDataPart & part) const override; - /// Unlock common S3 data part in zookeeper + /// Unlock shared data part in zookeeper /// Return true if data unlocked /// Return false if data is still used by another node bool unlockSharedData(const IMergeTreeDataPart & part) const override; @@ -236,8 +236,8 @@ public: /// Fetch part only if some replica has it on shared storage like S3 bool tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; - /// Get best replica having this partition on S3 - String getSharedDataReplica(const IMergeTreeDataPart & part) const; + /// Get best replica having this partition on a same type remote disk + String getSharedDataReplica(const IMergeTreeDataPart & part, DiskType::Type disk_type) const; inline String getReplicaName() const { return replica_name; } diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp new file mode 100644 index 00000000000..c900c7b7e09 --- /dev/null +++ b/src/Storages/StorageSQLite.cpp @@ -0,0 +1,174 @@ +#include "StorageSQLite.h" + +#if USE_SQLITE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SQLITE_ENGINE_ERROR; +} + +StorageSQLite::StorageSQLite( + const StorageID & table_id_, + SQLitePtr sqlite_db_, + const String & remote_table_name_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_) + : IStorage(table_id_) + , WithContext(context_->getGlobalContext()) + , remote_table_name(remote_table_name_) + , global_context(context_) + , sqlite_db(sqlite_db_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); +} + + +Pipe StorageSQLite::read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + SelectQueryInfo & query_info, + ContextPtr context_, + QueryProcessingStage::Enum, + size_t max_block_size, + unsigned int) +{ + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); + + String query = transformQueryForExternalDatabase( + query_info, + metadata_snapshot->getColumns().getOrdinary(), 
+ IdentifierQuotingStyle::DoubleQuotes, + "", + remote_table_name, + context_); + + Block sample_block; + for (const String & column_name : column_names) + { + auto column_data = metadata_snapshot->getColumns().getPhysical(column_name); + sample_block.insert({column_data.type, column_data.name}); + } + + return Pipe(std::make_shared( + std::make_shared(sqlite_db, query, sample_block, max_block_size))); +} + + +class SQLiteBlockOutputStream : public IBlockOutputStream +{ +public: + explicit SQLiteBlockOutputStream( + const StorageSQLite & storage_, + const StorageMetadataPtr & metadata_snapshot_, + StorageSQLite::SQLitePtr sqlite_db_, + const String & remote_table_name_) + : storage{storage_} + , metadata_snapshot(metadata_snapshot_) + , sqlite_db(sqlite_db_) + , remote_table_name(remote_table_name_) + { + } + + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } + + void write(const Block & block) override + { + WriteBufferFromOwnString sqlbuf; + + sqlbuf << "INSERT INTO "; + sqlbuf << doubleQuoteString(remote_table_name); + sqlbuf << " ("; + + for (auto it = block.begin(); it != block.end(); ++it) + { + if (it != block.begin()) + sqlbuf << ", "; + sqlbuf << quoteString(it->name); + } + + sqlbuf << ") VALUES "; + + auto writer = FormatFactory::instance().getOutputStream("Values", sqlbuf, metadata_snapshot->getSampleBlock(), storage.getContext()); + writer->write(block); + + sqlbuf << ";"; + + char * err_message = nullptr; + int status = sqlite3_exec(sqlite_db.get(), sqlbuf.str().c_str(), nullptr, nullptr, &err_message); + + if (status != SQLITE_OK) + { + String err_msg(err_message); + sqlite3_free(err_message); + throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, + "Failed to execute sqlite INSERT query. Status: {}. Message: {}", + status, err_msg); + } + } + +private: + const StorageSQLite & storage; + StorageMetadataPtr metadata_snapshot; + StorageSQLite::SQLitePtr sqlite_db; + String remote_table_name; +}; + + +BlockOutputStreamPtr StorageSQLite::write(const ASTPtr & /* query */, const StorageMetadataPtr & metadata_snapshot, ContextPtr) +{ + return std::make_shared(*this, metadata_snapshot, sqlite_db, remote_table_name); +} + + +void registerStorageSQLite(StorageFactory & factory) +{ + factory.registerStorage("SQLite", [](const StorageFactory::Arguments & args) -> StoragePtr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.size() != 2) + throw Exception("SQLite database requires 2 arguments: database path, table name", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); + + const auto database_path = engine_args[0]->as().value.safeGet(); + const auto table_name = engine_args[1]->as().value.safeGet(); + + auto sqlite_db = openSQLiteDB(database_path, args.getContext()); + + return StorageSQLite::create(args.table_id, sqlite_db, + table_name, args.columns, args.constraints, args.getContext()); + }, + { + .source_access_type = AccessType::SQLITE, + }); +} + +} + +#endif diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h new file mode 100644 index 00000000000..63b7a6fd415 --- /dev/null +++ b/src/Storages/StorageSQLite.h @@ -0,0 +1,53 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_SQLITE +#include +#include + +#include // Y_IGNORE + + +namespace DB +{ + +class StorageSQLite final : public shared_ptr_helper, public IStorage, public WithContext +{ 
+friend struct shared_ptr_helper; + +public: + using SQLitePtr = std::shared_ptr; + + StorageSQLite( + const StorageID & table_id_, + SQLitePtr sqlite_db_, + const String & remote_table_name_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_); + + std::string getName() const override { return "SQLite"; } + + Pipe read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; + +private: + String remote_table_name; + ContextPtr global_context; + SQLitePtr sqlite_db; +}; + +} + +#endif diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 36b10dfd2bb..5648c030adc 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 689b1307f4d..25000816d1f 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -22,7 +23,6 @@ #include -#include #include #include @@ -488,7 +488,7 @@ Pipe StorageTinyLog::read( { metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); - auto all_columns = metadata_snapshot->getColumns().getAllWithSubcolumns().addTypes(column_names); + auto all_columns = metadata_snapshot->getColumns().getByNames(ColumnsDescription::All, column_names, true); // When reading, we lock the entire storage, because we only have one file // per column and can't modify it concurrently. 
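The StorageTinyLog hunk below mirrors the StorageLog change earlier: getColumnSizes() walks each column's serialization streams and sums the corresponding on-disk file sizes, so it needs the read lock even though it is a const member, which is why rwlock becomes mutable in both headers. A bare-bones illustration of that const-method/mutable-mutex pattern (hypothetical class, simplified sizes map, without the lock-timeout handling used in the real code):

```
#include <cstddef>
#include <map>
#include <shared_mutex>
#include <string>

// Hypothetical, stripped-down version of the pattern used by
// StorageLog::getColumnSizes() / StorageTinyLog::getColumnSizes().
class StorageSketch
{
public:
    std::map<std::string, std::size_t> getColumnSizes() const
    {
        /// A const accessor still has to take the shared lock,
        /// hence the mutex is declared mutable below.
        std::shared_lock lock(rwlock);
        return column_sizes;
    }

private:
    mutable std::shared_timed_mutex rwlock;
    std::map<std::string, std::size_t> column_sizes;
};
```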
@@ -523,6 +523,34 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, ContextPtr co return file_checker.check(); } +IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const +{ + std::shared_lock lock(rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)); + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + + ColumnSizeByName column_sizes; + FileChecker::Map file_sizes = file_checker.getFileSizes(); + + for (const auto & column : getInMemoryMetadata().getColumns().getAllPhysical()) + { + ISerialization::StreamCallback stream_callback = [&, this] (const ISerialization::SubstreamPath & substream_path) + { + String stream_name = ISerialization::getFileNameForStream(column, substream_path); + ColumnSize & size = column_sizes[column.name]; + auto it = files.find(stream_name); + if (it != files.end()) + size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; + }; + + ISerialization::SubstreamPath substream_path; + auto serialization = column.type->getDefaultSerialization(); + serialization->enumerateStreams(stream_callback, substream_path); + } + + return column_sizes; +} + void StorageTinyLog::truncate( const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) { diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 71763a6403e..849b0731a47 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -45,6 +45,7 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override; + ColumnSizeByName getColumnSizes() const override; protected: StorageTinyLog( DiskPtr disk_, @@ -71,7 +72,7 @@ private: Files files; FileChecker file_checker; - std::shared_timed_mutex rwlock; + mutable std::shared_timed_mutex rwlock; Poco::Logger * log; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8f65147bb11..0058b58f537 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -98,7 +98,7 @@ protected: Names cols_required_for_sorting_key; Names cols_required_for_primary_key; Names cols_required_for_sampling; - MergeTreeData::ColumnSizeByName column_sizes; + IStorage::ColumnSizeByName column_sizes; { StoragePtr storage = storages.at(std::make_pair(database_name, table_name)); diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 545f2c8be9a..3a88cc96639 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index 1aca7e45190..d90ca27cbc0 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -40,8 +40,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co Field min, max; bool read_only = false; - if (settings_constraints) - settings_constraints->get(setting_name, min, max, read_only); + settings_constraints->get(setting_name, min, max, read_only); /// These two columns can accept strings only. 
if (!min.isNull()) diff --git a/src/Storages/System/StorageSystemWarnings.cpp b/src/Storages/System/StorageSystemWarnings.cpp new file mode 100644 index 00000000000..76b35e9b555 --- /dev/null +++ b/src/Storages/System/StorageSystemWarnings.cpp @@ -0,0 +1,21 @@ +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemWarnings::getNamesAndTypes() +{ + return { + {"message", std::make_shared()}, + }; +} + +void StorageSystemWarnings::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + for (const auto & warning : context->getWarnings()) + res_columns[0]->insert(warning); +} + +} diff --git a/src/Storages/System/StorageSystemWarnings.h b/src/Storages/System/StorageSystemWarnings.h new file mode 100644 index 00000000000..087c4016aff --- /dev/null +++ b/src/Storages/System/StorageSystemWarnings.h @@ -0,0 +1,27 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +/** Implements system.warnings table that contains warnings about server configuration + * to be displayed in clickhouse-client. + */ +class StorageSystemWarnings final : public shared_ptr_helper, + public IStorageSystemOneBlock { +public: + std::string getName() const override { return "SystemWarnings"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + friend struct shared_ptr_helper; + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const override; +}; +} diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 1a8aac3b277..d19aef47616 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 7da65b09d6d..b3cc254a392 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #if !defined(ARCADIA_BUILD) @@ -116,6 +117,7 @@ void attachSystemTablesLocal(IDatabase & system_database) attach(system_database, "user_directories"); attach(system_database, "privileges"); attach(system_database, "errors"); + attach(system_database, "warnings"); attach(system_database, "data_skipping_indices"); #if !defined(ARCADIA_BUILD) attach(system_database, "licenses"); diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 0b302ee437a..333f35774e2 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -67,6 +67,11 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory); void registerStorageExternalDistributed(StorageFactory & factory); #endif +#if USE_SQLITE +void registerStorageSQLite(StorageFactory & factory); +#endif + + void registerStorages() { auto & factory = StorageFactory::instance(); @@ -128,6 +133,10 @@ void registerStorages() #if USE_MYSQL || USE_LIBPQXX registerStorageExternalDistributed(factory); #endif + + #if USE_SQLITE + registerStorageSQLite(factory); + #endif } } diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 0f09b973a63..c001d933558 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -143,6 +143,7 @@ SRCS( StorageMySQL.cpp StorageNull.cpp StorageReplicatedMergeTree.cpp + StorageSQLite.cpp StorageSet.cpp StorageStripeLog.cpp StorageTinyLog.cpp @@ -210,6 
+211,7 @@ SRCS( System/StorageSystemTables.cpp System/StorageSystemUserDirectories.cpp System/StorageSystemUsers.cpp + System/StorageSystemWarnings.cpp System/StorageSystemZeros.cpp System/StorageSystemZooKeeper.cpp System/attachSystemTables.cpp diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 3c4ab0edbab..90271f94cbe 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -10,11 +10,13 @@ #include #include -#include #include #include +#include + + namespace DB { diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp new file mode 100644 index 00000000000..e9edcb3d1d4 --- /dev/null +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -0,0 +1,89 @@ +#include + +#if USE_SQLITE + +#include +#include + +#include +#include +#include "registerTableFunctions.h" + +#include +#include + +#include +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; + extern const int SQLITE_ENGINE_ERROR; +} + + +StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, + ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const +{ + auto columns = getActualTableStructure(context); + + auto storage = StorageSQLite::create(StorageID(getDatabaseName(), table_name), + sqlite_db, + remote_table_name, + columns, ConstraintsDescription{}, context); + + storage->startup(); + return storage; +} + + +ColumnsDescription TableFunctionSQLite::getActualTableStructure(ContextPtr /* context */) const +{ + auto columns = fetchSQLiteTableStructure(sqlite_db.get(), remote_table_name); + + if (!columns) + throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, "Failed to fetch table structure for {}", remote_table_name); + + return ColumnsDescription{*columns}; +} + + +void TableFunctionSQLite::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + const auto & func_args = ast_function->as(); + + if (!func_args.arguments) + throw Exception("Table function 'sqlite' must have arguments.", ErrorCodes::BAD_ARGUMENTS); + + ASTs & args = func_args.arguments->children; + + if (args.size() != 2) + throw Exception("SQLite database requires 2 arguments: database path, table name", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + database_path = args[0]->as().value.safeGet(); + remote_table_name = args[1]->as().value.safeGet(); + + sqlite_db = openSQLiteDB(database_path, context); +} + + +void registerTableFunctionSQLite(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/TableFunctions/TableFunctionSQLite.h b/src/TableFunctions/TableFunctionSQLite.h new file mode 100644 index 00000000000..4649dac016f --- /dev/null +++ b/src/TableFunctions/TableFunctionSQLite.h @@ -0,0 +1,36 @@ +#pragma once +#if !defined(ARCADIA_BUILD) +#include "config_core.h" +#endif + +#if USE_SQLITE +#include +#include + + +namespace DB +{ + +class TableFunctionSQLite : public ITableFunction +{ +public: + static constexpr auto name = "sqlite"; + std::string getName() const override { return name; } + +private: + StoragePtr executeImpl( + const ASTPtr & ast_function, ContextPtr context, + const std::string & table_name, ColumnsDescription cached_columns) const override; + + const char * 
getStorageTypeName() const override { return "SQLite"; } + + ColumnsDescription getActualTableStructure(ContextPtr context) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + String database_path, remote_table_name; + std::shared_ptr sqlite_db; +}; + +} + +#endif diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 6cf40c4f090..2aee91997b0 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -42,6 +42,10 @@ void registerTableFunctions() registerTableFunctionPostgreSQL(factory); #endif +#if USE_SQLITE + registerTableFunctionSQLite(factory); +#endif + registerTableFunctionDictionary(factory); } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index c49fafc5f86..6a13c82caba 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -42,6 +42,10 @@ void registerTableFunctionMySQL(TableFunctionFactory & factory); void registerTableFunctionPostgreSQL(TableFunctionFactory & factory); #endif +#if USE_SQLITE +void registerTableFunctionSQLite(TableFunctionFactory & factory); +#endif + void registerTableFunctionDictionary(TableFunctionFactory & factory); void registerTableFunctions(); diff --git a/src/TableFunctions/ya.make b/src/TableFunctions/ya.make index f50e345f2d8..e957c923423 100644 --- a/src/TableFunctions/ya.make +++ b/src/TableFunctions/ya.make @@ -22,6 +22,7 @@ SRCS( TableFunctionNull.cpp TableFunctionNumbers.cpp TableFunctionRemote.cpp + TableFunctionSQLite.cpp TableFunctionURL.cpp TableFunctionValues.cpp TableFunctionView.cpp diff --git a/tests/integration/README.md b/tests/integration/README.md index cc504f01f82..8c353658705 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -5,14 +5,14 @@ This directory contains tests that involve several ClickHouse instances, custom ### Running natively Prerequisites: -* Ubuntu 14.04 (Trusty) or higher. +* Ubuntu 20.04 (Focal) or higher. * [docker](https://www.docker.com/community-edition#/download). Minimum required API version: 1.25, check with `docker version`. You must install latest Docker from https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/#set-up-the-repository Don't use Docker from your system repository. -* [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`. To install: `sudo apt-get install python3-pip libpq-dev zlib1g-dev libcrypto++-dev libssl-dev libkrb5-dev` +* [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`. To install: `sudo apt-get install python3-pip libpq-dev zlib1g-dev libcrypto++-dev libssl-dev libkrb5-dev python3-dev` * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` * [docker-compose](https://docs.docker.com/compose/) and additional python libraries. 
To install: @@ -25,25 +25,29 @@ sudo -H pip install \ confluent-kafka \ dicttoxml \ docker \ - docker-compose==1.22.0 \ + docker-compose \ grpcio \ grpcio-tools \ kafka-python \ kazoo \ minio \ protobuf \ - psycopg2-binary==2.7.5 \ + psycopg2-binary \ pymongo \ + pytz \ pytest \ pytest-timeout \ redis \ tzlocal \ urllib3 \ requests-kerberos \ - dict2xml + dict2xml \ + hypothesis \ + pyhdfs \ + pika ``` -(highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python python3-pytest-timeout python3-minio` +(highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-protobuf python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python python3-pytest-timeout python3-minio` If you want to run the tests under a non-privileged user, you must add this user to `docker` group: `sudo usermod -aG docker $USER` and re-login. (You must close all your sessions (for example, restart your computer)) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 993e7a6e973..bcd47899ca0 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,4 +1,3 @@ -import subprocess from helpers.cluster import run_and_check import pytest import logging diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 5f7cfd9467b..ab5d32751bd 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -29,6 +29,8 @@ from dict2xml import dict2xml from kazoo.client import KazooClient from kazoo.exceptions import KazooException from minio import Minio +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT + from helpers.test_tools import assert_eq_with_retry from helpers import pytest_xdist_logging_to_separate_files @@ -332,12 +334,16 @@ class ClickHouseCluster: # available when with_postgres == True self.postgres_host = "postgres1" self.postgres_ip = None + self.postgres_conn = None self.postgres2_host = "postgres2" self.postgres2_ip = None + self.postgres2_conn = None self.postgres3_host = "postgres3" self.postgres3_ip = None + self.postgres3_conn = None self.postgres4_host = "postgres4" self.postgres4_ip = None + self.postgres4_conn = None self.postgres_port = 5432 self.postgres_dir = p.abspath(p.join(self.instances_dir, "postgres")) self.postgres_logs_dir = os.path.join(self.postgres_dir, "postgres1") @@ -1077,8 +1083,9 @@ class ClickHouseCluster: start = time.time() while time.time() - start < timeout: try: - conn = psycopg2.connect(host=self.postgres_ip, port=self.postgres_port, user='postgres', password='mysecretpassword') - conn.close() + self.postgres_conn = psycopg2.connect(host=self.postgres_ip, port=self.postgres_port, database='postgres', user='postgres', password='mysecretpassword') + self.postgres_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + self.postgres_conn.autocommit = True logging.debug("Postgres Started") return except Exception as ex: @@ -1092,16 +1099,36 @@ class ClickHouseCluster: self.postgres3_ip = self.get_instance_ip(self.postgres3_host) self.postgres4_ip = self.get_instance_ip(self.postgres4_host) start = time.time() - for ip in [self.postgres2_ip, self.postgres3_ip, 
self.postgres4_ip]: - while time.time() - start < timeout: - try: - conn = psycopg2.connect(host=ip, port=self.postgres_port, user='postgres', password='mysecretpassword') - conn.close() - logging.debug("Postgres Cluster Started") - return - except Exception as ex: - logging.debug("Can't connect to Postgres " + str(ex)) - time.sleep(0.5) + while time.time() - start < timeout: + try: + self.postgres2_conn = psycopg2.connect(host=self.postgres2_ip, port=self.postgres_port, database='postgres', user='postgres', password='mysecretpassword') + self.postgres2_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + self.postgres2_conn.autocommit = True + logging.debug("Postgres Cluster host 2 started") + break + except Exception as ex: + logging.debug("Can't connect to Postgres host 2" + str(ex)) + time.sleep(0.5) + while time.time() - start < timeout: + try: + self.postgres3_conn = psycopg2.connect(host=self.postgres3_ip, port=self.postgres_port, database='postgres', user='postgres', password='mysecretpassword') + self.postgres3_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + self.postgres3_conn.autocommit = True + logging.debug("Postgres Cluster host 3 started") + break + except Exception as ex: + logging.debug("Can't connect to Postgres host 3" + str(ex)) + time.sleep(0.5) + while time.time() - start < timeout: + try: + self.postgres4_conn = psycopg2.connect(host=self.postgres4_ip, port=self.postgres_port, database='postgres', user='postgres', password='mysecretpassword') + self.postgres4_conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + self.postgres4_conn.autocommit = True + logging.debug("Postgres Cluster host 4 started") + return + except Exception as ex: + logging.debug("Can't connect to Postgres host 4" + str(ex)) + time.sleep(0.5) raise Exception("Cannot wait Postgres container") diff --git a/tests/integration/helpers/hdfs_api.py b/tests/integration/helpers/hdfs_api.py index 2000de8c36a..3d2d647d0ed 100644 --- a/tests/integration/helpers/hdfs_api.py +++ b/tests/integration/helpers/hdfs_api.py @@ -102,6 +102,7 @@ class HDFSApi(object): return response_data else: logging.error(f"unexpected response_data.status_code {response_data.status_code} != {expected_code}") + time.sleep(1) response_data.raise_for_status() diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py index ef530c4836b..b5d40659629 100644 --- a/tests/integration/helpers/test_tools.py +++ b/tests/integration/helpers/test_tools.py @@ -39,6 +39,9 @@ class TSV: def __str__(self): return '\n'.join(self.lines) + def __repr__(self): + return self.__str__() + def __len__(self): return len(self.lines) diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py b/tests/integration/test_backward_compatibility/test_cte_distributed.py new file mode 100644 index 00000000000..3aec527524b --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -0,0 +1,54 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, name="cte_distributed") +node1 = cluster.add_instance('node1', with_zookeeper=False) +node2 = cluster.add_instance('node2', + with_zookeeper=False, image='yandex/clickhouse-server', tag='21.7.3.14', stay_alive=True, + with_installed_binary=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + + +def test_cte_distributed(start_cluster): + node2.query(""" +WITH + quantile(0.05)(cnt) as 
p05, + quantile(0.95)(cnt) as p95, + p95 - p05 as inter_percentile_range +SELECT + sum(cnt) as total_requests, + count() as data_points, + inter_percentile_range +FROM ( + SELECT + count() as cnt + FROM remote('node{1,2}', numbers(10)) + GROUP BY number +)""") + + node1.query(""" +WITH + quantile(0.05)(cnt) as p05, + quantile(0.95)(cnt) as p95, + p95 - p05 as inter_percentile_range +SELECT + sum(cnt) as total_requests, + count() as data_points, + inter_percentile_range +FROM ( + SELECT + count() as cnt + FROM remote('node{1,2}', numbers(10)) + GROUP BY number +)""") diff --git a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py new file mode 100644 index 00000000000..b3f5c68cf68 --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py @@ -0,0 +1,29 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, name="aggregate_alias_column") +node1 = cluster.add_instance('node1', with_zookeeper=False) +node2 = cluster.add_instance('node2', + with_zookeeper=False, image='yandex/clickhouse-server', tag='21.7.2.7', stay_alive=True, + with_installed_binary=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_select_aggregate_alias_column(start_cluster): + node1.query("create table tab (x UInt64, x_alias UInt64 ALIAS x) engine = Memory") + node2.query("create table tab (x UInt64, x_alias UInt64 ALIAS x) engine = Memory") + node1.query('insert into tab values (1)') + node2.query('insert into tab values (1)') + + node1.query("select sum(x_alias) from remote('node{1,2}', default, tab)") + node2.query("select sum(x_alias) from remote('node{1,2}', default, tab)") diff --git a/tests/integration/test_cluster_copier/test.py b/tests/integration/test_cluster_copier/test.py index 7fe1d8c9d29..3d28295d40e 100644 --- a/tests/integration/test_cluster_copier/test.py +++ b/tests/integration/test_cluster_copier/test.py @@ -89,9 +89,9 @@ class Task1: instance = cluster.instances['s0_0_0'] for cluster_num in ["0", "1"]: - ddl_check_query(instance, "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format(cluster_num)) + ddl_check_query(instance, "DROP DATABASE IF EXISTS default ON CLUSTER cluster{} SYNC".format(cluster_num)) ddl_check_query(instance, - "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format( + "CREATE DATABASE default ON CLUSTER cluster{} ".format( cluster_num)) ddl_check_query(instance, "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) " + @@ -105,11 +105,11 @@ class Task1: settings={"insert_distributed_sync": 1}) def check(self): - assert TSV(self.cluster.instances['s0_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n") - assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n") + assert self.cluster.instances['s0_0_0'].query("SELECT count() FROM hits_all").strip() == "1002" + assert self.cluster.instances['s1_0_0'].query("SELECT count() FROM hits_all").strip() == "1002" - assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("1\n") - assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("0\n") + assert self.cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM hits").strip() == "1" + assert 
self.cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM hits").strip() == "0" instance = self.cluster.instances['s0_0_0'] ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0") diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 58a48bde95d..d0edde2233b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -1,5 +1,8 @@ import pytest +import time +import logging from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance') @@ -38,3 +41,46 @@ def test_grant_create_user(): instance.query("GRANT CREATE USER ON *.* TO A") instance.query("CREATE USER B", user='A') assert instance.query("SELECT 1", user='B') == "1\n" + + +def test_login_as_dropped_user(): + for _ in range(0, 2): + instance.query("CREATE USER A") + assert instance.query("SELECT 1", user='A') == "1\n" + + instance.query("DROP USER A") + expected_error = "no user with such name" + assert expected_error in instance.query_and_get_error("SELECT 1", user='A') + + +def test_login_as_dropped_user_xml(): + for _ in range(0, 2): + instance.exec_in_container(["bash", "-c" , """ + cat > /etc/clickhouse-server/users.d/user_c.xml << EOF + + + + + + + + +EOF"""]) + + assert_eq_with_retry(instance, "SELECT name FROM system.users WHERE name='C'", "C") + + instance.exec_in_container(["bash", "-c" , "rm /etc/clickhouse-server/users.d/user_c.xml"]) + + expected_error = "no user with such name" + while True: + out, err = instance.query_and_get_answer_with_error("SELECT 1", user='C') + if expected_error in err: + logging.debug(f"Got error '{expected_error}' just as expected") + break + if out == "1\n": + logging.debug(f"Got output '1', retrying...") + time.sleep(0.5) + continue + raise Exception(f"Expected either output '1' or error '{expected_error}', got output={out} and error={err}") + + assert instance.query("SELECT name FROM system.users WHERE name='C'") == "" diff --git a/tests/integration/test_disk_types/configs/config.xml b/tests/integration/test_disk_types/configs/config.xml index a3ec8b3a58a..0902130e106 100644 --- a/tests/integration/test_disk_types/configs/config.xml +++ b/tests/integration/test_disk_types/configs/config.xml @@ -19,6 +19,11 @@ memory + + encrypted + disk_s3 + 1234567812345678 + diff --git a/tests/integration/test_disk_types/configs/storage.xml b/tests/integration/test_disk_types/configs/storage.xml index 1167a4f7382..4d8050c050c 100644 --- a/tests/integration/test_disk_types/configs/storage.xml +++ b/tests/integration/test_disk_types/configs/storage.xml @@ -15,6 +15,11 @@ hdfs http://hdfs1:9000/data/ + + encrypted + disk_s3 + 1234567812345678 + diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 3f1a656d98f..35e900c3c9f 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -6,6 +6,7 @@ disk_types = { "disk_s3": "s3", "disk_memory": "memory", "disk_hdfs": "hdfs", + "disk_encrypted": "encrypted", } diff --git a/tests/integration/test_distributed_respect_user_timeouts/test.py b/tests/integration/test_distributed_respect_user_timeouts/test.py index 662bf7fa6de..a97c7c3d355 100644 --- a/tests/integration/test_distributed_respect_user_timeouts/test.py +++ b/tests/integration/test_distributed_respect_user_timeouts/test.py @@ -33,7 +33,7 @@ 
SELECTS_SQL = { "ORDER BY node"), } -EXCEPTION_NETWORK = 'e.displayText() = DB::NetException: ' +EXCEPTION_NETWORK = 'DB::NetException: ' EXCEPTION_TIMEOUT = 'Timeout exceeded while reading from socket (' EXCEPTION_CONNECT = 'Timeout: connect timed out: ' @@ -76,13 +76,13 @@ def _check_exception(exception, expected_tries=3): for i, line in enumerate(lines[3:3 + expected_tries]): expected_lines = ( - 'Code: 209, ' + EXCEPTION_NETWORK + EXCEPTION_TIMEOUT, - 'Code: 209, ' + EXCEPTION_NETWORK + EXCEPTION_CONNECT, + 'Code: 209. ' + EXCEPTION_NETWORK + EXCEPTION_TIMEOUT, + 'Code: 209. ' + EXCEPTION_NETWORK + EXCEPTION_CONNECT, EXCEPTION_TIMEOUT, ) assert any(line.startswith(expected) for expected in expected_lines), \ - 'Unexpected exception at one of the connection attempts' + 'Unexpected exception "{}" at one of the connection attempts'.format(line) assert lines[3 + expected_tries] == '', 'Wrong number of connect attempts' diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline_long.reference b/tests/integration/test_encrypted_disk/__init__.py similarity index 100% rename from tests/queries/0_stateless/01293_client_interactive_vertical_multiline_long.reference rename to tests/integration/test_encrypted_disk/__init__.py diff --git a/tests/integration/test_encrypted_disk/configs/storage.xml b/tests/integration/test_encrypted_disk/configs/storage.xml new file mode 100644 index 00000000000..6a5e016d501 --- /dev/null +++ b/tests/integration/test_encrypted_disk/configs/storage.xml @@ -0,0 +1,83 @@ + + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + memory + + + local + /disk/ + + + encrypted + disk_s3 + encrypted/ + 1234567812345678 + + + encrypted + disk_local + encrypted/ + 1234567812345678 + + + encrypted + disk_local + encrypted2/ + 1234567812345678 + + + encrypted + disk_local + encrypted_key192b/ + aes_192_ctr + 109105c600c12066f82f1a4dbb41a08e4A4348C8387ADB6A + + + + + +
+ disk_local_encrypted +
+
+
+ + +
+ disk_local_encrypted_key192b +
+
+
+ + +
+ disk_local +
+ + disk_local_encrypted + disk_local_encrypted2 + disk_local_encrypted_key192b + +
+
+ + +
+ disk_s3 +
+ + disk_s3_encrypted + +
+
+
+
+
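The storage.xml above layers `encrypted` disks on top of the plain local and S3 disks, each with a fixed test key (and an explicit `aes_192_ctr` algorithm for the 192-bit variant), and groups them into the `encrypted_policy`, `encrypted_policy_key192b`, `local_policy` and `s3_policy` policies that the test below parametrizes over. A natural extra check, not part of this patch, would be that data written through an encrypted disk never reaches the backing disk in plaintext. A rough sketch, assuming the backing files live under `/disk/encrypted/` (derived from the `disk_local` path `/disk/` and the `encrypted/` prefix in the config above; both are assumptions for illustration):

```python
# Hypothetical at-rest check, NOT part of this patch: after inserting rows whose
# values contain a known token, the token should not appear in the files that the
# encrypted disk writes onto its backing local disk.
def assert_no_plaintext_on_backing_disk(node, token="data"):
    # /disk/ is the disk_local path and encrypted/ the prefix from the config above
    # (both assumed here for illustration only).
    out = node.exec_in_container(
        ["bash", "-c", f"grep -rl '{token}' /disk/encrypted/ || true"])
    assert out.strip() == "", f"plaintext '{token}' leaked to: {out}"
```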
diff --git a/tests/integration/test_encrypted_disk/test.py b/tests/integration/test_encrypted_disk/test.py new file mode 100644 index 00000000000..df605613ab7 --- /dev/null +++ b/tests/integration/test_encrypted_disk/test.py @@ -0,0 +1,107 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + + +FIRST_PART_NAME = "all_1_1_0" + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance("node", + main_configs=["configs/storage.xml"], + tmpfs=["/disk:size=100M"], + with_minio=True) + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.mark.parametrize("policy", ["encrypted_policy", "encrypted_policy_key192b", "local_policy", "s3_policy"]) +def test_encrypted_disk(cluster, policy): + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE encrypted_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format(policy) + ) + + node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')") + select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values" + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("INSERT INTO encrypted_test VALUES (2,'data'),(3,'data')") + node.query("OPTIMIZE TABLE encrypted_test FINAL") + assert node.query(select_query) == "(0,'data'),(1,'data'),(2,'data'),(3,'data')" + + node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY") + + +@pytest.mark.parametrize("policy, destination_disks", [("local_policy", ["disk_local_encrypted", "disk_local_encrypted2", "disk_local_encrypted_key192b", "disk_local"]), ("s3_policy", ["disk_s3_encrypted", "disk_s3"])]) +def test_part_move(cluster, policy, destination_disks): + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE encrypted_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format(policy) + ) + + node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')") + select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values" + assert node.query(select_query) == "(0,'data'),(1,'data')" + + for destination_disk in destination_disks: + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, destination_disk)) + assert node.query(select_query) == "(0,'data'),(1,'data')" + with pytest.raises(QueryRuntimeException) as exc: + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, destination_disk)) + assert("Part '{}' is already on disk '{}'".format(FIRST_PART_NAME, destination_disk) in str(exc.value)) + + assert node.query(select_query) == "(0,'data'),(1,'data')" + node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY") + + +@pytest.mark.parametrize("policy,encrypted_disk", [("local_policy", "disk_local_encrypted"), ("s3_policy", "disk_s3_encrypted")]) +def test_optimize_table(cluster, policy, encrypted_disk): + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE encrypted_test ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='{}' + """.format(policy) + ) + + node.query("INSERT INTO encrypted_test VALUES (0,'data'),(1,'data')") + select_query = "SELECT * FROM encrypted_test ORDER BY id FORMAT Values" + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, 
encrypted_disk)) + assert node.query(select_query) == "(0,'data'),(1,'data')" + + node.query("INSERT INTO encrypted_test VALUES (2,'data'),(3,'data')") + node.query("OPTIMIZE TABLE encrypted_test FINAL") + + with pytest.raises(QueryRuntimeException) as exc: + node.query("ALTER TABLE encrypted_test MOVE PART '{}' TO DISK '{}'".format(FIRST_PART_NAME, encrypted_disk)) + + assert("Part {} is not exists or not active".format(FIRST_PART_NAME) in str(exc.value)) + + assert node.query(select_query) == "(0,'data'),(1,'data'),(2,'data'),(3,'data')" + + node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY") diff --git a/tests/integration/test_explain_estimates/__init__.py b/tests/integration/test_explain_estimates/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_explain_estimates/test.py b/tests/integration/test_explain_estimates/test.py new file mode 100644 index 00000000000..a2b65564dbc --- /dev/null +++ b/tests/integration/test_explain_estimates/test.py @@ -0,0 +1,24 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('instance') + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_explain_estimates(start_cluster): + node1.query("CREATE TABLE test (i Int64) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity = 16, write_final_mark = 0") + node1.query("INSERT INTO test SELECT number FROM numbers(128)") + node1.query("OPTIMIZE TABLE test") + system_parts_result = node1.query("SELECT any(database), any(table), count() as parts, sum(rows) as rows, sum(marks) as marks FROM system.parts WHERE database = 'default' AND table = 'test' and active = 1 GROUP BY (database, table)") + explain_estimates_result = node1.query("EXPLAIN ESTIMATE SELECT * FROM test") + assert(system_parts_result == explain_estimates_result) diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index 3fd1cb0ecae..71693148a17 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -944,7 +944,22 @@ def move_to_prewhere_and_column_filtering(clickhouse_node, mysql_node, service_n clickhouse_node.query("CREATE DATABASE cond_on_key_col ENGINE = MaterializeMySQL('{}:3306', 'cond_on_key_col', 'root', 'clickhouse')".format(service_name)) mysql_node.query("create table cond_on_key_col.products (id int primary key, product_id int not null, catalog_id int not null, brand_id int not null, name text)") mysql_node.query("insert into cond_on_key_col.products (id, name, catalog_id, brand_id, product_id) values (915, 'ertyui', 5287, 15837, 0), (990, 'wer', 1053, 24390, 1), (781, 'qwerty', 1041, 1176, 2);") + mysql_node.query("create table cond_on_key_col.test (id int(11) NOT NULL AUTO_INCREMENT, a int(11) DEFAULT NULL, b int(11) DEFAULT NULL, PRIMARY KEY (id)) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=utf8mb4;") + mysql_node.query("insert into cond_on_key_col.test values (42, 123, 1);") + mysql_node.query("CREATE TABLE cond_on_key_col.balance_change_record (id bigint(20) NOT NULL AUTO_INCREMENT, type tinyint(4) DEFAULT NULL, value decimal(10,4) DEFAULT NULL, time timestamp NULL DEFAULT NULL, " + "initiative_id varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, passivity_id 
varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, " + "person_id varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, tenant_code varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin DEFAULT NULL, " + "created_time timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', updated_time timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, " + "value_snapshot decimal(10,4) DEFAULT NULL, PRIMARY KEY (id), KEY balance_change_record_initiative_id (person_id) USING BTREE, " + "KEY type (type) USING BTREE, KEY balance_change_record_type (time) USING BTREE, KEY initiative_id (initiative_id) USING BTREE, " + "KEY balance_change_record_tenant_code (passivity_id) USING BTREE, KEY tenant_code (tenant_code) USING BTREE) ENGINE=InnoDB AUTO_INCREMENT=1691049 DEFAULT CHARSET=utf8") + mysql_node.query("insert into cond_on_key_col.balance_change_record values (123, 1, 3.14, null, 'qwe', 'asd', 'zxc', 'rty', null, null, 2.7);") + mysql_node.query("CREATE TABLE cond_on_key_col.test1 (id int(11) NOT NULL AUTO_INCREMENT, c1 varchar(32) NOT NULL, c2 varchar(32), PRIMARY KEY (id)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4") + mysql_node.query("insert into cond_on_key_col.test1(c1,c2) values ('a','b'), ('c', null);") check_query(clickhouse_node, "SELECT DISTINCT P.id, P.name, P.catalog_id FROM cond_on_key_col.products P WHERE P.name ILIKE '%e%' and P.catalog_id=5287", '915\tertyui\t5287\n') + check_query(clickhouse_node, "select count(a) from cond_on_key_col.test where b = 1;", "1\n") + check_query(clickhouse_node, "select id from cond_on_key_col.balance_change_record where type=1;", "123\n") + check_query(clickhouse_node, "select count(c1) from cond_on_key_col.test1 where c2='b';", "1\n") clickhouse_node.query("DROP DATABASE cond_on_key_col") mysql_node.query("DROP DATABASE cond_on_key_col") diff --git a/tests/integration/test_max_http_connections_for_replication/test.py b/tests/integration/test_max_http_connections_for_replication/test.py index 3921cbfd1ae..67b3c5b53aa 100644 --- a/tests/integration/test_max_http_connections_for_replication/test.py +++ b/tests/integration/test_max_http_connections_for_replication/test.py @@ -11,7 +11,7 @@ def _fill_nodes(nodes, shard, connections_count): node.query( ''' CREATE DATABASE test; - + CREATE TABLE test_table(date Date, id UInt32, dummy UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}') PARTITION BY date @@ -114,5 +114,5 @@ def test_multiple_endpoint_connections_count(start_big_cluster): assert_eq_with_retry(node4, "select count() from test_table", "100") assert_eq_with_retry(node5, "select count() from test_table", "100") - # two per each host - assert node5.query("SELECT value FROM system.events where event='CreatedHTTPConnections'") == '4\n' + # Two per each host or sometimes less, if fetches are not performed in parallel. But not more. 
+ assert node5.query("SELECT value FROM system.events where event='CreatedHTTPConnections'") <= '4\n' diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 223ad2e1af2..d26692a0d93 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -78,7 +78,7 @@ def wait_for_delete_hdfs_objects(cluster, expected, num_tries=30): while num_tries > 0: num_hdfs_objects = len(fs.listdir('/clickhouse')) if num_hdfs_objects == expected: - break; + break num_tries -= 1 time.sleep(1) assert(len(fs.listdir('/clickhouse')) == expected) diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 6533a6a23f9..070aa9967fc 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -95,8 +95,11 @@ def test_mysql_client(started_cluster): '''.format(host=started_cluster.get_instance_ip('node'), port=server_port), demux=True) assert stdout.decode() == 'count()\n1\n' - assert stderr[0:182].decode() == "mysql: [Warning] Using a password on the command line interface can be insecure.\n" \ - "ERROR 81 (00000) at line 1: Code: 81, e.displayText() = DB::Exception: Database system2 doesn't exist" + expected_msg = '\n'.join([ + "mysql: [Warning] Using a password on the command line interface can be insecure.", + "ERROR 81 (00000) at line 1: Code: 81. DB::Exception: Database system2 doesn't exist", + ]) + assert stderr[:len(expected_msg)].decode() == expected_msg code, (stdout, stderr) = started_cluster.mysql_client_container.exec_run(''' mysql --protocol tcp -h {host} -P {port} default -u default --password=123 @@ -122,8 +125,11 @@ def test_mysql_client_exception(started_cluster): -e "CREATE TABLE default.t1_remote_mysql AS mysql('127.0.0.1:10086','default','t1_local','default','');" '''.format(host=started_cluster.get_instance_ip('node'), port=server_port), demux=True) - assert stderr[0:258].decode() == "mysql: [Warning] Using a password on the command line interface can be insecure.\n" \ - "ERROR 1000 (00000) at line 1: Poco::Exception. Code: 1000, e.code() = 0, e.displayText() = Exception: Connections to all replicas failed: default@127.0.0.1:10086 as user default" + expected_msg = '\n'.join([ + "mysql: [Warning] Using a password on the command line interface can be insecure.", + "ERROR 1000 (00000) at line 1: Poco::Exception. Code: 1000, e.code() = 0, Exception: Connections to all replicas failed: default@127.0.0.1:10086 as user default", + ]) + assert stderr[:len(expected_msg)].decode() == expected_msg def test_mysql_affected_rows(started_cluster): @@ -328,8 +334,7 @@ def test_python_client(started_cluster): with pytest.raises(pymysql.InternalError) as exc_info: client.query('select name from tables') - assert exc_info.value.args[1][ - 0:77] == "Code: 60, e.displayText() = DB::Exception: Table default.tables doesn't exist" + assert exc_info.value.args[1].startswith("Code: 60. DB::Exception: Table default.tables doesn't exist"), exc_info.value.args[1] cursor = client.cursor(pymysql.cursors.DictCursor) cursor.execute("select 1 as a, 'теÑÑ‚' as b") @@ -348,8 +353,7 @@ def test_python_client(started_cluster): with pytest.raises(pymysql.InternalError) as exc_info: client.query('select name from tables') - assert exc_info.value.args[1][ - 0:77] == "Code: 60, e.displayText() = DB::Exception: Table default.tables doesn't exist" + assert exc_info.value.args[1].startswith("Code: 60. 
DB::Exception: Table default.tables doesn't exist"), exc_info.value.args[1] cursor = client.cursor(pymysql.cursors.DictCursor) cursor.execute("select 1 as a, 'теÑÑ‚' as b") @@ -360,7 +364,7 @@ def test_python_client(started_cluster): with pytest.raises(pymysql.InternalError) as exc_info: client.select_db('system2') - assert exc_info.value.args[1][0:73] == "Code: 81, e.displayText() = DB::Exception: Database system2 doesn't exist" + assert exc_info.value.args[1].startswith("Code: 81. DB::Exception: Database system2 doesn't exist"), exc_info.value.args[1] cursor = client.cursor(pymysql.cursors.DictCursor) cursor.execute('CREATE DATABASE x') diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index 97fd461e640..ed26ab82bc7 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -236,7 +236,7 @@ def test_different_data_types(started_cluster): ( key Integer NOT NULL PRIMARY KEY, a Date[] NOT NULL, -- Date - b Timestamp[] NOT NULL, -- DateTime + b Timestamp[] NOT NULL, -- DateTime64(6) c real[][] NOT NULL, -- Float32 d double precision[][] NOT NULL, -- Float64 e decimal(5, 5)[][][] NOT NULL, -- Decimal32 @@ -253,11 +253,11 @@ def test_different_data_types(started_cluster): for i in range(10): instance.query(''' INSERT INTO postgres_database.test_data_types VALUES - ({}, -32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12', '2000-05-12', 0.2, 0.2)'''.format(i)) + ({}, -32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 0.2, 0.2)'''.format(i)) check_tables_are_synchronized('test_data_types', 'id'); result = instance.query('SELECT * FROM test_database.test_data_types ORDER BY id LIMIT 1;') - assert(result == '0\t-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\t2000-05-12\t0.20000\t0.20000\n') + assert(result == '0\t-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t0.20000\t0.20000\n') for i in range(10): col = random.choice(['a', 'b', 'c']) @@ -270,7 +270,7 @@ def test_different_data_types(started_cluster): "VALUES (" "0, " "['2000-05-12', '2000-05-12'], " - "['2000-05-12 12:12:12', '2000-05-12 12:12:12'], " + "['2000-05-12 12:12:12.012345', '2000-05-12 12:12:12.012345'], " "[[1.12345], [1.12345], [1.12345]], " "[[1.1234567891], [1.1234567891], [1.1234567891]], " "[[[0.11111, 0.11111]], [[0.22222, 0.22222]], [[0.33333, 0.33333]]], " @@ -284,7 +284,7 @@ def test_different_data_types(started_cluster): expected = ( "0\t" + "['2000-05-12','2000-05-12']\t" + - "['2000-05-12 12:12:12','2000-05-12 12:12:12']\t" + + "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t" + "[[1.12345],[1.12345],[1.12345]]\t" + "[[1.1234567891],[1.1234567891],[1.1234567891]]\t" + "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t" @@ -622,7 +622,7 @@ def test_virtual_columns(started_cluster): instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number from numbers(10)") check_tables_are_synchronized('postgresql_replica_0'); - # just check that it works, no check with `expected` becuase _version is taken as LSN, which will be different each time. 
+ # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time. result = instance.query('SELECT key, value, _sign, _version FROM test_database.postgresql_replica_0;') print(result) diff --git a/tests/integration/test_rename_column/test.py b/tests/integration/test_rename_column/test.py index 3a818303f40..e3e776a0791 100644 --- a/tests/integration/test_rename_column/test.py +++ b/tests/integration/test_rename_column/test.py @@ -99,8 +99,8 @@ def create_distributed_table(node, table_name): def drop_distributed_table(node, table_name): - node.query("DROP TABLE IF EXISTS {} ON CLUSTER test_cluster".format(table_name)) - node.query("DROP TABLE IF EXISTS {}_replicated ON CLUSTER test_cluster".format(table_name)) + node.query("DROP TABLE IF EXISTS {} ON CLUSTER test_cluster SYNC".format(table_name)) + node.query("DROP TABLE IF EXISTS {}_replicated ON CLUSTER test_cluster SYNC".format(table_name)) time.sleep(1) diff --git a/tests/integration/test_replicated_fetches_timeouts/test.py b/tests/integration/test_replicated_fetches_timeouts/test.py index 963ec2487fd..88763265270 100644 --- a/tests/integration/test_replicated_fetches_timeouts/test.py +++ b/tests/integration/test_replicated_fetches_timeouts/test.py @@ -78,7 +78,7 @@ def test_no_stall(started_cluster): """ SELECT count() FROM system.replication_queue - WHERE last_exception LIKE '%e.displayText() = Timeout%' + WHERE last_exception LIKE '%Timeout%' AND last_exception NOT LIKE '%connect timed out%' """).strip()) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..46a11a8fe16 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml @@ -0,0 +1,86 @@ + + + + + hdfs + hdfs://hdfs1:9000/clickhouse1/ + + + hdfs + hdfs://hdfs1:9000/clickhouse1/ + + + hdfs + hdfs://hdfs1:9000/clickhouse2/ + + + + + +
+ hdfs1 +
+
+
+ + +
+ default +
+ + hdfs1 + +
+ 0.0 +
+ + +
+ hdfs2 +
+ + hdfs1 + +
+
+ + +
+ hdfs1_again +
+ + hdfs1 + +
+
+
+
+ + + 1024000 + 1 + 1 + + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + + + test_cluster + 1 + +
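The config above points the `hdfs1`, `hdfs1_again` and `hdfs2` disks at shared HDFS paths, enables zero-copy replication for remote filesystems, and defines the two-node `test_cluster`. The object-count assertions in the test that follows all reduce to the same accounting: every replica of a table contributes one `format_version.txt`, while each compact part contributes its seven files only once, because with zero-copy replication the fetching replica reuses the files already written to HDFS. A small sketch of that arithmetic, using the same constants the test defines:

```python
# Illustration only: mirrors FILES_OVERHEAD_PER_TABLE and FILES_OVERHEAD_PER_PART_COMPACT
# from the test below. With zero-copy replication a part's files are stored on HDFS
# once, no matter how many replicas reference them.
FILES_OVERHEAD_PER_TABLE = 1          # format_version.txt, written by each replica
FILES_OVERHEAD_PER_PART_COMPACT = 7   # files of one compact part, stored once

def expected_hdfs_objects(replicas, compact_parts):
    return replicas * FILES_OVERHEAD_PER_TABLE + compact_parts * FILES_OVERHEAD_PER_PART_COMPACT

# test_hdfs_zero_copy_replication_insert waits for expected_hdfs_objects(2, 0) == 2
# after CREATE TABLE ON CLUSTER, then expected_hdfs_objects(2, 1) == 9 after the insert.
assert expected_hdfs_objects(2, 1) == 9
```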
diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py new file mode 100644 index 00000000000..f426c3619a4 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -0,0 +1,212 @@ +import logging +from string import Template +import time + +import pytest +from helpers.cluster import ClickHouseCluster + +from pyhdfs import HdfsClient + +SHARDS = 2 +FILES_OVERHEAD_PER_TABLE = 1 # format_version.txt +FILES_OVERHEAD_PER_PART_COMPACT = 7 + + +def wait_for_hdfs_objects(cluster, fp, expected, num_tries=30): + fs = HdfsClient(hosts=cluster.hdfs_ip) + while num_tries > 0: + num_hdfs_objects = len(fs.listdir(fp)) + if num_hdfs_objects == expected: + break + num_tries -= 1 + time.sleep(1) + assert(len(fs.listdir(fp)) == expected) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance("node1", main_configs=["configs/config.d/storage_conf.xml"], + macros={'replica': 'node1'}, + with_zookeeper=True, + with_hdfs=True) + cluster.add_instance("node2", main_configs=["configs/config.d/storage_conf.xml"], + macros={'replica': 'node2'}, + with_zookeeper=True, + with_hdfs=True) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + fs = HdfsClient(hosts=cluster.hdfs_ip) + fs.mkdirs('/clickhouse1') + fs.mkdirs('/clickhouse2') + logging.info("Created HDFS directory") + + yield cluster + finally: + cluster.shutdown() + + +def test_hdfs_zero_copy_replication_insert(cluster): + node1 = cluster.instances["node1"] + node2 = cluster.instances["node2"] + try: + node1.query( + """ + CREATE TABLE hdfs_test ON CLUSTER test_cluster (dt DateTime, id Int64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hdfs_test', '{replica}') + ORDER BY (dt, id) + SETTINGS storage_policy='hdfs_only' + """ + ) + wait_for_hdfs_objects(cluster, "/clickhouse1", SHARDS * FILES_OVERHEAD_PER_TABLE) + + node1.query("INSERT INTO hdfs_test VALUES (now() - INTERVAL 3 DAY, 10)") + node2.query("SYSTEM SYNC REPLICA hdfs_test") + assert node1.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" + assert node2.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" + assert node1.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" + assert node2.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" + assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values") == "('all','hdfs1')" + assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values") == "('all','hdfs1')" + wait_for_hdfs_objects(cluster, "/clickhouse1", SHARDS * FILES_OVERHEAD_PER_TABLE + FILES_OVERHEAD_PER_PART_COMPACT) + finally: + node1.query("DROP TABLE IF EXISTS hdfs_test NO DELAY") + node2.query("DROP TABLE IF EXISTS hdfs_test NO DELAY") + + + +@pytest.mark.parametrize( + ("storage_policy", "init_objects"), + [("hybrid", 0), + ("tiered", 0), + ("tiered_copy", FILES_OVERHEAD_PER_TABLE)] +) +def test_hdfs_zero_copy_replication_single_move(cluster, storage_policy, init_objects): + node1 = cluster.instances["node1"] + try: + node1.query( + Template(""" + CREATE TABLE single_node_move_test (dt DateTime, id Int64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/single_node_move_test', '{replica}') + ORDER BY (dt, id) + SETTINGS storage_policy='$policy' + 
""").substitute(policy=storage_policy) + ) + wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) + + node1.query("INSERT INTO single_node_move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)") + assert node1.query("SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values") == "(10),(11)" + + node1.query("ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'external'") + assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='single_node_move_test' FORMAT Values") == "('all','hdfs1')" + assert node1.query("SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values") == "(10),(11)" + wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT) + + node1.query("ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'main'") + assert node1.query("SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values") == "(10),(11)" + finally: + node1.query("DROP TABLE IF EXISTS single_node_move_test NO DELAY") + + +@pytest.mark.parametrize( + ("storage_policy", "init_objects"), + [("hybrid", 0), + ("tiered", 0), + ("tiered_copy", SHARDS * FILES_OVERHEAD_PER_TABLE)] +) +def test_hdfs_zero_copy_replication_move(cluster, storage_policy, init_objects): + node1 = cluster.instances["node1"] + node2 = cluster.instances["node2"] + try: + node1.query( + Template(""" + CREATE TABLE move_test ON CLUSTER test_cluster (dt DateTime, id Int64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/move_test', '{replica}') + ORDER BY (dt, id) + SETTINGS storage_policy='$policy' + """).substitute(policy=storage_policy) + ) + wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) + + node1.query("INSERT INTO move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)") + node2.query("SYSTEM SYNC REPLICA move_test") + + assert node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") == "(10),(11)" + assert node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") == "(10),(11)" + + node1.query("ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'") + wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT) + + node2.query("ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'") + assert node1.query("SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values") == "('all','hdfs1')" + assert node2.query("SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values") == "('all','hdfs1')" + assert node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") == "(10),(11)" + assert node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") == "(10),(11)" + wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT) + finally: + node1.query("DROP TABLE IF EXISTS move_test NO DELAY") + node2.query("DROP TABLE IF EXISTS move_test NO DELAY") + + +@pytest.mark.parametrize( + ("storage_policy"), ["hybrid", "tiered", "tiered_copy"] +) +def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): + node1 = cluster.instances["node1"] + node2 = cluster.instances["node2"] + try: + node1.query( + Template(""" + CREATE TABLE ttl_move_test ON CLUSTER test_cluster (dt DateTime, id Int64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_move_test', '{replica}') + ORDER BY (dt, id) + TTL dt + INTERVAL 2 DAY TO VOLUME 'external' + SETTINGS storage_policy='$policy' + 
""").substitute(policy=storage_policy) + ) + + node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 3 DAY, 10)") + node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 1 DAY, 11)") + + node1.query("OPTIMIZE TABLE ttl_move_test FINAL") + node2.query("SYSTEM SYNC REPLICA ttl_move_test") + + assert node1.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" + assert node2.query("SELECT count() FROM ttl_move_test FORMAT Values") == "(2)" + assert node1.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") == "(10),(11)" + assert node2.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") == "(10),(11)" + finally: + node1.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") + node2.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") + + +def test_hdfs_zero_copy_with_ttl_delete(cluster): + node1 = cluster.instances["node1"] + node2 = cluster.instances["node2"] + try: + node1.query( + """ + CREATE TABLE ttl_delete_test ON CLUSTER test_cluster (dt DateTime, id Int64) + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_delete_test', '{replica}') + ORDER BY (dt, id) + TTL dt + INTERVAL 2 DAY + SETTINGS storage_policy='tiered' + """ + ) + + node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 3 DAY, 10)") + node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 1 DAY, 11)") + + node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") + node2.query("SYSTEM SYNC REPLICA ttl_delete_test") + + assert node1.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" + assert node2.query("SELECT count() FROM ttl_delete_test FORMAT Values") == "(1)" + assert node1.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") == "(11)" + assert node2.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") == "(11)" + finally: + node1.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") + node2.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml index 1f75a4efeae..d22ac8113a8 100644 --- a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml @@ -21,7 +21,7 @@ 0 - 0 + 0 diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml index d8c7f49fc49..0cf9191c4af 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml @@ -21,7 +21,7 @@ 0 - 1 + 1 diff --git a/tests/integration/test_replicated_mutations/test.py b/tests/integration/test_replicated_mutations/test.py index 12a49ec22d8..68bf48642ac 100644 --- a/tests/integration/test_replicated_mutations/test.py +++ b/tests/integration/test_replicated_mutations/test.py @@ -33,8 +33,13 @@ def started_cluster(): node.query("DROP TABLE IF EXISTS test_mutations") for node in [node1, node2, node3, node4]: - node.query( - "CREATE TABLE test_mutations(d Date, x UInt32, i UInt32) ENGINE ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/test_mutations', '{instance}') ORDER BY x PARTITION BY toYYYYMM(d)") + node.query(""" + CREATE TABLE test_mutations(d Date, x UInt32, i UInt32) + ENGINE 
ReplicatedMergeTree('/clickhouse/{cluster}/tables/test/test_mutations', '{instance}') + ORDER BY x + PARTITION BY toYYYYMM(d) + SETTINGS number_of_free_entries_in_pool_to_execute_mutation=0 + """) node5.query( "CREATE TABLE test_mutations(d Date, x UInt32, i UInt32) ENGINE MergeTree() ORDER BY x PARTITION BY toYYYYMM(d)") diff --git a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml index db639cabb63..89c97aa3360 100644 --- a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml +++ b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml @@ -66,7 +66,7 @@ 1024 1 - 1 + 1 diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 731644b0987..f3c83166b46 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -17,7 +17,7 @@ def started_cluster(): def test_read_write_storage(started_cluster): hdfs_api = started_cluster.hdfs_api - + node1.query("drop table if exists SimpleHDFSStorage SYNC") node1.query( "create table SimpleHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/simple_storage', 'TSV')") node1.query("insert into SimpleHDFSStorage values (1, 'Mark', 72.53)") diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 51b2052baae..b9fc0b2272f 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -66,7 +66,7 @@ def get_kafka_producer(port, serializer, retries): except Exception as e: errors += [str(e)] time.sleep(1) - + raise Exception("Connection not establised, {}".format(errors)) def producer_serializer(x): @@ -1339,7 +1339,7 @@ def test_librdkafka_compression(kafka_cluster): Example of corruption: - 2020.12.10 09:59:56.831507 [ 20 ] {} void DB::StorageKafka::threadFunc(size_t): Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected '"' before: 'foo"}': (while reading the value of key value): (at row 1) + 2020.12.10 09:59:56.831507 [ 20 ] {} void DB::StorageKafka::threadFunc(size_t): Code: 27. 
DB::Exception: Cannot parse input: expected '"' before: 'foo"}': (while reading the value of key value): (at row 1) To trigger this regression there should duplicated messages diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 307879265df..28a76631c0f 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -1,55 +1,18 @@ -import time - +import logging import pytest -import psycopg2 from multiprocessing.dummy import Pool from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', with_postgres=True) node2 = cluster.add_instance('node2', with_postgres_cluster=True) -def get_postgres_conn(cluster, ip, database=False): - if database == True: - conn_string = f"host={ip} port='{cluster.postgres_port}' dbname='clickhouse' user='postgres' password='mysecretpassword'" - else: - conn_string = f"host={ip} port='{cluster.postgres_port}' user='postgres' password='mysecretpassword'" - - conn = psycopg2.connect(conn_string) - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - -def create_postgres_db(conn, name): - cursor = conn.cursor() - cursor.execute("DROP DATABASE IF EXISTS {}".format(name)) - cursor.execute("CREATE DATABASE {}".format(name)) - @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - postgres_conn = get_postgres_conn(cluster, ip=cluster.postgres_ip) - print("postgres connected") - create_postgres_db(postgres_conn, 'clickhouse') - - postgres_conn = get_postgres_conn(cluster, ip=cluster.postgres2_ip) - print("postgres2 connected") - create_postgres_db(postgres_conn, 'clickhouse') - - postgres_conn = get_postgres_conn(cluster, ip=cluster.postgres3_ip) - print("postgres2 connected") - create_postgres_db(postgres_conn, 'clickhouse') - - postgres_conn = get_postgres_conn(cluster, ip=cluster.postgres4_ip) - print("postgres2 connected") - create_postgres_db(postgres_conn, 'clickhouse') - - print("postgres connected") yield cluster finally: @@ -57,50 +20,58 @@ def started_cluster(): def test_postgres_select_insert(started_cluster): - conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) - cursor = conn.cursor() + cursor = started_cluster.postgres_conn.cursor() table_name = 'test_many' - table = f'''postgresql('{started_cluster.postgres_ip}:{started_cluster.postgres_port}', 'clickhouse', '{table_name}', 'postgres', 'mysecretpassword')''' - cursor.execute('CREATE TABLE IF NOT EXISTS {} (a integer, b text, c integer)'.format(table_name)) + table = f'''postgresql('{started_cluster.postgres_ip}:{started_cluster.postgres_port}', 'postgres', '{table_name}', 'postgres', 'mysecretpassword')''' + cursor.execute(f'DROP TABLE IF EXISTS {table_name}') + cursor.execute(f'CREATE TABLE {table_name} (a integer, b text, c integer)') - result = node1.query(''' - INSERT INTO TABLE FUNCTION {} - SELECT number, concat('name_', toString(number)), 3 from numbers(10000)'''.format(table)) - check1 = "SELECT count() FROM {}".format(table) - check2 = "SELECT Sum(c) FROM {}".format(table) - check3 = "SELECT count(c) FROM {} WHERE a % 2 == 0".format(table) - check4 = "SELECT count() FROM {} WHERE b LIKE concat('name_', toString(1))".format(table) + result = node1.query(f''' + INSERT INTO TABLE FUNCTION {table} + SELECT number, 
concat('name_', toString(number)), 3 from numbers(10000)''') + check1 = f"SELECT count() FROM {table}" + check2 = f"SELECT Sum(c) FROM {table}" + check3 = f"SELECT count(c) FROM {table} WHERE a % 2 == 0" + check4 = f"SELECT count() FROM {table} WHERE b LIKE concat('name_', toString(1))" assert (node1.query(check1)).rstrip() == '10000' assert (node1.query(check2)).rstrip() == '30000' assert (node1.query(check3)).rstrip() == '5000' assert (node1.query(check4)).rstrip() == '1' + # Triggers issue https://github.com/ClickHouse/ClickHouse/issues/26088 + # for i in range(1, 1000): + # assert (node1.query(check1)).rstrip() == '10000', f"Failed on {i}" + + cursor.execute(f'DROP TABLE {table_name} ') + def test_postgres_conversions(started_cluster): - conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) - cursor = conn.cursor() + cursor = started_cluster.postgres_conn.cursor() + cursor.execute(f'DROP TABLE IF EXISTS test_types') + cursor.execute(f'DROP TABLE IF EXISTS test_array_dimensions') + cursor.execute( - '''CREATE TABLE IF NOT EXISTS test_types ( + '''CREATE TABLE test_types ( a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, h timestamp, i date, j decimal(5, 3), k numeric, l boolean)''') node1.query(''' - INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'clickhouse', 'test_types', 'postgres', 'mysecretpassword') VALUES - (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12', '2000-05-12', 22.222, 22.222, 1)''') + INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword') VALUES + (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 22.222, 22.222, 1)''') result = node1.query(''' - SELECT a, b, c, d, e, f, g, h, i, j, toDecimal128(k, 3), l FROM postgresql('postgres1:5432', 'clickhouse', 'test_types', 'postgres', 'mysecretpassword')''') - assert(result == '-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\t2000-05-12\t22.222\t22.222\t1\n') + SELECT a, b, c, d, e, f, g, h, i, j, toDecimal128(k, 3), l FROM postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword')''') + assert(result == '-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t22.222\t22.222\t1\n') cursor.execute("INSERT INTO test_types (l) VALUES (TRUE), (true), ('yes'), ('y'), ('1');") cursor.execute("INSERT INTO test_types (l) VALUES (FALSE), (false), ('no'), ('off'), ('0');") expected = "1\n1\n1\n1\n1\n1\n0\n0\n0\n0\n0\n" - result = node1.query('''SELECT l FROM postgresql('postgres1:5432', 'clickhouse', 'test_types', 'postgres', 'mysecretpassword')''') + result = node1.query('''SELECT l FROM postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword')''') assert(result == expected) cursor.execute( '''CREATE TABLE IF NOT EXISTS test_array_dimensions ( a Date[] NOT NULL, -- Date - b Timestamp[] NOT NULL, -- DateTime + b Timestamp[] NOT NULL, -- DateTime64(6) c real[][] NOT NULL, -- Float32 d double precision[][] NOT NULL, -- Float64 e decimal(5, 5)[][][] NOT NULL, -- Decimal32 @@ -112,9 +83,9 @@ def test_postgres_conversions(started_cluster): )''') result = node1.query(''' - DESCRIBE TABLE postgresql('postgres1:5432', 'clickhouse', 
'test_array_dimensions', 'postgres', 'mysecretpassword')''') + DESCRIBE TABLE postgresql('postgres1:5432', 'postgres', 'test_array_dimensions', 'postgres', 'mysecretpassword')''') expected = ('a\tArray(Date)\t\t\t\t\t\n' + - 'b\tArray(DateTime)\t\t\t\t\t\n' + + 'b\tArray(DateTime64(6))\t\t\t\t\t\n' + 'c\tArray(Array(Float32))\t\t\t\t\t\n' + 'd\tArray(Array(Float64))\t\t\t\t\t\n' + 'e\tArray(Array(Array(Decimal(5, 5))))\t\t\t\t\t\n' + @@ -126,10 +97,10 @@ def test_postgres_conversions(started_cluster): ) assert(result.rstrip() == expected) - node1.query("INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'clickhouse', 'test_array_dimensions', 'postgres', 'mysecretpassword') " + node1.query("INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'postgres', 'test_array_dimensions', 'postgres', 'mysecretpassword') " "VALUES (" "['2000-05-12', '2000-05-12'], " - "['2000-05-12 12:12:12', '2000-05-12 12:12:12'], " + "['2000-05-12 12:12:12.012345', '2000-05-12 12:12:12.012345'], " "[[1.12345], [1.12345], [1.12345]], " "[[1.1234567891], [1.1234567891], [1.1234567891]], " "[[[0.11111, 0.11111]], [[0.22222, 0.22222]], [[0.33333, 0.33333]]], " @@ -141,10 +112,10 @@ def test_postgres_conversions(started_cluster): ")") result = node1.query(''' - SELECT * FROM postgresql('postgres1:5432', 'clickhouse', 'test_array_dimensions', 'postgres', 'mysecretpassword')''') + SELECT * FROM postgresql('postgres1:5432', 'postgres', 'test_array_dimensions', 'postgres', 'mysecretpassword')''') expected = ( "['2000-05-12','2000-05-12']\t" + - "['2000-05-12 12:12:12','2000-05-12 12:12:12']\t" + + "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t" + "[[1.12345],[1.12345],[1.12345]]\t" + "[[1.1234567891],[1.1234567891],[1.1234567891]]\t" + "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t" @@ -156,25 +127,33 @@ def test_postgres_conversions(started_cluster): ) assert(result == expected) + cursor.execute(f'DROP TABLE test_types') + cursor.execute(f'DROP TABLE test_array_dimensions') + def test_non_default_scema(started_cluster): - conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) - cursor = conn.cursor() + node1.query('DROP TABLE IF EXISTS test_pg_table_schema') + node1.query('DROP TABLE IF EXISTS test_pg_table_schema_with_dots') + + cursor = started_cluster.postgres_conn.cursor() + cursor.execute('DROP SCHEMA IF EXISTS test_schema CASCADE') + cursor.execute('DROP SCHEMA IF EXISTS "test.nice.schema" CASCADE') + cursor.execute('CREATE SCHEMA test_schema') cursor.execute('CREATE TABLE test_schema.test_table (a integer)') cursor.execute('INSERT INTO test_schema.test_table SELECT i FROM generate_series(0, 99) as t(i)') node1.query(''' CREATE TABLE test_pg_table_schema (a UInt32) - ENGINE PostgreSQL('postgres1:5432', 'clickhouse', 'test_table', 'postgres', 'mysecretpassword', 'test_schema'); + ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword', 'test_schema'); ''') result = node1.query('SELECT * FROM test_pg_table_schema') expected = node1.query('SELECT number FROM numbers(100)') assert(result == expected) - table_function = '''postgresql('postgres1:5432', 'clickhouse', 'test_table', 'postgres', 'mysecretpassword', 'test_schema')''' - result = node1.query('SELECT * FROM {}'.format(table_function)) + table_function = '''postgresql('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword', 'test_schema')''' + result = node1.query(f'SELECT * FROM {table_function}') assert(result == expected) 
cursor.execute('''CREATE SCHEMA "test.nice.schema"''') @@ -183,24 +162,28 @@ def test_non_default_scema(started_cluster): node1.query(''' CREATE TABLE test_pg_table_schema_with_dots (a UInt32) - ENGINE PostgreSQL('postgres1:5432', 'clickhouse', 'test.nice.table', 'postgres', 'mysecretpassword', 'test.nice.schema'); + ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test.nice.table', 'postgres', 'mysecretpassword', 'test.nice.schema'); ''') result = node1.query('SELECT * FROM test_pg_table_schema_with_dots') assert(result == expected) cursor.execute('INSERT INTO "test_schema"."test_table" SELECT i FROM generate_series(100, 199) as t(i)') - result = node1.query('SELECT * FROM {}'.format(table_function)) + result = node1.query(f'SELECT * FROM {table_function}') expected = node1.query('SELECT number FROM numbers(200)') assert(result == expected) + cursor.execute('DROP SCHEMA test_schema CASCADE') + cursor.execute('DROP SCHEMA "test.nice.schema" CASCADE') + node1.query('DROP TABLE test_pg_table_schema') + node1.query('DROP TABLE test_pg_table_schema_with_dots') + def test_concurrent_queries(started_cluster): - conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) - cursor = conn.cursor() + cursor = started_cluster.postgres_conn.cursor() node1.query(''' CREATE TABLE test_table (key UInt32, value UInt32) - ENGINE = PostgreSQL('postgres1:5432', 'clickhouse', 'test_table', 'postgres', 'mysecretpassword')''') + ENGINE = PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword')''') cursor.execute('CREATE TABLE test_table (key integer, value integer)') @@ -212,7 +195,7 @@ def test_concurrent_queries(started_cluster): p = busy_pool.map_async(node_select, range(20)) p.wait() count = node1.count_in_log('New connection to postgres1:5432') - print(count, prev_count) + logging.debug(f'count {count}, prev_count {prev_count}') # 16 is default size for connection pool assert(int(count) <= int(prev_count) + 16) @@ -224,7 +207,7 @@ def test_concurrent_queries(started_cluster): p = busy_pool.map_async(node_insert, range(5)) p.wait() result = node1.query("SELECT count() FROM test_table", user='default') - print(result) + logging.debug(result) assert(int(result) == 5 * 5 * 1000) def node_insert_select(_): @@ -236,44 +219,41 @@ def test_concurrent_queries(started_cluster): p = busy_pool.map_async(node_insert_select, range(5)) p.wait() result = node1.query("SELECT count() FROM test_table", user='default') - print(result) + logging.debug(result) assert(int(result) == 5 * 5 * 1000 * 2) node1.query('DROP TABLE test_table;') cursor.execute('DROP TABLE test_table;') count = node1.count_in_log('New connection to postgres1:5432') - print(count, prev_count) + logging.debug(f'count {count}, prev_count {prev_count}') assert(int(count) <= int(prev_count) + 16) def test_postgres_distributed(started_cluster): - conn0 = get_postgres_conn(started_cluster, started_cluster.postgres_ip, database=True) - conn1 = get_postgres_conn(started_cluster, started_cluster.postgres2_ip, database=True) - conn2 = get_postgres_conn(started_cluster, started_cluster.postgres3_ip, database=True) - conn3 = get_postgres_conn(started_cluster, started_cluster.postgres4_ip, database=True) - - cursor0 = conn0.cursor() - cursor1 = conn1.cursor() - cursor2 = conn2.cursor() - cursor3 = conn3.cursor() + cursor0 = started_cluster.postgres_conn.cursor() + cursor1 = started_cluster.postgres2_conn.cursor() + cursor2 = started_cluster.postgres3_conn.cursor() + cursor3 = started_cluster.postgres4_conn.cursor() 
cursors = [cursor0, cursor1, cursor2, cursor3] for i in range(4): + cursors[i].execute('DROP TABLE IF EXISTS test_replicas') cursors[i].execute('CREATE TABLE test_replicas (id Integer, name Text)') - cursors[i].execute("""INSERT INTO test_replicas select i, 'host{}' from generate_series(0, 99) as t(i);""".format(i + 1)); + cursors[i].execute(f"""INSERT INTO test_replicas select i, 'host{i+1}' from generate_series(0, 99) as t(i);"""); # test multiple ports parsing - result = node2.query('''SELECT DISTINCT(name) FROM postgresql(`postgres{1|2|3}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword'); ''') + result = node2.query('''SELECT DISTINCT(name) FROM postgresql(`postgres{1|2|3}:5432`, 'postgres', 'test_replicas', 'postgres', 'mysecretpassword'); ''') assert(result == 'host1\n' or result == 'host2\n' or result == 'host3\n') - result = node2.query('''SELECT DISTINCT(name) FROM postgresql(`postgres2:5431|postgres3:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword'); ''') + result = node2.query('''SELECT DISTINCT(name) FROM postgresql(`postgres2:5431|postgres3:5432`, 'postgres', 'test_replicas', 'postgres', 'mysecretpassword'); ''') assert(result == 'host3\n' or result == 'host2\n') # Create storage with with 3 replicas + node2.query('DROP TABLE IF EXISTS test_replicas') node2.query(''' CREATE TABLE test_replicas (id UInt32, name String) - ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword'); ''') + ENGINE = PostgreSQL(`postgres{2|3|4}:5432`, 'postgres', 'test_replicas', 'postgres', 'mysecretpassword'); ''') # Check all replicas are traversed query = "SELECT name FROM (" @@ -284,10 +264,12 @@ def test_postgres_distributed(started_cluster): assert(result == 'host2\nhost3\nhost4\n') # Create storage with with two two shards, each has 2 replicas + node2.query('DROP TABLE IF EXISTS test_shards') + node2.query(''' CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) - ENGINE = ExternalDistributed('PostgreSQL', `postgres{1|2}:5432,postgres{3|4}:5432`, 'clickhouse', 'test_replicas', 'postgres', 'mysecretpassword'); ''') + ENGINE = ExternalDistributed('PostgreSQL', `postgres{1|2}:5432,postgres{3|4}:5432`, 'postgres', 'test_replicas', 'postgres', 'mysecretpassword'); ''') # Check only one replica in each shard is used result = node2.query("SELECT DISTINCT(name) FROM test_shards ORDER BY name") @@ -306,26 +288,32 @@ def test_postgres_distributed(started_cluster): result = node2.query("SELECT DISTINCT(name) FROM test_shards ORDER BY name") started_cluster.unpause_container('postgres1') assert(result == 'host2\nhost4\n' or result == 'host3\nhost4\n') + node2.query('DROP TABLE test_shards') + node2.query('DROP TABLE test_replicas') def test_datetime_with_timezone(started_cluster): - conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) - cursor = conn.cursor() + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("DROP TABLE IF EXISTS test_timezone") + node1.query("DROP TABLE IF EXISTS test_timezone") cursor.execute("CREATE TABLE test_timezone (ts timestamp without time zone, ts_z timestamp with time zone)") cursor.execute("insert into test_timezone select '2014-04-04 20:00:00', '2014-04-04 20:00:00'::timestamptz at time zone 'America/New_York';") cursor.execute("select * from test_timezone") result = cursor.fetchall()[0] - print(result[0], str(result[1])[:-6]) - node1.query("create table test_timezone ( ts DateTime, ts_z DateTime('America/New_York')) ENGINE 
PostgreSQL('postgres1:5432', 'clickhouse', 'test_timezone', 'postgres', 'mysecretpassword');") + logging.debug(f'{result[0]}, {str(result[1])[:-6]}') + node1.query("create table test_timezone ( ts DateTime, ts_z DateTime('America/New_York')) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_timezone', 'postgres', 'mysecretpassword');") assert(node1.query("select ts from test_timezone").strip() == str(result[0])) # [:-6] because 2014-04-04 16:00:00+00:00 -> 2014-04-04 16:00:00 assert(node1.query("select ts_z from test_timezone").strip() == str(result[1])[:-6]) assert(node1.query("select * from test_timezone") == "2014-04-04 20:00:00\t2014-04-04 16:00:00\n") + cursor.execute("DROP TABLE test_timezone") + node1.query("DROP TABLE test_timezone") def test_postgres_ndim(started_cluster): - conn = get_postgres_conn(started_cluster, started_cluster.postgres_ip, True) - cursor = conn.cursor() + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("DROP TABLE IF EXISTS arr1, arr2") + cursor.execute('CREATE TABLE arr1 (a Integer[])') cursor.execute("INSERT INTO arr1 SELECT '{{1}, {2}}'") @@ -335,8 +323,9 @@ def test_postgres_ndim(started_cluster): result = cursor.fetchall()[0] assert(int(result[0]) == 0) - result = node1.query('''SELECT toTypeName(a) FROM postgresql('postgres1:5432', 'clickhouse', 'arr2', 'postgres', 'mysecretpassword')''') + result = node1.query('''SELECT toTypeName(a) FROM postgresql('postgres1:5432', 'postgres', 'arr2', 'postgres', 'mysecretpassword')''') assert(result.strip() == "Array(Array(Nullable(Int32)))") + cursor.execute("DROP TABLE arr1, arr2") if __name__ == '__main__': diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 1ba29975202..5908def8297 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -198,12 +198,14 @@ def test_empty_put(started_cluster, auth): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + drop_empty_table_query = "DROP TABLE IF EXISTS empty_table" create_empty_table_query = """ CREATE TABLE empty_table ( {} ) ENGINE = Null() """.format(table_format) + run_query(instance, drop_empty_table_query) run_query(instance, create_empty_table_query) filename = "empty_put_test.csv" @@ -305,22 +307,22 @@ def test_put_with_zero_redirect(started_cluster): def test_put_get_with_globs(started_cluster): # type: (ClickHouseCluster) -> None - + unique_prefix = random.randint(1,10000) bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" for i in range(10): for j in range(10): - path = "{}_{}/{}.csv".format(i, random.choice(['a', 'b', 'c', 'd']), j) + path = "{}/{}_{}/{}.csv".format(unique_prefix, i, random.choice(['a', 'b', 'c', 'd']), j) max_path = max(path, max_path) values = "({},{},{})".format(i, j, i + j) query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, path, table_format, values) run_query(instance, query) - query = "select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from s3('http://{}:{}/{}/*_{{a,b,c,d}}/%3f.csv', 'CSV', '{}')".format( - started_cluster.minio_redirect_host, started_cluster.minio_redirect_port, bucket, table_format) + query = "select sum(column1), sum(column2), sum(column3), min(_file), 
max(_path) from s3('http://{}:{}/{}/{}/*_{{a,b,c,d}}/%3f.csv', 'CSV', '{}')".format( + started_cluster.minio_redirect_host, started_cluster.minio_redirect_port, bucket, unique_prefix, table_format) assert run_query(instance, query).splitlines() == [ "450\t450\t900\t0.csv\t{bucket}/{max_path}".format(bucket=bucket, max_path=max_path)] @@ -479,6 +481,7 @@ def test_custom_auth_headers(started_cluster): result = run_query(instance, get_query) assert result == '1\t2\t3\n' + instance.query("DROP TABLE IF EXISTS test") instance.query( "CREATE TABLE test ({table_format}) ENGINE = S3('http://resolver:8080/{bucket}/{file}', 'CSV')".format( bucket=started_cluster.minio_restricted_bucket, @@ -494,6 +497,7 @@ def test_custom_auth_headers(started_cluster): replace_config("
Authorization: Bearer INVALID_TOKEN", "
Authorization: Bearer TOKEN") instance.query("SYSTEM RELOAD CONFIG") assert run_query(instance, "SELECT * FROM test") == '1\t2\t3\n' + instance.query("DROP TABLE test") def test_custom_auth_headers_exclusion(started_cluster): @@ -551,6 +555,8 @@ def test_storage_s3_get_gzip(started_cluster, extension, method): "Norman Ortega,33", "" ] + run_query(instance, f"DROP TABLE IF EXISTS {name}") + buf = io.BytesIO() compressed = gzip.GzipFile(fileobj=buf, mode="wb") compressed.write(("\n".join(data)).encode()) @@ -562,7 +568,8 @@ def test_storage_s3_get_gzip(started_cluster, extension, method): 'CSV', '{method}')""") - run_query(instance, "SELECT sum(id) FROM {}".format(name)).splitlines() == ["565"] + run_query(instance, f"SELECT sum(id) FROM {name}").splitlines() == ["565"] + run_query(instance, f"DROP TABLE {name}") def test_storage_s3_get_unstable(started_cluster): diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 4f8a61a5bf0..f6164516981 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -26,6 +26,7 @@ def start_cluster(): def test_mutate_and_upgrade(start_cluster): for node in [node1, node2]: + node.query("DROP TABLE IF EXISTS mt") node.query( "CREATE TABLE mt (EventDate Date, id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{}') ORDER BY tuple()".format( node.name)) @@ -67,8 +68,13 @@ def test_mutate_and_upgrade(start_cluster): assert node1.query("SELECT id FROM mt") == "1\n4\n" assert node2.query("SELECT id FROM mt") == "1\n4\n" + for node in [node1, node2]: + node.query("DROP TABLE mt") + def test_upgrade_while_mutation(start_cluster): + node3.query("DROP TABLE IF EXISTS mt1") + node3.query( "CREATE TABLE mt1 (EventDate Date, id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', 'node3') ORDER BY tuple()") @@ -86,3 +92,5 @@ def test_upgrade_while_mutation(start_cluster): # will delete nothing, but previous async mutation will finish with this query assert_eq_with_retry(node3, "SELECT COUNT() from mt1", "50000\n") + + node3.query("DROP TABLE mt1") diff --git a/tests/performance/lot_of_subcolumns.xml b/tests/performance/lot_of_subcolumns.xml new file mode 100644 index 00000000000..d33a7704d70 --- /dev/null +++ b/tests/performance/lot_of_subcolumns.xml @@ -0,0 +1,23 @@ + + + CREATE TABLE lot_of_arrays(id UInt64, + `nested.arr0` Array(UInt64), `nested.arr1` Array(UInt64), `nested.arr2` Array(UInt64), `nested.arr3` Array(UInt64), `nested.arr4` Array(UInt64), `nested.arr5` Array(UInt64), `nested.arr6` Array(UInt64), `nested.arr7` Array(UInt64), `nested.arr8` Array(UInt64), `nested.arr9` Array(UInt64), `nested.arr10` Array(UInt64), `nested.arr11` Array(UInt64), `nested.arr12` Array(UInt64), `nested.arr13` Array(UInt64), `nested.arr14` Array(UInt64), `nested.arr15` Array(UInt64), `nested.arr16` Array(UInt64), `nested.arr17` Array(UInt64), `nested.arr18` Array(UInt64), `nested.arr19` Array(UInt64), `nested.arr20` Array(UInt64), `nested.arr21` Array(UInt64), `nested.arr22` Array(UInt64), `nested.arr23` Array(UInt64), `nested.arr24` Array(UInt64), `nested.arr25` Array(UInt64), `nested.arr26` Array(UInt64), `nested.arr27` Array(UInt64), `nested.arr28` Array(UInt64), `nested.arr29` Array(UInt64), `nested.arr30` Array(UInt64), `nested.arr31` Array(UInt64), `nested.arr32` Array(UInt64), `nested.arr33` Array(UInt64), `nested.arr34` Array(UInt64), `nested.arr35` Array(UInt64), `nested.arr36` 
Array(UInt64), `nested.arr37` Array(UInt64), `nested.arr38` Array(UInt64), `nested.arr39` Array(UInt64), `nested.arr40` Array(UInt64), `nested.arr41` Array(UInt64), `nested.arr42` Array(UInt64), `nested.arr43` Array(UInt64), `nested.arr44` Array(UInt64), `nested.arr45` Array(UInt64), `nested.arr46` Array(UInt64), `nested.arr47` Array(UInt64), `nested.arr48` Array(UInt64), `nested.arr49` Array(UInt64), `nested.arr50` Array(UInt64), `nested.arr51` Array(UInt64), `nested.arr52` Array(UInt64), `nested.arr53` Array(UInt64), `nested.arr54` Array(UInt64), `nested.arr55` Array(UInt64), `nested.arr56` Array(UInt64), `nested.arr57` Array(UInt64), `nested.arr58` Array(UInt64), `nested.arr59` Array(UInt64), `nested.arr60` Array(UInt64), `nested.arr61` Array(UInt64), `nested.arr62` Array(UInt64), `nested.arr63` Array(UInt64), `nested.arr64` Array(UInt64), `nested.arr65` Array(UInt64), `nested.arr66` Array(UInt64), `nested.arr67` Array(UInt64), `nested.arr68` Array(UInt64), `nested.arr69` Array(UInt64), `nested.arr70` Array(UInt64), `nested.arr71` Array(UInt64), `nested.arr72` Array(UInt64), `nested.arr73` Array(UInt64), `nested.arr74` Array(UInt64), `nested.arr75` Array(UInt64), `nested.arr76` Array(UInt64), `nested.arr77` Array(UInt64), `nested.arr78` Array(UInt64), `nested.arr79` Array(UInt64), `nested.arr80` Array(UInt64), `nested.arr81` Array(UInt64), `nested.arr82` Array(UInt64), `nested.arr83` Array(UInt64), `nested.arr84` Array(UInt64), `nested.arr85` Array(UInt64), `nested.arr86` Array(UInt64), `nested.arr87` Array(UInt64), `nested.arr88` Array(UInt64), `nested.arr89` Array(UInt64), `nested.arr90` Array(UInt64), `nested.arr91` Array(UInt64), `nested.arr92` Array(UInt64), `nested.arr93` Array(UInt64), `nested.arr94` Array(UInt64), `nested.arr95` Array(UInt64), `nested.arr96` Array(UInt64), `nested.arr97` Array(UInt64), `nested.arr98` Array(UInt64), `nested.arr99` Array(UInt64), + `nested.arr100` Array(UInt64), `nested.arr101` Array(UInt64), `nested.arr102` Array(UInt64), `nested.arr103` Array(UInt64), `nested.arr104` Array(UInt64), `nested.arr105` Array(UInt64), `nested.arr106` Array(UInt64), `nested.arr107` Array(UInt64), `nested.arr108` Array(UInt64), `nested.arr109` Array(UInt64), `nested.arr110` Array(UInt64), `nested.arr111` Array(UInt64), `nested.arr112` Array(UInt64), `nested.arr113` Array(UInt64), `nested.arr114` Array(UInt64), `nested.arr115` Array(UInt64), `nested.arr116` Array(UInt64), `nested.arr117` Array(UInt64), `nested.arr118` Array(UInt64), `nested.arr119` Array(UInt64), `nested.arr120` Array(UInt64), `nested.arr121` Array(UInt64), `nested.arr122` Array(UInt64), `nested.arr123` Array(UInt64), `nested.arr124` Array(UInt64), `nested.arr125` Array(UInt64), `nested.arr126` Array(UInt64), `nested.arr127` Array(UInt64), `nested.arr128` Array(UInt64), `nested.arr129` Array(UInt64), `nested.arr130` Array(UInt64), `nested.arr131` Array(UInt64), `nested.arr132` Array(UInt64), `nested.arr133` Array(UInt64), `nested.arr134` Array(UInt64), `nested.arr135` Array(UInt64), `nested.arr136` Array(UInt64), `nested.arr137` Array(UInt64), `nested.arr138` Array(UInt64), `nested.arr139` Array(UInt64), `nested.arr140` Array(UInt64), `nested.arr141` Array(UInt64), `nested.arr142` Array(UInt64), `nested.arr143` Array(UInt64), `nested.arr144` Array(UInt64), `nested.arr145` Array(UInt64), `nested.arr146` Array(UInt64), `nested.arr147` Array(UInt64), `nested.arr148` Array(UInt64), `nested.arr149` Array(UInt64), `nested.arr150` Array(UInt64), `nested.arr151` Array(UInt64), `nested.arr152` Array(UInt64), 
`nested.arr153` Array(UInt64), `nested.arr154` Array(UInt64), `nested.arr155` Array(UInt64), `nested.arr156` Array(UInt64), `nested.arr157` Array(UInt64), `nested.arr158` Array(UInt64), `nested.arr159` Array(UInt64), `nested.arr160` Array(UInt64), `nested.arr161` Array(UInt64), `nested.arr162` Array(UInt64), `nested.arr163` Array(UInt64), `nested.arr164` Array(UInt64), `nested.arr165` Array(UInt64), `nested.arr166` Array(UInt64), `nested.arr167` Array(UInt64), `nested.arr168` Array(UInt64), `nested.arr169` Array(UInt64), `nested.arr170` Array(UInt64), `nested.arr171` Array(UInt64), `nested.arr172` Array(UInt64), `nested.arr173` Array(UInt64), `nested.arr174` Array(UInt64), `nested.arr175` Array(UInt64), `nested.arr176` Array(UInt64), `nested.arr177` Array(UInt64), `nested.arr178` Array(UInt64), `nested.arr179` Array(UInt64), `nested.arr180` Array(UInt64), `nested.arr181` Array(UInt64), `nested.arr182` Array(UInt64), `nested.arr183` Array(UInt64), `nested.arr184` Array(UInt64), `nested.arr185` Array(UInt64), `nested.arr186` Array(UInt64), `nested.arr187` Array(UInt64), `nested.arr188` Array(UInt64), `nested.arr189` Array(UInt64), `nested.arr190` Array(UInt64), `nested.arr191` Array(UInt64), `nested.arr192` Array(UInt64), `nested.arr193` Array(UInt64), `nested.arr194` Array(UInt64), `nested.arr195` Array(UInt64), `nested.arr196` Array(UInt64), `nested.arr197` Array(UInt64), `nested.arr198` Array(UInt64), `nested.arr199` Array(UInt64), + `nested.arr200` Array(UInt64), `nested.arr201` Array(UInt64), `nested.arr202` Array(UInt64), `nested.arr203` Array(UInt64), `nested.arr204` Array(UInt64), `nested.arr205` Array(UInt64), `nested.arr206` Array(UInt64), `nested.arr207` Array(UInt64), `nested.arr208` Array(UInt64), `nested.arr209` Array(UInt64), `nested.arr210` Array(UInt64), `nested.arr211` Array(UInt64), `nested.arr212` Array(UInt64), `nested.arr213` Array(UInt64), `nested.arr214` Array(UInt64), `nested.arr215` Array(UInt64), `nested.arr216` Array(UInt64), `nested.arr217` Array(UInt64), `nested.arr218` Array(UInt64), `nested.arr219` Array(UInt64), `nested.arr220` Array(UInt64), `nested.arr221` Array(UInt64), `nested.arr222` Array(UInt64), `nested.arr223` Array(UInt64), `nested.arr224` Array(UInt64), `nested.arr225` Array(UInt64), `nested.arr226` Array(UInt64), `nested.arr227` Array(UInt64), `nested.arr228` Array(UInt64), `nested.arr229` Array(UInt64), `nested.arr230` Array(UInt64), `nested.arr231` Array(UInt64), `nested.arr232` Array(UInt64), `nested.arr233` Array(UInt64), `nested.arr234` Array(UInt64), `nested.arr235` Array(UInt64), `nested.arr236` Array(UInt64), `nested.arr237` Array(UInt64), `nested.arr238` Array(UInt64), `nested.arr239` Array(UInt64), `nested.arr240` Array(UInt64), `nested.arr241` Array(UInt64), `nested.arr242` Array(UInt64), `nested.arr243` Array(UInt64), `nested.arr244` Array(UInt64), `nested.arr245` Array(UInt64), `nested.arr246` Array(UInt64), `nested.arr247` Array(UInt64), `nested.arr248` Array(UInt64), `nested.arr249` Array(UInt64), `nested.arr250` Array(UInt64), `nested.arr251` Array(UInt64), `nested.arr252` Array(UInt64), `nested.arr253` Array(UInt64), `nested.arr254` Array(UInt64), `nested.arr255` Array(UInt64), `nested.arr256` Array(UInt64), `nested.arr257` Array(UInt64), `nested.arr258` Array(UInt64), `nested.arr259` Array(UInt64), `nested.arr260` Array(UInt64), `nested.arr261` Array(UInt64), `nested.arr262` Array(UInt64), `nested.arr263` Array(UInt64), `nested.arr264` Array(UInt64), `nested.arr265` Array(UInt64), `nested.arr266` Array(UInt64), `nested.arr267` 
Array(UInt64), `nested.arr268` Array(UInt64), `nested.arr269` Array(UInt64), `nested.arr270` Array(UInt64), `nested.arr271` Array(UInt64), `nested.arr272` Array(UInt64), `nested.arr273` Array(UInt64), `nested.arr274` Array(UInt64), `nested.arr275` Array(UInt64), `nested.arr276` Array(UInt64), `nested.arr277` Array(UInt64), `nested.arr278` Array(UInt64), `nested.arr279` Array(UInt64), `nested.arr280` Array(UInt64), `nested.arr281` Array(UInt64), `nested.arr282` Array(UInt64), `nested.arr283` Array(UInt64), `nested.arr284` Array(UInt64), `nested.arr285` Array(UInt64), `nested.arr286` Array(UInt64), `nested.arr287` Array(UInt64), `nested.arr288` Array(UInt64), `nested.arr289` Array(UInt64), `nested.arr290` Array(UInt64), `nested.arr291` Array(UInt64), `nested.arr292` Array(UInt64), `nested.arr293` Array(UInt64), `nested.arr294` Array(UInt64), `nested.arr295` Array(UInt64), `nested.arr296` Array(UInt64), `nested.arr297` Array(UInt64), `nested.arr298` Array(UInt64), `nested.arr299` Array(UInt64), + `nested.arr300` Array(UInt64), `nested.arr301` Array(UInt64), `nested.arr302` Array(UInt64), `nested.arr303` Array(UInt64), `nested.arr304` Array(UInt64), `nested.arr305` Array(UInt64), `nested.arr306` Array(UInt64), `nested.arr307` Array(UInt64), `nested.arr308` Array(UInt64), `nested.arr309` Array(UInt64), `nested.arr310` Array(UInt64), `nested.arr311` Array(UInt64), `nested.arr312` Array(UInt64), `nested.arr313` Array(UInt64), `nested.arr314` Array(UInt64), `nested.arr315` Array(UInt64), `nested.arr316` Array(UInt64), `nested.arr317` Array(UInt64), `nested.arr318` Array(UInt64), `nested.arr319` Array(UInt64), `nested.arr320` Array(UInt64), `nested.arr321` Array(UInt64), `nested.arr322` Array(UInt64), `nested.arr323` Array(UInt64), `nested.arr324` Array(UInt64), `nested.arr325` Array(UInt64), `nested.arr326` Array(UInt64), `nested.arr327` Array(UInt64), `nested.arr328` Array(UInt64), `nested.arr329` Array(UInt64), `nested.arr330` Array(UInt64), `nested.arr331` Array(UInt64), `nested.arr332` Array(UInt64), `nested.arr333` Array(UInt64), `nested.arr334` Array(UInt64), `nested.arr335` Array(UInt64), `nested.arr336` Array(UInt64), `nested.arr337` Array(UInt64), `nested.arr338` Array(UInt64), `nested.arr339` Array(UInt64), `nested.arr340` Array(UInt64), `nested.arr341` Array(UInt64), `nested.arr342` Array(UInt64), `nested.arr343` Array(UInt64), `nested.arr344` Array(UInt64), `nested.arr345` Array(UInt64), `nested.arr346` Array(UInt64), `nested.arr347` Array(UInt64), `nested.arr348` Array(UInt64), `nested.arr349` Array(UInt64), `nested.arr350` Array(UInt64), `nested.arr351` Array(UInt64), `nested.arr352` Array(UInt64), `nested.arr353` Array(UInt64), `nested.arr354` Array(UInt64), `nested.arr355` Array(UInt64), `nested.arr356` Array(UInt64), `nested.arr357` Array(UInt64), `nested.arr358` Array(UInt64), `nested.arr359` Array(UInt64), `nested.arr360` Array(UInt64), `nested.arr361` Array(UInt64), `nested.arr362` Array(UInt64), `nested.arr363` Array(UInt64), `nested.arr364` Array(UInt64), `nested.arr365` Array(UInt64), `nested.arr366` Array(UInt64), `nested.arr367` Array(UInt64), `nested.arr368` Array(UInt64), `nested.arr369` Array(UInt64), `nested.arr370` Array(UInt64), `nested.arr371` Array(UInt64), `nested.arr372` Array(UInt64), `nested.arr373` Array(UInt64), `nested.arr374` Array(UInt64), `nested.arr375` Array(UInt64), `nested.arr376` Array(UInt64), `nested.arr377` Array(UInt64), `nested.arr378` Array(UInt64), `nested.arr379` Array(UInt64), `nested.arr380` Array(UInt64), `nested.arr381` Array(UInt64), 
`nested.arr382` Array(UInt64), `nested.arr383` Array(UInt64), `nested.arr384` Array(UInt64), `nested.arr385` Array(UInt64), `nested.arr386` Array(UInt64), `nested.arr387` Array(UInt64), `nested.arr388` Array(UInt64), `nested.arr389` Array(UInt64), `nested.arr390` Array(UInt64), `nested.arr391` Array(UInt64), `nested.arr392` Array(UInt64), `nested.arr393` Array(UInt64), `nested.arr394` Array(UInt64), `nested.arr395` Array(UInt64), `nested.arr396` Array(UInt64), `nested.arr397` Array(UInt64), `nested.arr398` Array(UInt64), `nested.arr399` Array(UInt64), + `nested.arr400` Array(UInt64), `nested.arr401` Array(UInt64), `nested.arr402` Array(UInt64), `nested.arr403` Array(UInt64), `nested.arr404` Array(UInt64), `nested.arr405` Array(UInt64), `nested.arr406` Array(UInt64), `nested.arr407` Array(UInt64), `nested.arr408` Array(UInt64), `nested.arr409` Array(UInt64), `nested.arr410` Array(UInt64), `nested.arr411` Array(UInt64), `nested.arr412` Array(UInt64), `nested.arr413` Array(UInt64), `nested.arr414` Array(UInt64), `nested.arr415` Array(UInt64), `nested.arr416` Array(UInt64), `nested.arr417` Array(UInt64), `nested.arr418` Array(UInt64), `nested.arr419` Array(UInt64), `nested.arr420` Array(UInt64), `nested.arr421` Array(UInt64), `nested.arr422` Array(UInt64), `nested.arr423` Array(UInt64), `nested.arr424` Array(UInt64), `nested.arr425` Array(UInt64), `nested.arr426` Array(UInt64), `nested.arr427` Array(UInt64), `nested.arr428` Array(UInt64), `nested.arr429` Array(UInt64), `nested.arr430` Array(UInt64), `nested.arr431` Array(UInt64), `nested.arr432` Array(UInt64), `nested.arr433` Array(UInt64), `nested.arr434` Array(UInt64), `nested.arr435` Array(UInt64), `nested.arr436` Array(UInt64), `nested.arr437` Array(UInt64), `nested.arr438` Array(UInt64), `nested.arr439` Array(UInt64), `nested.arr440` Array(UInt64), `nested.arr441` Array(UInt64), `nested.arr442` Array(UInt64), `nested.arr443` Array(UInt64), `nested.arr444` Array(UInt64), `nested.arr445` Array(UInt64), `nested.arr446` Array(UInt64), `nested.arr447` Array(UInt64), `nested.arr448` Array(UInt64), `nested.arr449` Array(UInt64), `nested.arr450` Array(UInt64), `nested.arr451` Array(UInt64), `nested.arr452` Array(UInt64), `nested.arr453` Array(UInt64), `nested.arr454` Array(UInt64), `nested.arr455` Array(UInt64), `nested.arr456` Array(UInt64), `nested.arr457` Array(UInt64), `nested.arr458` Array(UInt64), `nested.arr459` Array(UInt64), `nested.arr460` Array(UInt64), `nested.arr461` Array(UInt64), `nested.arr462` Array(UInt64), `nested.arr463` Array(UInt64), `nested.arr464` Array(UInt64), `nested.arr465` Array(UInt64), `nested.arr466` Array(UInt64), `nested.arr467` Array(UInt64), `nested.arr468` Array(UInt64), `nested.arr469` Array(UInt64), `nested.arr470` Array(UInt64), `nested.arr471` Array(UInt64), `nested.arr472` Array(UInt64), `nested.arr473` Array(UInt64), `nested.arr474` Array(UInt64), `nested.arr475` Array(UInt64), `nested.arr476` Array(UInt64), `nested.arr477` Array(UInt64), `nested.arr478` Array(UInt64), `nested.arr479` Array(UInt64), `nested.arr480` Array(UInt64), `nested.arr481` Array(UInt64), `nested.arr482` Array(UInt64), `nested.arr483` Array(UInt64), `nested.arr484` Array(UInt64), `nested.arr485` Array(UInt64), `nested.arr486` Array(UInt64), `nested.arr487` Array(UInt64), `nested.arr488` Array(UInt64), `nested.arr489` Array(UInt64), `nested.arr490` Array(UInt64), `nested.arr491` Array(UInt64), `nested.arr492` Array(UInt64), `nested.arr493` Array(UInt64), `nested.arr494` Array(UInt64), `nested.arr495` Array(UInt64), `nested.arr496` 
Array(UInt64), `nested.arr497` Array(UInt64), `nested.arr498` Array(UInt64), `nested.arr499` Array(UInt64), + arr500 Array(Array(Nullable(UInt64))), arr501 Array(Array(Nullable(UInt64))), arr502 Array(Array(Nullable(UInt64))), arr503 Array(Array(Nullable(UInt64))), arr504 Array(Array(Nullable(UInt64))), arr505 Array(Array(Nullable(UInt64))), arr506 Array(Array(Nullable(UInt64))), arr507 Array(Array(Nullable(UInt64))), arr508 Array(Array(Nullable(UInt64))), arr509 Array(Array(Nullable(UInt64))), arr510 Array(Array(Nullable(UInt64))), arr511 Array(Array(Nullable(UInt64))), arr512 Array(Array(Nullable(UInt64))), arr513 Array(Array(Nullable(UInt64))), arr514 Array(Array(Nullable(UInt64))), arr515 Array(Array(Nullable(UInt64))), arr516 Array(Array(Nullable(UInt64))), arr517 Array(Array(Nullable(UInt64))), arr518 Array(Array(Nullable(UInt64))), arr519 Array(Array(Nullable(UInt64))), arr520 Array(Array(Nullable(UInt64))), arr521 Array(Array(Nullable(UInt64))), arr522 Array(Array(Nullable(UInt64))), arr523 Array(Array(Nullable(UInt64))), arr524 Array(Array(Nullable(UInt64))), arr525 Array(Array(Nullable(UInt64))), arr526 Array(Array(Nullable(UInt64))), arr527 Array(Array(Nullable(UInt64))), arr528 Array(Array(Nullable(UInt64))), arr529 Array(Array(Nullable(UInt64))), arr530 Array(Array(Nullable(UInt64))), arr531 Array(Array(Nullable(UInt64))), arr532 Array(Array(Nullable(UInt64))), arr533 Array(Array(Nullable(UInt64))), arr534 Array(Array(Nullable(UInt64))), arr535 Array(Array(Nullable(UInt64))), arr536 Array(Array(Nullable(UInt64))), arr537 Array(Array(Nullable(UInt64))), arr538 Array(Array(Nullable(UInt64))), arr539 Array(Array(Nullable(UInt64))), arr540 Array(Array(Nullable(UInt64))), arr541 Array(Array(Nullable(UInt64))), arr542 Array(Array(Nullable(UInt64))), arr543 Array(Array(Nullable(UInt64))), arr544 Array(Array(Nullable(UInt64))), arr545 Array(Array(Nullable(UInt64))), arr546 Array(Array(Nullable(UInt64))), arr547 Array(Array(Nullable(UInt64))), arr548 Array(Array(Nullable(UInt64))), arr549 Array(Array(Nullable(UInt64))), arr550 Array(Array(Nullable(UInt64))), arr551 Array(Array(Nullable(UInt64))), arr552 Array(Array(Nullable(UInt64))), arr553 Array(Array(Nullable(UInt64))), arr554 Array(Array(Nullable(UInt64))), arr555 Array(Array(Nullable(UInt64))), arr556 Array(Array(Nullable(UInt64))), arr557 Array(Array(Nullable(UInt64))), arr558 Array(Array(Nullable(UInt64))), arr559 Array(Array(Nullable(UInt64))), arr560 Array(Array(Nullable(UInt64))), arr561 Array(Array(Nullable(UInt64))), arr562 Array(Array(Nullable(UInt64))), arr563 Array(Array(Nullable(UInt64))), arr564 Array(Array(Nullable(UInt64))), arr565 Array(Array(Nullable(UInt64))), arr566 Array(Array(Nullable(UInt64))), arr567 Array(Array(Nullable(UInt64))), arr568 Array(Array(Nullable(UInt64))), arr569 Array(Array(Nullable(UInt64))), arr570 Array(Array(Nullable(UInt64))), arr571 Array(Array(Nullable(UInt64))), arr572 Array(Array(Nullable(UInt64))), arr573 Array(Array(Nullable(UInt64))), arr574 Array(Array(Nullable(UInt64))), arr575 Array(Array(Nullable(UInt64))), arr576 Array(Array(Nullable(UInt64))), arr577 Array(Array(Nullable(UInt64))), arr578 Array(Array(Nullable(UInt64))), arr579 Array(Array(Nullable(UInt64))), arr580 Array(Array(Nullable(UInt64))), arr581 Array(Array(Nullable(UInt64))), arr582 Array(Array(Nullable(UInt64))), arr583 Array(Array(Nullable(UInt64))), arr584 Array(Array(Nullable(UInt64))), arr585 Array(Array(Nullable(UInt64))), arr586 Array(Array(Nullable(UInt64))), arr587 Array(Array(Nullable(UInt64))), arr588 
Array(Array(Nullable(UInt64))), arr589 Array(Array(Nullable(UInt64))), arr590 Array(Array(Nullable(UInt64))), arr591 Array(Array(Nullable(UInt64))), arr592 Array(Array(Nullable(UInt64))), arr593 Array(Array(Nullable(UInt64))), arr594 Array(Array(Nullable(UInt64))), arr595 Array(Array(Nullable(UInt64))), arr596 Array(Array(Nullable(UInt64))), arr597 Array(Array(Nullable(UInt64))), arr598 Array(Array(Nullable(UInt64))), arr599 Array(Array(Nullable(UInt64))), + arr600 Array(Array(Nullable(UInt64))), arr601 Array(Array(Nullable(UInt64))), arr602 Array(Array(Nullable(UInt64))), arr603 Array(Array(Nullable(UInt64))), arr604 Array(Array(Nullable(UInt64))), arr605 Array(Array(Nullable(UInt64))), arr606 Array(Array(Nullable(UInt64))), arr607 Array(Array(Nullable(UInt64))), arr608 Array(Array(Nullable(UInt64))), arr609 Array(Array(Nullable(UInt64))), arr610 Array(Array(Nullable(UInt64))), arr611 Array(Array(Nullable(UInt64))), arr612 Array(Array(Nullable(UInt64))), arr613 Array(Array(Nullable(UInt64))), arr614 Array(Array(Nullable(UInt64))), arr615 Array(Array(Nullable(UInt64))), arr616 Array(Array(Nullable(UInt64))), arr617 Array(Array(Nullable(UInt64))), arr618 Array(Array(Nullable(UInt64))), arr619 Array(Array(Nullable(UInt64))), arr620 Array(Array(Nullable(UInt64))), arr621 Array(Array(Nullable(UInt64))), arr622 Array(Array(Nullable(UInt64))), arr623 Array(Array(Nullable(UInt64))), arr624 Array(Array(Nullable(UInt64))), arr625 Array(Array(Nullable(UInt64))), arr626 Array(Array(Nullable(UInt64))), arr627 Array(Array(Nullable(UInt64))), arr628 Array(Array(Nullable(UInt64))), arr629 Array(Array(Nullable(UInt64))), arr630 Array(Array(Nullable(UInt64))), arr631 Array(Array(Nullable(UInt64))), arr632 Array(Array(Nullable(UInt64))), arr633 Array(Array(Nullable(UInt64))), arr634 Array(Array(Nullable(UInt64))), arr635 Array(Array(Nullable(UInt64))), arr636 Array(Array(Nullable(UInt64))), arr637 Array(Array(Nullable(UInt64))), arr638 Array(Array(Nullable(UInt64))), arr639 Array(Array(Nullable(UInt64))), arr640 Array(Array(Nullable(UInt64))), arr641 Array(Array(Nullable(UInt64))), arr642 Array(Array(Nullable(UInt64))), arr643 Array(Array(Nullable(UInt64))), arr644 Array(Array(Nullable(UInt64))), arr645 Array(Array(Nullable(UInt64))), arr646 Array(Array(Nullable(UInt64))), arr647 Array(Array(Nullable(UInt64))), arr648 Array(Array(Nullable(UInt64))), arr649 Array(Array(Nullable(UInt64))), arr650 Array(Array(Nullable(UInt64))), arr651 Array(Array(Nullable(UInt64))), arr652 Array(Array(Nullable(UInt64))), arr653 Array(Array(Nullable(UInt64))), arr654 Array(Array(Nullable(UInt64))), arr655 Array(Array(Nullable(UInt64))), arr656 Array(Array(Nullable(UInt64))), arr657 Array(Array(Nullable(UInt64))), arr658 Array(Array(Nullable(UInt64))), arr659 Array(Array(Nullable(UInt64))), arr660 Array(Array(Nullable(UInt64))), arr661 Array(Array(Nullable(UInt64))), arr662 Array(Array(Nullable(UInt64))), arr663 Array(Array(Nullable(UInt64))), arr664 Array(Array(Nullable(UInt64))), arr665 Array(Array(Nullable(UInt64))), arr666 Array(Array(Nullable(UInt64))), arr667 Array(Array(Nullable(UInt64))), arr668 Array(Array(Nullable(UInt64))), arr669 Array(Array(Nullable(UInt64))), arr670 Array(Array(Nullable(UInt64))), arr671 Array(Array(Nullable(UInt64))), arr672 Array(Array(Nullable(UInt64))), arr673 Array(Array(Nullable(UInt64))), arr674 Array(Array(Nullable(UInt64))), arr675 Array(Array(Nullable(UInt64))), arr676 Array(Array(Nullable(UInt64))), arr677 Array(Array(Nullable(UInt64))), arr678 Array(Array(Nullable(UInt64))), arr679 
Array(Array(Nullable(UInt64))), arr680 Array(Array(Nullable(UInt64))), arr681 Array(Array(Nullable(UInt64))), arr682 Array(Array(Nullable(UInt64))), arr683 Array(Array(Nullable(UInt64))), arr684 Array(Array(Nullable(UInt64))), arr685 Array(Array(Nullable(UInt64))), arr686 Array(Array(Nullable(UInt64))), arr687 Array(Array(Nullable(UInt64))), arr688 Array(Array(Nullable(UInt64))), arr689 Array(Array(Nullable(UInt64))), arr690 Array(Array(Nullable(UInt64))), arr691 Array(Array(Nullable(UInt64))), arr692 Array(Array(Nullable(UInt64))), arr693 Array(Array(Nullable(UInt64))), arr694 Array(Array(Nullable(UInt64))), arr695 Array(Array(Nullable(UInt64))), arr696 Array(Array(Nullable(UInt64))), arr697 Array(Array(Nullable(UInt64))), arr698 Array(Array(Nullable(UInt64))), arr699 Array(Array(Nullable(UInt64))), + arr700 Array(Array(Nullable(UInt64))), arr701 Array(Array(Nullable(UInt64))), arr702 Array(Array(Nullable(UInt64))), arr703 Array(Array(Nullable(UInt64))), arr704 Array(Array(Nullable(UInt64))), arr705 Array(Array(Nullable(UInt64))), arr706 Array(Array(Nullable(UInt64))), arr707 Array(Array(Nullable(UInt64))), arr708 Array(Array(Nullable(UInt64))), arr709 Array(Array(Nullable(UInt64))), arr710 Array(Array(Nullable(UInt64))), arr711 Array(Array(Nullable(UInt64))), arr712 Array(Array(Nullable(UInt64))), arr713 Array(Array(Nullable(UInt64))), arr714 Array(Array(Nullable(UInt64))), arr715 Array(Array(Nullable(UInt64))), arr716 Array(Array(Nullable(UInt64))), arr717 Array(Array(Nullable(UInt64))), arr718 Array(Array(Nullable(UInt64))), arr719 Array(Array(Nullable(UInt64))), arr720 Array(Array(Nullable(UInt64))), arr721 Array(Array(Nullable(UInt64))), arr722 Array(Array(Nullable(UInt64))), arr723 Array(Array(Nullable(UInt64))), arr724 Array(Array(Nullable(UInt64))), arr725 Array(Array(Nullable(UInt64))), arr726 Array(Array(Nullable(UInt64))), arr727 Array(Array(Nullable(UInt64))), arr728 Array(Array(Nullable(UInt64))), arr729 Array(Array(Nullable(UInt64))), arr730 Array(Array(Nullable(UInt64))), arr731 Array(Array(Nullable(UInt64))), arr732 Array(Array(Nullable(UInt64))), arr733 Array(Array(Nullable(UInt64))), arr734 Array(Array(Nullable(UInt64))), arr735 Array(Array(Nullable(UInt64))), arr736 Array(Array(Nullable(UInt64))), arr737 Array(Array(Nullable(UInt64))), arr738 Array(Array(Nullable(UInt64))), arr739 Array(Array(Nullable(UInt64))), arr740 Array(Array(Nullable(UInt64))), arr741 Array(Array(Nullable(UInt64))), arr742 Array(Array(Nullable(UInt64))), arr743 Array(Array(Nullable(UInt64))), arr744 Array(Array(Nullable(UInt64))), arr745 Array(Array(Nullable(UInt64))), arr746 Array(Array(Nullable(UInt64))), arr747 Array(Array(Nullable(UInt64))), arr748 Array(Array(Nullable(UInt64))), arr749 Array(Array(Nullable(UInt64))), arr750 Array(Array(Nullable(UInt64))), arr751 Array(Array(Nullable(UInt64))), arr752 Array(Array(Nullable(UInt64))), arr753 Array(Array(Nullable(UInt64))), arr754 Array(Array(Nullable(UInt64))), arr755 Array(Array(Nullable(UInt64))), arr756 Array(Array(Nullable(UInt64))), arr757 Array(Array(Nullable(UInt64))), arr758 Array(Array(Nullable(UInt64))), arr759 Array(Array(Nullable(UInt64))), arr760 Array(Array(Nullable(UInt64))), arr761 Array(Array(Nullable(UInt64))), arr762 Array(Array(Nullable(UInt64))), arr763 Array(Array(Nullable(UInt64))), arr764 Array(Array(Nullable(UInt64))), arr765 Array(Array(Nullable(UInt64))), arr766 Array(Array(Nullable(UInt64))), arr767 Array(Array(Nullable(UInt64))), arr768 Array(Array(Nullable(UInt64))), arr769 Array(Array(Nullable(UInt64))), arr770 
Array(Array(Nullable(UInt64))), arr771 Array(Array(Nullable(UInt64))), arr772 Array(Array(Nullable(UInt64))), arr773 Array(Array(Nullable(UInt64))), arr774 Array(Array(Nullable(UInt64))), arr775 Array(Array(Nullable(UInt64))), arr776 Array(Array(Nullable(UInt64))), arr777 Array(Array(Nullable(UInt64))), arr778 Array(Array(Nullable(UInt64))), arr779 Array(Array(Nullable(UInt64))), arr780 Array(Array(Nullable(UInt64))), arr781 Array(Array(Nullable(UInt64))), arr782 Array(Array(Nullable(UInt64))), arr783 Array(Array(Nullable(UInt64))), arr784 Array(Array(Nullable(UInt64))), arr785 Array(Array(Nullable(UInt64))), arr786 Array(Array(Nullable(UInt64))), arr787 Array(Array(Nullable(UInt64))), arr788 Array(Array(Nullable(UInt64))), arr789 Array(Array(Nullable(UInt64))), arr790 Array(Array(Nullable(UInt64))), arr791 Array(Array(Nullable(UInt64))), arr792 Array(Array(Nullable(UInt64))), arr793 Array(Array(Nullable(UInt64))), arr794 Array(Array(Nullable(UInt64))), arr795 Array(Array(Nullable(UInt64))), arr796 Array(Array(Nullable(UInt64))), arr797 Array(Array(Nullable(UInt64))), arr798 Array(Array(Nullable(UInt64))), arr799 Array(Array(Nullable(UInt64))), + arr800 Array(Array(Nullable(UInt64))), arr801 Array(Array(Nullable(UInt64))), arr802 Array(Array(Nullable(UInt64))), arr803 Array(Array(Nullable(UInt64))), arr804 Array(Array(Nullable(UInt64))), arr805 Array(Array(Nullable(UInt64))), arr806 Array(Array(Nullable(UInt64))), arr807 Array(Array(Nullable(UInt64))), arr808 Array(Array(Nullable(UInt64))), arr809 Array(Array(Nullable(UInt64))), arr810 Array(Array(Nullable(UInt64))), arr811 Array(Array(Nullable(UInt64))), arr812 Array(Array(Nullable(UInt64))), arr813 Array(Array(Nullable(UInt64))), arr814 Array(Array(Nullable(UInt64))), arr815 Array(Array(Nullable(UInt64))), arr816 Array(Array(Nullable(UInt64))), arr817 Array(Array(Nullable(UInt64))), arr818 Array(Array(Nullable(UInt64))), arr819 Array(Array(Nullable(UInt64))), arr820 Array(Array(Nullable(UInt64))), arr821 Array(Array(Nullable(UInt64))), arr822 Array(Array(Nullable(UInt64))), arr823 Array(Array(Nullable(UInt64))), arr824 Array(Array(Nullable(UInt64))), arr825 Array(Array(Nullable(UInt64))), arr826 Array(Array(Nullable(UInt64))), arr827 Array(Array(Nullable(UInt64))), arr828 Array(Array(Nullable(UInt64))), arr829 Array(Array(Nullable(UInt64))), arr830 Array(Array(Nullable(UInt64))), arr831 Array(Array(Nullable(UInt64))), arr832 Array(Array(Nullable(UInt64))), arr833 Array(Array(Nullable(UInt64))), arr834 Array(Array(Nullable(UInt64))), arr835 Array(Array(Nullable(UInt64))), arr836 Array(Array(Nullable(UInt64))), arr837 Array(Array(Nullable(UInt64))), arr838 Array(Array(Nullable(UInt64))), arr839 Array(Array(Nullable(UInt64))), arr840 Array(Array(Nullable(UInt64))), arr841 Array(Array(Nullable(UInt64))), arr842 Array(Array(Nullable(UInt64))), arr843 Array(Array(Nullable(UInt64))), arr844 Array(Array(Nullable(UInt64))), arr845 Array(Array(Nullable(UInt64))), arr846 Array(Array(Nullable(UInt64))), arr847 Array(Array(Nullable(UInt64))), arr848 Array(Array(Nullable(UInt64))), arr849 Array(Array(Nullable(UInt64))), arr850 Array(Array(Nullable(UInt64))), arr851 Array(Array(Nullable(UInt64))), arr852 Array(Array(Nullable(UInt64))), arr853 Array(Array(Nullable(UInt64))), arr854 Array(Array(Nullable(UInt64))), arr855 Array(Array(Nullable(UInt64))), arr856 Array(Array(Nullable(UInt64))), arr857 Array(Array(Nullable(UInt64))), arr858 Array(Array(Nullable(UInt64))), arr859 Array(Array(Nullable(UInt64))), arr860 Array(Array(Nullable(UInt64))), arr861 
Array(Array(Nullable(UInt64))), arr862 Array(Array(Nullable(UInt64))), arr863 Array(Array(Nullable(UInt64))), arr864 Array(Array(Nullable(UInt64))), arr865 Array(Array(Nullable(UInt64))), arr866 Array(Array(Nullable(UInt64))), arr867 Array(Array(Nullable(UInt64))), arr868 Array(Array(Nullable(UInt64))), arr869 Array(Array(Nullable(UInt64))), arr870 Array(Array(Nullable(UInt64))), arr871 Array(Array(Nullable(UInt64))), arr872 Array(Array(Nullable(UInt64))), arr873 Array(Array(Nullable(UInt64))), arr874 Array(Array(Nullable(UInt64))), arr875 Array(Array(Nullable(UInt64))), arr876 Array(Array(Nullable(UInt64))), arr877 Array(Array(Nullable(UInt64))), arr878 Array(Array(Nullable(UInt64))), arr879 Array(Array(Nullable(UInt64))), arr880 Array(Array(Nullable(UInt64))), arr881 Array(Array(Nullable(UInt64))), arr882 Array(Array(Nullable(UInt64))), arr883 Array(Array(Nullable(UInt64))), arr884 Array(Array(Nullable(UInt64))), arr885 Array(Array(Nullable(UInt64))), arr886 Array(Array(Nullable(UInt64))), arr887 Array(Array(Nullable(UInt64))), arr888 Array(Array(Nullable(UInt64))), arr889 Array(Array(Nullable(UInt64))), arr890 Array(Array(Nullable(UInt64))), arr891 Array(Array(Nullable(UInt64))), arr892 Array(Array(Nullable(UInt64))), arr893 Array(Array(Nullable(UInt64))), arr894 Array(Array(Nullable(UInt64))), arr895 Array(Array(Nullable(UInt64))), arr896 Array(Array(Nullable(UInt64))), arr897 Array(Array(Nullable(UInt64))), arr898 Array(Array(Nullable(UInt64))), arr899 Array(Array(Nullable(UInt64))), + arr900 Array(Array(Nullable(UInt64))), arr901 Array(Array(Nullable(UInt64))), arr902 Array(Array(Nullable(UInt64))), arr903 Array(Array(Nullable(UInt64))), arr904 Array(Array(Nullable(UInt64))), arr905 Array(Array(Nullable(UInt64))), arr906 Array(Array(Nullable(UInt64))), arr907 Array(Array(Nullable(UInt64))), arr908 Array(Array(Nullable(UInt64))), arr909 Array(Array(Nullable(UInt64))), arr910 Array(Array(Nullable(UInt64))), arr911 Array(Array(Nullable(UInt64))), arr912 Array(Array(Nullable(UInt64))), arr913 Array(Array(Nullable(UInt64))), arr914 Array(Array(Nullable(UInt64))), arr915 Array(Array(Nullable(UInt64))), arr916 Array(Array(Nullable(UInt64))), arr917 Array(Array(Nullable(UInt64))), arr918 Array(Array(Nullable(UInt64))), arr919 Array(Array(Nullable(UInt64))), arr920 Array(Array(Nullable(UInt64))), arr921 Array(Array(Nullable(UInt64))), arr922 Array(Array(Nullable(UInt64))), arr923 Array(Array(Nullable(UInt64))), arr924 Array(Array(Nullable(UInt64))), arr925 Array(Array(Nullable(UInt64))), arr926 Array(Array(Nullable(UInt64))), arr927 Array(Array(Nullable(UInt64))), arr928 Array(Array(Nullable(UInt64))), arr929 Array(Array(Nullable(UInt64))), arr930 Array(Array(Nullable(UInt64))), arr931 Array(Array(Nullable(UInt64))), arr932 Array(Array(Nullable(UInt64))), arr933 Array(Array(Nullable(UInt64))), arr934 Array(Array(Nullable(UInt64))), arr935 Array(Array(Nullable(UInt64))), arr936 Array(Array(Nullable(UInt64))), arr937 Array(Array(Nullable(UInt64))), arr938 Array(Array(Nullable(UInt64))), arr939 Array(Array(Nullable(UInt64))), arr940 Array(Array(Nullable(UInt64))), arr941 Array(Array(Nullable(UInt64))), arr942 Array(Array(Nullable(UInt64))), arr943 Array(Array(Nullable(UInt64))), arr944 Array(Array(Nullable(UInt64))), arr945 Array(Array(Nullable(UInt64))), arr946 Array(Array(Nullable(UInt64))), arr947 Array(Array(Nullable(UInt64))), arr948 Array(Array(Nullable(UInt64))), arr949 Array(Array(Nullable(UInt64))), arr950 Array(Array(Nullable(UInt64))), arr951 Array(Array(Nullable(UInt64))), arr952 
Array(Array(Nullable(UInt64))), arr953 Array(Array(Nullable(UInt64))), arr954 Array(Array(Nullable(UInt64))), arr955 Array(Array(Nullable(UInt64))), arr956 Array(Array(Nullable(UInt64))), arr957 Array(Array(Nullable(UInt64))), arr958 Array(Array(Nullable(UInt64))), arr959 Array(Array(Nullable(UInt64))), arr960 Array(Array(Nullable(UInt64))), arr961 Array(Array(Nullable(UInt64))), arr962 Array(Array(Nullable(UInt64))), arr963 Array(Array(Nullable(UInt64))), arr964 Array(Array(Nullable(UInt64))), arr965 Array(Array(Nullable(UInt64))), arr966 Array(Array(Nullable(UInt64))), arr967 Array(Array(Nullable(UInt64))), arr968 Array(Array(Nullable(UInt64))), arr969 Array(Array(Nullable(UInt64))), arr970 Array(Array(Nullable(UInt64))), arr971 Array(Array(Nullable(UInt64))), arr972 Array(Array(Nullable(UInt64))), arr973 Array(Array(Nullable(UInt64))), arr974 Array(Array(Nullable(UInt64))), arr975 Array(Array(Nullable(UInt64))), arr976 Array(Array(Nullable(UInt64))), arr977 Array(Array(Nullable(UInt64))), arr978 Array(Array(Nullable(UInt64))), arr979 Array(Array(Nullable(UInt64))), arr980 Array(Array(Nullable(UInt64))), arr981 Array(Array(Nullable(UInt64))), arr982 Array(Array(Nullable(UInt64))), arr983 Array(Array(Nullable(UInt64))), arr984 Array(Array(Nullable(UInt64))), arr985 Array(Array(Nullable(UInt64))), arr986 Array(Array(Nullable(UInt64))), arr987 Array(Array(Nullable(UInt64))), arr988 Array(Array(Nullable(UInt64))), arr989 Array(Array(Nullable(UInt64))), arr990 Array(Array(Nullable(UInt64))), arr991 Array(Array(Nullable(UInt64))), arr992 Array(Array(Nullable(UInt64))), arr993 Array(Array(Nullable(UInt64))), arr994 Array(Array(Nullable(UInt64))), arr995 Array(Array(Nullable(UInt64))), arr996 Array(Array(Nullable(UInt64))), arr997 Array(Array(Nullable(UInt64))), arr998 Array(Array(Nullable(UInt64))), arr999 Array(Array(Nullable(UInt64)))) + ENGINE = MergeTree ORDER BY id PARTITION BY id % 100 + + + INSERT INTO lot_of_arrays(id) SELECT number FROM numbers(1000) + OPTIMIZE TABLE lot_of_arrays FINAL + + SELECT nested.arr0 FROM lot_of_arrays WHERE id > 10 FORMAT Null + + DROP TABLE IF EXISTS lot_of_arrays + diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index aab9aeba937..394e8e99052 100755 --- a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -18,7 +18,7 @@ function ch_url() { # Check correct exceptions handling -exception_pattern="displayText() = DB::Exception:[[:print:]]*" +exception_pattern="DB::Exception:[[:print:]]*" function check_only_exception() { local res diff --git a/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh b/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh index bdb4627ae30..e1540d1a25e 100755 --- a/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh +++ b/tests/queries/0_stateless/00704_drop_truncate_memory_table.sh @@ -23,7 +23,7 @@ INSERT INTO memory SELECT * FROM numbers(1000);" ${CLICKHOUSE_CLIENT} --multiquery --query=" SET max_threads = 1; -SELECT count() FROM memory WHERE NOT ignore(sleep(0.0001));" 2>&1 | grep -c -P '^1000$|^0$|Table .+? 
doesn.t exist' & +SELECT count() FROM memory WHERE NOT ignore(sleep(0.0001));" 2>&1 | grep -c -P '^1000$|^0$|Exception' & sleep 0.05; diff --git a/tests/queries/0_stateless/00850_global_join_dups.reference b/tests/queries/0_stateless/00850_global_join_dups.reference index d94e4df3425..bbe467dd08e 100644 --- a/tests/queries/0_stateless/00850_global_join_dups.reference +++ b/tests/queries/0_stateless/00850_global_join_dups.reference @@ -1,5 +1,6 @@ 1 \N +\N 0 0 0 0 diff --git a/tests/queries/0_stateless/00850_global_join_dups.sql b/tests/queries/0_stateless/00850_global_join_dups.sql index 92c1d81aa8e..5843bf9c227 100644 --- a/tests/queries/0_stateless/00850_global_join_dups.sql +++ b/tests/queries/0_stateless/00850_global_join_dups.sql @@ -22,6 +22,8 @@ GLOBAL INNER JOIN -- query from fuzzer SELECT toDateTime64(toString(toString('0000-00-00 00:00:000000-00-00 00:00:00', toDateTime64(toDateTime64('655.36', -2, NULL)))), NULL) FROM t1_00850 GLOBAL INNER JOIN (SELECT toDateTime64(toDateTime64('6553.6', '', NULL), NULL), * FROM (SELECT * FROM t2_00850) INNER JOIN (SELECT toDateTime64('6553.7', 1024, NULL), * FROM t1_00850) USING (dummy)) USING (dummy); +SELECT toString('0000-00-00 00:00:000000-00-00 00:00:00', toDateTime64(toDateTime64('655.36', -2, NULL))); + DROP TABLE t_local; DROP TABLE t1_00850; DROP TABLE t2_00850; diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index 1a9c81ef686..7b1cc383949 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -89,7 +89,7 @@ idx10 ['This','is','a','test'] 23.00 24.00 === Try load data from datapage_v2.snappy.parquet -Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: data for INSERT was parsed from stdin +Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: data for INSERT was parsed from stdin. 
(CANNOT_READ_ALL_DATA) === Try load data from datatype-date32.parquet 1925-01-01 diff --git a/tests/queries/0_stateless/00907_set_index_with_nullable_and_low_cardinality_bug.sql b/tests/queries/0_stateless/00907_set_index_with_nullable_and_low_cardinality_bug.sql index 75e0e482566..3a55a69c726 100644 --- a/tests/queries/0_stateless/00907_set_index_with_nullable_and_low_cardinality_bug.sql +++ b/tests/queries/0_stateless/00907_set_index_with_nullable_and_low_cardinality_bug.sql @@ -8,8 +8,7 @@ CREATE TABLE null_lc_set_index ( INDEX test_user_idx (user) TYPE set(0) GRANULARITY 8192 ) ENGINE=MergeTree PARTITION BY toYYYYMMDD(timestamp) - ORDER BY (timestamp, action, cityHash64(user)) - SAMPLE BY cityHash64(user); + ORDER BY (timestamp, action, cityHash64(user)); INSERT INTO null_lc_set_index VALUES (1550883010, 'subscribe', 'alice'); INSERT INTO null_lc_set_index VALUES (1550883020, 'follow', 'bob'); diff --git a/tests/queries/0_stateless/00944_minmax_null.sql b/tests/queries/0_stateless/00944_minmax_null.sql index 01b86775481..71ae2e77205 100644 --- a/tests/queries/0_stateless/00944_minmax_null.sql +++ b/tests/queries/0_stateless/00944_minmax_null.sql @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS min_max_with_nullable_string; CREATE TABLE min_max_with_nullable_string ( t DateTime, nullable_str Nullable(String), - INDEX nullable_str_min_max nullable_str TYPE minmax GRANULARITY 8192 + INDEX nullable_str_min_max nullable_str TYPE minmax GRANULARITY 1 ) ENGINE = MergeTree ORDER BY (t); INSERT INTO min_max_with_nullable_string(t) VALUES (now()) (now()); @@ -20,4 +20,4 @@ SELECT count() FROM min_max_with_nullable_string WHERE nullable_str = '.'; SELECT count() FROM min_max_with_nullable_string WHERE nullable_str = ''; -DROP TABLE min_max_with_nullable_string; \ No newline at end of file +DROP TABLE min_max_with_nullable_string; diff --git a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql index b6b981c7d00..6d57cd0447d 100644 --- a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql +++ b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql @@ -1,2 +1,2 @@ -SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 279 } +SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 198 } SELECT count() FROM remote('127.0.0.1|localhos', system.one); diff --git a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql index dec748789c8..5e30b6b1a9d 100644 --- a/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql +++ b/tests/queries/0_stateless/01040_distributed_directory_monitor_batch_inserts.sql @@ -1,12 +1,11 @@ -SET distributed_directory_monitor_batch_inserts=1; -SET distributed_directory_monitor_sleep_time_ms=10; -SET distributed_directory_monitor_max_sleep_time_ms=100; - DROP TABLE IF EXISTS test_01040; DROP TABLE IF EXISTS dist_test_01040; CREATE TABLE test_01040 (key UInt64) ENGINE=TinyLog(); -CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards, currentDatabase(), test_01040, key); +CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards, currentDatabase(), test_01040, key) SETTINGS + monitor_batch_inserts=1, + monitor_sleep_time_ms=10, + monitor_max_sleep_time_ms=100; -- internal_replication=false SELECT 'test_cluster_two_shards prefer_localhost_replica=0'; @@ 
-26,7 +25,10 @@ TRUNCATE TABLE test_01040; DROP TABLE dist_test_01040; -- internal_replication=true -CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards_internal_replication, currentDatabase(), test_01040, key); +CREATE TABLE dist_test_01040 AS test_01040 Engine=Distributed(test_cluster_two_shards_internal_replication, currentDatabase(), test_01040, key) SETTINGS + monitor_batch_inserts=1, + monitor_sleep_time_ms=10, + monitor_max_sleep_time_ms=100; SELECT 'test_cluster_two_shards_internal_replication prefer_localhost_replica=0'; SET prefer_localhost_replica=0; INSERT INTO dist_test_01040 SELECT toUInt64(number) FROM numbers(2); diff --git a/tests/queries/0_stateless/01060_avro.reference b/tests/queries/0_stateless/01060_avro.reference index 338ffe0cf96..7a5aa43a36a 100644 --- a/tests/queries/0_stateless/01060_avro.reference +++ b/tests/queries/0_stateless/01060_avro.reference @@ -58,3 +58,9 @@ not found 0 1000 147 += string column pattern +"русская строка" +Ok +1 0 +1 1 +1 1 diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 4779cd64953..8c37014a593 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -89,3 +89,22 @@ ${CLICKHOUSE_LOCAL} -q "select toInt64(number) as a from numbers(1000) format A # type supported via conversion ${CLICKHOUSE_LOCAL} -q "select toInt16(123) as a format Avro" | wc -c | tr -d ' ' + +echo '=' string column pattern +${CLICKHOUSE_LOCAL} -q "select 'русская строка' as a format Avro SETTINGS output_format_avro_string_column_pattern = 'a'" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a String" -q 'select * from table' + +# it is expected that invalid UTF-8 can be created +${CLICKHOUSE_LOCAL} -q "select '\x61\xF0\x80\x80\x80b' as a format Avro" > /dev/null && echo Ok + +A_NEEDLE="'\"name\":\"a\",\"type\":\"string\"'" +B_NEEDLE="'\"name\":\"b\",\"type\":\"string\"'" +PATTERNQUERY="select 'русская строка' as a, 'русская строка' as b format Avro SETTINGS output_format_avro_string_column_pattern =" + +PATTERNPATTERN="'a'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" + +PATTERNPATTERN="'a|b'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" + +PATTERNPATTERN="'.*'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" diff --git a/tests/queries/0_stateless/01107_join_right_table_totals.reference b/tests/queries/0_stateless/01107_join_right_table_totals.reference index f71d3b0d05f..daf503b776d 100644 --- a/tests/queries/0_stateless/01107_join_right_table_totals.reference +++ b/tests/queries/0_stateless/01107_join_right_table_totals.reference @@ -18,3 +18,31 @@ 0 0 0 0 +1 1 +1 1 + +0 0 +1 1 +1 1 + +0 0 +1 1 +1 1 + +0 0 +1 1 +1 1 + +0 0 +1 1 + +0 0 +1 foo 1 1 300 + +0 foo 1 0 300 +1 100 1970-01-01 1 100 1970-01-01 +1 100 1970-01-01 1 200 1970-01-02 +1 200 1970-01-02 1 100 1970-01-01 +1 200 1970-01-02 1 200 1970-01-02 + +0 0 1970-01-01
0 0 1970-01-01 diff --git a/tests/queries/0_stateless/01107_join_right_table_totals.sql b/tests/queries/0_stateless/01107_join_right_table_totals.sql index a4f284e5e2d..f894b6bf8bb 100644 --- a/tests/queries/0_stateless/01107_join_right_table_totals.sql +++ b/tests/queries/0_stateless/01107_join_right_table_totals.sql @@ -35,29 +35,66 @@ FULL JOIN ) rr USING (id); -SELECT id, yago +SELECT id, yago FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr +FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr USING (id); -SELECT id, yago +SELECT id, yago FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr +FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr USING (id); -SELECT id, yago +SELECT id, yago FROM ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()) FROM t GROUP BY id WITH TOTALS ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr +FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr USING (id); -SELECT id, yago +SELECT id, yago FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr +FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr USING (id); -SELECT id, yago +SELECT id, yago FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll -FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr +FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr USING (id); +INSERT INTO t VALUES (1, 100, '1970-01-01'), (1, 200, '1970-01-02'); + +SELECT * +FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l +LEFT JOIN (SELECT item_id FROM t ) r +ON l.item_id = r.item_id; + +SELECT * +FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l +RIGHT JOIN (SELECT item_id FROM t ) r +ON l.item_id = r.item_id; + +SELECT * +FROM (SELECT item_id FROM t) l +LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r +ON l.item_id = r.item_id; + +SELECT * +FROM (SELECT item_id FROM t) l +RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r +ON l.item_id = r.item_id; + +SELECT * +FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l +LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r +ON l.item_id = r.item_id; + +SELECT * +FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS) l +LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ) r +ON l.item_id = r.item_id; + +SELECT * +FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS) l +LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ) r +ON l.item_id = r.item_id; + DROP TABLE t; diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference 
b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference index 0cc8c788fed..ca79d4a022e 100644 --- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference +++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference @@ -1,25 +1,25 @@ none Received exception from server: -Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57, e.displayText() = Error: Table default.throw already exists +Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.throw already exists. (TABLE_ALREADY_EXISTS) Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=8) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background. +Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=8) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background.(TIMEOUT_EXCEEDED) throw localhost 9000 0 0 0 -localhost 9000 57 Code: 57, e.displayText() = Error: Table default.throw already exists. 0 0 +localhost 9000 57 Code: 57. Error: Table default.throw already exists. (TABLE_ALREADY_EXISTS) 0 0 Received exception from server: -Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57, e.displayText() = Error: Table default.throw already exists +Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.throw already exists. (TABLE_ALREADY_EXISTS) localhost 9000 0 1 0 Received exception from server: -Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=8) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background. +Code: 159. Error: Received from localhost:9000. Error: Watching task is executing longer than distributed_ddl_task_timeout (=8) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background.(TIMEOUT_EXCEEDED) null_status_on_timeout localhost 9000 0 0 0 -localhost 9000 57 Code: 57, e.displayText() = Error: Table default.null_status already exists. 0 0 +localhost 9000 57 Code: 57. Error: Table default.null_status already exists. (TABLE_ALREADY_EXISTS) 0 0 Received exception from server: -Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57, e.displayText() = Error: Table default.null_status already exists +Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.null_status already exists. (TABLE_ALREADY_EXISTS) localhost 9000 0 1 0 localhost 1 \N \N 1 0 never_throw localhost 9000 0 0 0 -localhost 9000 57 Code: 57, e.displayText() = Error: Table default.never_throw already exists. 0 0 +localhost 9000 57 Code: 57. Error: Table default.never_throw already exists. 
(TABLE_ALREADY_EXISTS) 0 0 localhost 9000 0 1 0 localhost 1 \N \N 1 0 diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect index b2dc88a7795..2337b7d01fe 100755 --- a/tests/queries/0_stateless/01176_mysql_client_interactive.expect +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -22,5 +22,27 @@ expect "| dummy |" expect "| 0 |" expect "1 row in set" +# exception before start +send -- "select * from table_that_does_not_exist;\r" +expect "ERROR 60 (00000): Code: 60" + +# exception after start +send -- "select throwIf(number) from numbers(2) settings max_block_size=1;\r" +expect "ERROR 395 (00000): Code: 395" + +# other formats +send -- "select * from system.one format TSV;\r" +expect "ERROR 1 (00000): Code: 1" + +send -- "select count(number), sum(number) from numbers(10);\r" +expect "+---------------+-------------+" +expect "| count(number) | sum(number) |" +expect "+---------------+-------------+" +expect "| 10 | 45 |" +expect "+---------------+-------------+" +expect "1 row in set" +expect "Read 10 rows, 80.00 B" +expect "mysql> " + send -- "quit;\r" expect eof diff --git a/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference b/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference index 3d8d7fb770d..e7d4ea81714 100644 --- a/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference +++ b/tests/queries/0_stateless/01226_dist_on_dist_global_in.reference @@ -2,5 +2,4 @@ GLOBAL IN 0 0 0 -0 GLOBAL NOT IN diff --git a/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql b/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql index 588ea9c1048..ca9b28a14f4 100644 --- a/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql +++ b/tests/queries/0_stateless/01226_dist_on_dist_global_in.sql @@ -1,10 +1,8 @@ SELECT 'GLOBAL IN'; select * from remote('localhost', system.one) where dummy global in (0); -select * from remote('localhost', system.one) where toUInt64(dummy) global in numbers(1); select * from remote('localhost', system.one) where dummy global in system.one; select * from remote('localhost', system.one) where dummy global in (select 0); SELECT 'GLOBAL NOT IN'; select * from remote('localhost', system.one) where dummy global not in (0); -select * from remote('localhost', system.one) where toUInt64(dummy) global not in numbers(1); select * from remote('localhost', system.one) where dummy global not in system.one; select * from remote('localhost', system.one) where dummy global not in (select 0); diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference index acaf6531101..4442b0b6b61 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference @@ -115,6 +115,7 @@ GROUP BY WITH TOTALS LIMIT 2 0 4 0 +GROUP BY (compound) GROUP BY sharding_key, ... 0 0 1 0 @@ -123,6 +124,15 @@ GROUP BY sharding_key, ... 
GROUP BY ..., sharding_key 0 0 1 0 +0 0 +1 0 +sharding_key (compound) +1 2 3 +1 2 3 +1 2 6 +1 2 +1 2 +2 window functions 0 0 1 0 diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql index 6b6300a4871..4719119165a 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql @@ -97,6 +97,7 @@ select 'GROUP BY WITH TOTALS LIMIT'; select count(), * from dist_01247 group by number with totals limit 1; -- GROUP BY (compound) +select 'GROUP BY (compound)'; drop table if exists dist_01247; drop table if exists data_01247; create table data_01247 engine=Memory() as select number key, 0 value from numbers(2); @@ -106,6 +107,13 @@ select * from dist_01247 group by key, value; select 'GROUP BY ..., sharding_key'; select * from dist_01247 group by value, key; +-- sharding_key (compound) +select 'sharding_key (compound)'; +select k1, k2, sum(v) from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v), cityHash64(k1, k2)) group by k1, k2; -- optimization applied +select k1, any(k2), sum(v) from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v), cityHash64(k1, k2)) group by k1; -- optimization does not applied +select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v), cityHash64(k1, k2)); -- optimization applied +select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v), cityHash64(k1, k2)); -- optimization does not applied + -- window functions select 'window functions'; select key, sum(sum(value)) over (rows unbounded preceding) from dist_01247 group by key settings allow_experimental_window_functions=1; diff --git a/tests/queries/0_stateless/01247_some_msan_crashs_from_22517.reference b/tests/queries/0_stateless/01247_some_msan_crashs_from_22517.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01247_some_msan_crashs_from_22517.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01247_some_msan_crashs_from_22517.sql b/tests/queries/0_stateless/01247_some_msan_crashs_from_22517.sql new file mode 100644 index 00000000000..8bcbbde63d6 --- /dev/null +++ b/tests/queries/0_stateless/01247_some_msan_crashs_from_22517.sql @@ -0,0 +1,3 @@ +SELECT a FROM (SELECT ignore((SELECT 1)) AS a, a AS b); + +SELECT x FROM (SELECT dummy AS x, plus(ignore(ignore(ignore(ignore('-922337203.6854775808', ignore(NULL)), ArrLen = 256, ignore(100, Arr.C3, ignore(NULL), (SELECT 10.000100135803223, count(*) FROM system.time_zones) > NULL)))), dummy, 65535) AS dummy ORDER BY ignore(-2) ASC, identity(x) DESC NULLS FIRST) FORMAT Null; -- { serverError 47 } diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 035cb902bff..95f9e407f21 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -119,6 +119,7 @@ REMOTE [] GLOBAL SOURCES MONGO [] GLOBAL SOURCES MYSQL [] GLOBAL SOURCES POSTGRES [] GLOBAL SOURCES +SQLITE [] GLOBAL SOURCES ODBC [] GLOBAL SOURCES JDBC [] GLOBAL SOURCES HDFS [] GLOBAL SOURCES diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline_long.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect similarity index 95% rename from 
tests/queries/0_stateless/01293_client_interactive_vertical_multiline_long.expect rename to tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 85eb97fb6f2..5e845754402 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline_long.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -41,7 +41,7 @@ expect ":) " send -- "" expect eof -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --multiline" expect ":) " send -- "SELECT 1;\r" diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.reference b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01410_nullable_key.reference b/tests/queries/0_stateless/01410_nullable_key.reference deleted file mode 100644 index 75163f1bf41..00000000000 --- a/tests/queries/0_stateless/01410_nullable_key.reference +++ /dev/null @@ -1,35 +0,0 @@ -0 0 -2 3 -4 6 -6 9 -8 12 -10 15 -12 18 -14 21 -16 24 -18 27 -\N 0 -\N -1 -\N -2 -\N 0 -\N -1 -\N -2 -0 0 -2 3 -4 6 -6 9 -8 12 -10 15 -12 18 -14 21 -16 24 -18 27 -12 18 -14 21 -16 24 -18 27 -0 0 -2 3 -4 6 -6 9 -8 12 diff --git a/tests/queries/0_stateless/01410_nullable_key.sql b/tests/queries/0_stateless/01410_nullable_key.sql deleted file mode 100644 index 4a3701cf46d..00000000000 --- a/tests/queries/0_stateless/01410_nullable_key.sql +++ /dev/null @@ -1,13 +0,0 @@ -DROP TABLE IF EXISTS nullable_key; -CREATE TABLE nullable_key (k Nullable(int), v int) ENGINE MergeTree ORDER BY k SETTINGS allow_nullable_key = 1; - -INSERT INTO nullable_key SELECT number * 2, number * 3 FROM numbers(10); -INSERT INTO nullable_key SELECT NULL, -number FROM numbers(3); - -SELECT * FROM nullable_key ORDER BY k; -SELECT * FROM nullable_key WHERE k IS NULL; -SELECT * FROM nullable_key WHERE k IS NOT NULL; -SELECT * FROM nullable_key WHERE k > 10; -SELECT * FROM nullable_key WHERE k < 10; - -DROP TABLE nullable_key; diff --git a/tests/queries/0_stateless/01410_nullable_key_and_index.reference b/tests/queries/0_stateless/01410_nullable_key_and_index.reference new file mode 100644 index 00000000000..1fc2cf91e62 --- /dev/null +++ b/tests/queries/0_stateless/01410_nullable_key_and_index.reference @@ -0,0 +1,81 @@ +0 0 +2 3 +4 6 +6 9 +8 12 +10 15 +12 18 +14 21 +16 24 +18 27 +\N 0 +\N -1 +\N -2 +\N 0 +\N -1 +\N -2 +0 0 +2 3 +4 6 +6 9 +8 12 +10 15 +12 18 +14 21 +16 24 +18 27 +12 18 +14 21 +16 24 +18 27 +0 0 +2 3 +4 6 +6 9 +8 12 +\N 0 +\N -1 +\N -2 +0 0 +2 3 +4 6 +6 9 +8 12 +10 15 +12 18 +14 21 +16 24 +18 27 +10 15 +\N 0 +\N -1 +\N -2 +\N +123 +1 1 +1 3 +2 \N +2 2 +2 1 +2 7 +2 \N +3 \N +3 2 +3 4 +2 \N +2 \N +3 \N +1 3 +2 7 +3 4 +1 1 +2 2 +2 1 +3 2 +1 3 +2 7 +3 4 +1 1 +2 2 +2 1 +3 2 diff --git a/tests/queries/0_stateless/01410_nullable_key_and_index.sql b/tests/queries/0_stateless/01410_nullable_key_and_index.sql new file mode 100644 index 00000000000..ba473b5c29a --- /dev/null +++ b/tests/queries/0_stateless/01410_nullable_key_and_index.sql @@ -0,0 +1,61 @@ +DROP TABLE IF EXISTS nullable_key; +DROP TABLE IF EXISTS nullable_key_without_final_mark; +DROP TABLE IF EXISTS nullable_minmax_index; + +SET max_threads = 1; + +CREATE TABLE nullable_key (k Nullable(int), v int) ENGINE MergeTree ORDER BY k SETTINGS allow_nullable_key = 1, 
index_granularity = 1; + +INSERT INTO nullable_key SELECT number * 2, number * 3 FROM numbers(10); +INSERT INTO nullable_key SELECT NULL, -number FROM numbers(3); + +SELECT * FROM nullable_key ORDER BY k; + +SET force_primary_key = 1; +SET max_rows_to_read = 3; +SELECT * FROM nullable_key WHERE k IS NULL; +SET max_rows_to_read = 10; +SELECT * FROM nullable_key WHERE k IS NOT NULL; +SET max_rows_to_read = 5; +SELECT * FROM nullable_key WHERE k > 10; +SELECT * FROM nullable_key WHERE k < 10; + +OPTIMIZE TABLE nullable_key FINAL; + +SET max_rows_to_read = 4; -- one additional left mark needs to be read +SELECT * FROM nullable_key WHERE k IS NULL; +SET max_rows_to_read = 10; +SELECT * FROM nullable_key WHERE k IS NOT NULL; + +-- Nullable in set and with transform_null_in = 1 +SET max_rows_to_read = 3; +SELECT * FROM nullable_key WHERE k IN (10, 20) SETTINGS transform_null_in = 1; +SET max_rows_to_read = 5; +SELECT * FROM nullable_key WHERE k IN (3, NULL) SETTINGS transform_null_in = 1; + +CREATE TABLE nullable_key_without_final_mark (s Nullable(String)) ENGINE MergeTree ORDER BY s SETTINGS allow_nullable_key = 1, write_final_mark = 0; +INSERT INTO nullable_key_without_final_mark VALUES ('123'), (NULL); +SET max_rows_to_read = 0; +SELECT * FROM nullable_key_without_final_mark WHERE s IS NULL; +SELECT * FROM nullable_key_without_final_mark WHERE s IS NOT NULL; + +CREATE TABLE nullable_minmax_index (k int, v Nullable(int), INDEX v_minmax v TYPE minmax GRANULARITY 4) ENGINE MergeTree ORDER BY k SETTINGS index_granularity = 1; + +INSERT INTO nullable_minmax_index VALUES (1, 3), (2, 7), (3, 4), (2, NULL); -- [3, +Inf] +INSERT INTO nullable_minmax_index VALUES (1, 1), (2, 2), (3, 2), (2, 1); -- [1, 2] +INSERT INTO nullable_minmax_index VALUES (2, NULL), (3, NULL); -- [+Inf, +Inf] + +SET force_primary_key = 0; +SELECT * FROM nullable_minmax_index ORDER BY k; +SET max_rows_to_read = 6; +SELECT * FROM nullable_minmax_index WHERE v IS NULL; +SET max_rows_to_read = 8; +SELECT * FROM nullable_minmax_index WHERE v IS NOT NULL; +SET max_rows_to_read = 6; +SELECT * FROM nullable_minmax_index WHERE v > 2; +SET max_rows_to_read = 4; +SELECT * FROM nullable_minmax_index WHERE v <= 2; + +DROP TABLE nullable_key; +DROP TABLE nullable_key_without_final_mark; +DROP TABLE nullable_minmax_index; diff --git a/tests/queries/0_stateless/01455_shard_leaf_max_rows_bytes_to_read.sql b/tests/queries/0_stateless/01455_shard_leaf_max_rows_bytes_to_read.sql index fca5c4534f7..d21aa391890 100644 --- a/tests/queries/0_stateless/01455_shard_leaf_max_rows_bytes_to_read.sql +++ b/tests/queries/0_stateless/01455_shard_leaf_max_rows_bytes_to_read.sql @@ -1,3 +1,10 @@ +-- Leaf limits is unreliable w/ prefer_localhost_replica=1. +-- Since in this case initial query and the query on the local node (to the +-- underlying table) has the same counters, so if query on the remote node +-- will be finished before local, then local node will already have some rows +-- read, and leaf limit will fail. 
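+-- Rough sketch of the failure mode described above (illustrative only, not an exact trace):
+-- with prefer_localhost_replica=1 a query like
+--   SELECT count() FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 100)
+--   SETTINGS max_rows_to_read_leaf=100
+-- may fail spuriously, because rows already reported by the remote replica are merged into
+-- the same counters the local read is checked against. With prefer_localhost_replica=0 the
+-- leaf counters stay per replica, which is what the checks below rely on.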
+SET prefer_localhost_replica=0; + SELECT count() FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 100) SETTINGS max_rows_to_read_leaf=1; -- { serverError 158 } SELECT count() FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 100) SETTINGS max_bytes_to_read_leaf=1; -- { serverError 307 } SELECT count() FROM (SELECT * FROM remote('127.0.0.1', system.numbers) LIMIT 100) SETTINGS max_rows_to_read_leaf=100; @@ -26,4 +33,4 @@ SELECT count() FROM (SELECT * FROM test_distributed) SETTINGS max_bytes_to_read SELECT count() FROM (SELECT * FROM test_distributed) SETTINGS max_bytes_to_read_leaf = 100000; DROP TABLE IF EXISTS test_local; -DROP TABLE IF EXISTS test_distributed; \ No newline at end of file +DROP TABLE IF EXISTS test_distributed; diff --git a/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql b/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql index 34c0d55d573..02e3d3ef73f 100644 --- a/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql +++ b/tests/queries/0_stateless/01460_DistributedFilesToInsert.sql @@ -2,33 +2,31 @@ -- (i.e. no .bin files and hence no sending is required) set prefer_localhost_replica=0; -set distributed_directory_monitor_sleep_time_ms=50; - drop table if exists data_01460; drop table if exists dist_01460; create table data_01460 as system.one engine=Null(); -create table dist_01460 as data_01460 engine=Distributed(test_shard_localhost, currentDatabase(), data_01460); +create table dist_01460 as data_01460 engine=Distributed(test_shard_localhost, currentDatabase(), data_01460) settings monitor_sleep_time_ms=50; select 'INSERT'; select value from system.metrics where metric = 'DistributedFilesToInsert'; insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- distributed_directory_monitor_sleep_time_ms +select sleep(1) format Null; -- monitor_sleep_time_ms select value from system.metrics where metric = 'DistributedFilesToInsert'; select 'STOP/START DISTRIBUTED SENDS'; system stop distributed sends dist_01460; insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- distributed_directory_monitor_sleep_time_ms +select sleep(1) format Null; -- monitor_sleep_time_ms select value from system.metrics where metric = 'DistributedFilesToInsert'; system start distributed sends dist_01460; -select sleep(1) format Null; -- distributed_directory_monitor_sleep_time_ms +select sleep(1) format Null; -- monitor_sleep_time_ms select value from system.metrics where metric = 'DistributedFilesToInsert'; select 'FLUSH DISTRIBUTED'; system stop distributed sends dist_01460; insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- distributed_directory_monitor_sleep_time_ms +select sleep(1) format Null; -- monitor_sleep_time_ms select value from system.metrics where metric = 'DistributedFilesToInsert'; system flush distributed dist_01460; select value from system.metrics where metric = 'DistributedFilesToInsert'; @@ -36,7 +34,7 @@ select value from system.metrics where metric = 'DistributedFilesToInsert'; select 'DROP TABLE'; system stop distributed sends dist_01460; insert into dist_01460 select * from system.one; -select sleep(1) format Null; -- distributed_directory_monitor_sleep_time_ms +select sleep(1) format Null; -- monitor_sleep_time_ms select value from system.metrics where metric = 'DistributedFilesToInsert'; drop table dist_01460; select value from system.metrics where metric = 'DistributedFilesToInsert'; diff --git 
a/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case b/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case index 585c8c369dd..00fb5c4e85b 100755 --- a/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case +++ b/tests/queries/0_stateless/01526_client_start_and_exit.expect-not-a-test-case @@ -4,7 +4,7 @@ log_user 1 set timeout 5 match_max 100000 -spawn bash -c "$env(CLICKHOUSE_CLIENT_BINARY) $env(CLICKHOUSE_CLIENT_OPT)" +spawn bash -c "$env(CLICKHOUSE_CLIENT_BINARY) --no-warnings $env(CLICKHOUSE_CLIENT_OPT)" expect ":) " send -- "\4" expect eof diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index ef598570b10..c9a4c04b352 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -61,3 +61,4 @@ second-index 1 1 1 +1 1 diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index cab32db0192..910c95afd64 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -127,3 +127,11 @@ select sum(i) from pd group by dt_m settings allow_experimental_projection_optim drop table pd; drop table pl; + +drop table if exists t; + +create temporary table t (x UInt64, y alias x); +insert into t values (1); +select sum(x), sum(y) from t; + +drop table t; diff --git a/tests/queries/0_stateless/01585_use_index_for_global_in_with_null.reference b/tests/queries/0_stateless/01585_use_index_for_global_in_with_null.reference new file mode 100644 index 00000000000..de0116f9eaa --- /dev/null +++ b/tests/queries/0_stateless/01585_use_index_for_global_in_with_null.reference @@ -0,0 +1,20 @@ +0 2 +1 3 +0 2 +1 3 +0 2 +1 3 +0 2 +1 3 +0 2 +1 3 +0 2 +1 3 +0 2 +1 3 +0 2 +1 3 +\N 100 +\N 100 +\N 100 +\N 100 diff --git a/tests/queries/0_stateless/01585_use_index_for_global_in_with_null.sql b/tests/queries/0_stateless/01585_use_index_for_global_in_with_null.sql new file mode 100644 index 00000000000..72f12ce435a --- /dev/null +++ b/tests/queries/0_stateless/01585_use_index_for_global_in_with_null.sql @@ -0,0 +1,30 @@ +drop table if exists xp; +drop table if exists xp_d; + +create table xp(i Nullable(UInt64), j UInt64) engine MergeTree order by i settings index_granularity = 1, allow_nullable_key = 1; +create table xp_d as xp engine Distributed(test_shard_localhost, currentDatabase(), xp); + +insert into xp select number, number + 2 from numbers(10); +insert into xp select null, 100; + +optimize table xp final; + +set max_rows_to_read = 2; +select * from xp where i in (select * from numbers(2)); +select * from xp where i global in (select * from numbers(2)); +select * from xp_d where i in (select * from numbers(2)); +select * from xp_d where i global in (select * from numbers(2)); + +set transform_null_in = 1; +select * from xp where i in (select * from numbers(2)); +select * from xp where i global in (select * from numbers(2)); +select * from xp_d where i in (select * from numbers(2)); +select * from xp_d where i global in (select * from numbers(2)); + +select * from xp where i in (null); +select * from xp where i global in (null); +select * from xp_d where i in (null); +select * from xp_d where i global in (null); + +drop table if exists xp; +drop table if exists xp_d; diff --git 
a/tests/queries/0_stateless/01615_random_one_shard_insertion.reference b/tests/queries/0_stateless/01615_random_one_shard_insertion.reference index 448a73c4789..20ed3c2d518 100644 --- a/tests/queries/0_stateless/01615_random_one_shard_insertion.reference +++ b/tests/queries/0_stateless/01615_random_one_shard_insertion.reference @@ -1,8 +1,22 @@ -0 -0 1 1 -2 +0 +1 2 3 -3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 diff --git a/tests/queries/0_stateless/01615_random_one_shard_insertion.sql b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql index 7d07629feda..59412adbdbf 100644 --- a/tests/queries/0_stateless/01615_random_one_shard_insertion.sql +++ b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql @@ -1,22 +1,26 @@ -drop table if exists shard; +create database if not exists shard_0; +create database if not exists shard_1; +drop table if exists shard_0.tbl; +drop table if exists shard_1.tbl; drop table if exists distr; -create table shard (id Int32) engine = MergeTree order by cityHash64(id); -create table distr as shard engine Distributed (test_cluster_two_shards_localhost, currentDatabase(), shard); - -insert into distr (id) values (0), (1); -- { serverError 55; } +create table shard_0.tbl (number UInt64) engine = MergeTree order by number; +create table shard_1.tbl (number UInt64) engine = MergeTree order by number; +create table distr (number UInt64) engine = Distributed(test_cluster_two_shards_different_databases, '', tbl); set insert_distributed_sync = 1; - -insert into distr (id) values (0), (1); -- { serverError 55; } - -set insert_distributed_sync = 0; set insert_distributed_one_random_shard = 1; +set max_block_size = 1; +set max_insert_block_size = 1; +set min_insert_block_size_rows = 1; +insert into distr select number from numbers(20); -insert into distr (id) values (0), (1); -insert into distr (id) values (2), (3); +select count() != 0 from shard_0.tbl; +select count() != 0 from shard_1.tbl; +select * from distr order by number; -select * from distr order by id; - -drop table if exists shard; -drop table if exists distr; +drop table if exists shard_0.tbl; +drop table if exists shard_1.tbl; +drop database shard_0; +drop database shard_1; +drop table distr; diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 072e8d75f52..1bfcf863184 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -44,7 +44,7 @@ echo "clickhouse-client --query "'"select file('"'${user_files_path}/dir'), file echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null # Test relative path consists of ".." whose absolute path is out of the user_files directory. -echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../../../../../../../../../../../../../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' 
| bash 2>/dev/null diff --git a/tests/queries/0_stateless/01659_h3_buffer_overflow.sql b/tests/queries/0_stateless/01659_h3_buffer_overflow.sql index b752059da48..f2d77641ec9 100644 --- a/tests/queries/0_stateless/01659_h3_buffer_overflow.sql +++ b/tests/queries/0_stateless/01659_h3_buffer_overflow.sql @@ -7,3 +7,4 @@ SELECT h3kRing(0xFFFFFFFFFFFFFF, 1000) FORMAT Null; SELECT h3GetBaseCell(0xFFFFFFFFFFFFFF) FORMAT Null; SELECT h3GetResolution(0xFFFFFFFFFFFFFF) FORMAT Null; SELECT h3kRing(0xFFFFFFFFFFFFFF, 10) FORMAT Null; +SELECT h3ToGeo(0xFFFFFFFFFFFFFF) FORMAT Null; diff --git a/tests/queries/0_stateless/01664_ntoa_aton_mysql_compatibility.reference b/tests/queries/0_stateless/01664_ntoa_aton_mysql_compatibility.reference new file mode 100644 index 00000000000..b8c6661bca7 --- /dev/null +++ b/tests/queries/0_stateless/01664_ntoa_aton_mysql_compatibility.reference @@ -0,0 +1,4 @@ +2a02:6b8::11 +2A0206B8000000000000000000000011 +0.0.5.57 +3232235521 diff --git a/tests/queries/0_stateless/01664_ntoa_aton_mysql_compatibility.sql b/tests/queries/0_stateless/01664_ntoa_aton_mysql_compatibility.sql new file mode 100644 index 00000000000..4f4aef09259 --- /dev/null +++ b/tests/queries/0_stateless/01664_ntoa_aton_mysql_compatibility.sql @@ -0,0 +1,4 @@ +SELECT INET6_NTOA(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)); +SELECT hex(INET6_ATON('2a02:6b8::11')); +SELECT INET_NTOA(toUInt32(1337)); +SELECT INET_ATON('192.168.0.1'); diff --git a/tests/queries/0_stateless/01664_test_FunctionIPv6NumToString_mysql_compatibility.reference b/tests/queries/0_stateless/01664_test_FunctionIPv6NumToString_mysql_compatibility.reference deleted file mode 100644 index 18a9c3436e5..00000000000 --- a/tests/queries/0_stateless/01664_test_FunctionIPv6NumToString_mysql_compatibility.reference +++ /dev/null @@ -1 +0,0 @@ -2a02:6b8::11 diff --git a/tests/queries/0_stateless/01664_test_FunctionIPv6NumToString_mysql_compatibility.sql b/tests/queries/0_stateless/01664_test_FunctionIPv6NumToString_mysql_compatibility.sql deleted file mode 100644 index 85bf1f8c7f9..00000000000 --- a/tests/queries/0_stateless/01664_test_FunctionIPv6NumToString_mysql_compatibility.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT INET6_NTOA(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)); diff --git a/tests/queries/0_stateless/01665_test_FunctionIPv6StringToNum_mysql_compatibility.reference b/tests/queries/0_stateless/01665_test_FunctionIPv6StringToNum_mysql_compatibility.reference deleted file mode 100644 index 0b3192fc44c..00000000000 --- a/tests/queries/0_stateless/01665_test_FunctionIPv6StringToNum_mysql_compatibility.reference +++ /dev/null @@ -1 +0,0 @@ -*¸\0\0\0\0\0\0\0\0\0\0\0 diff --git a/tests/queries/0_stateless/01665_test_FunctionIPv6StringToNum_mysql_compatibility.sql b/tests/queries/0_stateless/01665_test_FunctionIPv6StringToNum_mysql_compatibility.sql deleted file mode 100644 index 2eff6cca793..00000000000 --- a/tests/queries/0_stateless/01665_test_FunctionIPv6StringToNum_mysql_compatibility.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT INET6_ATON('2a02:6b8::11'); diff --git a/tests/queries/0_stateless/01666_test_FunctionIPv4NumToString_mysql_compatibility.reference b/tests/queries/0_stateless/01666_test_FunctionIPv4NumToString_mysql_compatibility.reference deleted file mode 100644 index 08674e64f67..00000000000 --- a/tests/queries/0_stateless/01666_test_FunctionIPv4NumToString_mysql_compatibility.reference +++ /dev/null @@ -1 +0,0 @@ -0.0.5.57 diff --git 
a/tests/queries/0_stateless/01666_test_FunctionIPv4NumToString_mysql_compatibility.sql b/tests/queries/0_stateless/01666_test_FunctionIPv4NumToString_mysql_compatibility.sql deleted file mode 100644 index 0c6608c6e74..00000000000 --- a/tests/queries/0_stateless/01666_test_FunctionIPv4NumToString_mysql_compatibility.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT INET_NTOA(toUInt32(1337)); diff --git a/tests/queries/0_stateless/01667_test_FunctionIPv4StringToNum_mysql_compatibility.reference b/tests/queries/0_stateless/01667_test_FunctionIPv4StringToNum_mysql_compatibility.reference deleted file mode 100644 index c15798a747d..00000000000 --- a/tests/queries/0_stateless/01667_test_FunctionIPv4StringToNum_mysql_compatibility.reference +++ /dev/null @@ -1 +0,0 @@ -3232235521 diff --git a/tests/queries/0_stateless/01667_test_FunctionIPv4StringToNum_mysql_compatibility.sql b/tests/queries/0_stateless/01667_test_FunctionIPv4StringToNum_mysql_compatibility.sql deleted file mode 100644 index 6a91900370c..00000000000 --- a/tests/queries/0_stateless/01667_test_FunctionIPv4StringToNum_mysql_compatibility.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT INET_ATON('192.168.0.1'); diff --git a/tests/queries/0_stateless/01710_normal_projection_fix1.reference b/tests/queries/0_stateless/01710_normal_projection_fix1.reference new file mode 100644 index 00000000000..cd121fd3feb --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_fix1.reference @@ -0,0 +1,2 @@ +1 +1 1 diff --git a/tests/queries/0_stateless/01710_normal_projection_fix1.sql b/tests/queries/0_stateless/01710_normal_projection_fix1.sql new file mode 100644 index 00000000000..b4d7c6e8734 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_fix1.sql @@ -0,0 +1,17 @@ +drop table if exists t; + +create table t (i int, j int) engine MergeTree order by i; + +insert into t values (1, 2); + +alter table t add projection x (select * order by j); + +insert into t values (1, 4); + +set allow_experimental_projection_optimization = 1, force_optimize_projection = 1; + +select i from t prewhere j = 4; + +SELECT j = 2, i FROM t PREWHERE j = 2; + +drop table t; diff --git a/tests/queries/0_stateless/01710_projection_in_index.reference b/tests/queries/0_stateless/01710_projection_in_index.reference new file mode 100644 index 00000000000..73c1df53be4 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_in_index.reference @@ -0,0 +1,2 @@ +1 1 1 +2 2 2 diff --git a/tests/queries/0_stateless/01710_projection_in_index.sql b/tests/queries/0_stateless/01710_projection_in_index.sql new file mode 100644 index 00000000000..2669d69dc9f --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_in_index.sql @@ -0,0 +1,11 @@ +drop table if exists t; + +create table t (i int, j int, k int, projection p (select * order by j)) engine MergeTree order by i settings index_granularity = 1; + +insert into t select number, number, number from numbers(10); + +set allow_experimental_projection_optimization = 1, max_rows_to_read = 3; + +select * from t where i < 5 and j in (1, 2); + +drop table t; diff --git a/tests/queries/0_stateless/01736_null_as_default.reference b/tests/queries/0_stateless/01736_null_as_default.reference index baf83eb21d7..d201a9636ef 100644 --- a/tests/queries/0_stateless/01736_null_as_default.reference +++ b/tests/queries/0_stateless/01736_null_as_default.reference @@ -1,2 +1,5 @@ A \N +A +\N +\N diff --git a/tests/queries/0_stateless/01736_null_as_default.sql b/tests/queries/0_stateless/01736_null_as_default.sql index 
a00011b06d4..c897d035a50 100644 --- a/tests/queries/0_stateless/01736_null_as_default.sql +++ b/tests/queries/0_stateless/01736_null_as_default.sql @@ -2,4 +2,6 @@ drop table if exists test_enum; create table test_enum (c Nullable(Enum16('A' = 1, 'B' = 2))) engine Log; insert into test_enum values (1), (NULL); select * from test_enum; +select toString(c) from test_enum; +select toString('aaaa', NULL); drop table test_enum; diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 3bc986f4d2b..65b7bf54f7f 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -22,6 +22,7 @@ others different types -- prohibited different types -- conversion 0 +0 optimize_skip_unused_shards_limit 0 0 diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index dbe76f146b0..ea7d526c039 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -93,8 +93,6 @@ select 'errors'; -- optimize_skip_unused_shards does not support non-constants select * from dist_01756 where dummy in (select * from system.one); -- { serverError 507 } select * from dist_01756 where dummy in (toUInt8(0)); -- { serverError 507 } --- intHash64 does not accept string -select * from dist_01756 where dummy in ('0', '2'); -- { serverError 43 } -- NOT IN does not supported select * from dist_01756 where dummy not in (0, 2); -- { serverError 507 } @@ -126,6 +124,8 @@ select 'different types -- conversion'; create table dist_01756_column as system.one engine=Distributed(test_cluster_two_shards, system, one, dummy); select * from dist_01756_column where dummy in (0, '255'); select * from dist_01756_column where dummy in (0, '255foo'); -- { serverError 53 } +-- intHash64 does not accept string, but implicit conversion should be done +select * from dist_01756 where dummy in ('0', '2'); -- optimize_skip_unused_shards_limit select 'optimize_skip_unused_shards_limit'; diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 7a0a0af3e05..9b2df9773ea 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -3,21 +3,21 @@ MinMax Keys: y - Condition: (y in [1, +inf)) + Condition: (y in [1, +Inf)) Parts: 4/5 Granules: 11/12 Partition Keys: y bitAnd(z, 3) - Condition: and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +inf)), (bitAnd(z, 3) not in [1, 1]))) + Condition: and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))) Parts: 3/4 Granules: 10/11 PrimaryKey Keys: x y - Condition: and((x in [11, +inf)), (y in [1, +inf))) + Condition: and((x in [11, +Inf)), (y in [1, +Inf))) Parts: 2/3 Granules: 6/10 Skip @@ -36,7 +36,7 @@ { "Type": "MinMax", "Keys": ["y"], - "Condition": "(y in [1, +inf))", + "Condition": "(y in [1, +Inf))", "Initial Parts": 5, "Selected Parts": 4, "Initial Granules": 12, @@ -45,7 +45,7 @@ { "Type": "Partition", "Keys": ["y", "bitAnd(z, 3)"], - "Condition": "and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +inf)), (bitAnd(z, 3) not in [1, 1])))", + "Condition": 
"and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1])))", "Initial Parts": 4, "Selected Parts": 3, "Initial Granules": 11, @@ -54,7 +54,7 @@ { "Type": "PrimaryKey", "Keys": ["x", "y"], - "Condition": "and((x in [11, +inf)), (y in [1, +inf)))", + "Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))", "Initial Parts": 3, "Selected Parts": 2, "Initial Granules": 10, @@ -104,6 +104,6 @@ Keys: x plus(x, y) - Condition: or((x in 2-element set), (plus(plus(x, y), 1) in (-inf, 2])) + Condition: or((x in 2-element set), (plus(plus(x, y), 1) in (-Inf, 2])) Parts: 1/1 Granules: 1/1 diff --git a/tests/queries/0_stateless/01849_geoToS2.reference b/tests/queries/0_stateless/01849_geoToS2.reference new file mode 100644 index 00000000000..08d76978791 --- /dev/null +++ b/tests/queries/0_stateless/01849_geoToS2.reference @@ -0,0 +1,42 @@ +Checking s2 index generation. +(-19.82614013111778,-41.19291183249827) 145638248314527735 +(-41.56412828256075,-16.777072680829264) 525948609053544771 +(40.76827373895363,2.853562616147134) 1723620528513494869 +(8.774109215249668,-3.271373776817451) 1913723177026859677 +(54.7243525263686,-19.21060843697615) 2414200527355011557 +(49.942295220850404,-18.63385558246411) 2446780491370050277 +(125.93850250627281,25.519361668875952) 3814912406305146887 +(51.39037388040172,29.368252361755857) 4590287096029015617 +(-165.90797307310456,54.20517787077579) 6317132534461540395 +(140.4288338857567,28.399754752831992) 6944470717484584123 +(170.14574840189854,7.606448822713084) 7280210779810726069 +(-170.42364912433663,-10.102188288980733) 8094352344009072653 +(-168.25897915006252,-38.27117023780382) 8295275405228382549 +(-46.71824230901231,5.161978621886426) 13251733624047612511 +(-64.36499761086276,-13.206225582160274) 10654167528317614093 +(-61.76193800786795,-24.969589107565216) 10670400906708524493 +(-79.24545956192031,-22.940848730236024) 10868726821406045765 +(74.00610377406458,-68.32123992734591) 12793606480989360605 +(10.610774069458158,-64.18410328814072) 13202270384266773975 +(-89.81096210929424,-57.01398354986957) 13606307743304496003 +(-19.82614,-41.19291) (-19.82614,-41.19291) ok +(-41.56413,-16.77707) (-41.56413,-16.77707) ok +(40.76827,2.85356) (40.76827,2.85356) ok +(8.77411,-3.27137) (8.77411,-3.27137) ok +(54.72435,-19.21061) (54.72435,-19.21061) ok +(49.94229,-18.63386) (49.94229,-18.63386) ok +(125.9385,25.51936) (125.9385,25.51936) ok +(51.39037,29.36825) (51.39037,29.36825) ok +(-165.90797,54.20518) (-165.90797,54.20518) ok +(140.42883,28.39976) (140.42883,28.39976) ok +(170.14575,7.60645) (170.14575,7.60645) ok +(-170.42365,-10.10219) (-170.42365,-10.10219) ok +(-168.25898,-38.27117) (-168.25898,-38.27117) ok +(5.16198,-46.71824) (5.16198,-46.71824) ok +(-64.365,-13.20623) (-64.365,-13.20623) ok +(-61.76194,-24.96959) (-61.76194,-24.96959) ok +(-79.24546,-22.94085) (-79.24546,-22.94085) ok +(74.0061,-68.32124) (74.0061,-68.32124) ok +(10.61077,-64.1841) (10.61077,-64.1841) ok +(-89.81096,-57.01398) (-89.81096,-57.01398) ok +4864204703484167331 diff --git a/tests/queries/0_stateless/01849_geoToS2.sql b/tests/queries/0_stateless/01849_geoToS2.sql new file mode 100644 index 00000000000..eb50fa81b8a --- /dev/null +++ b/tests/queries/0_stateless/01849_geoToS2.sql @@ -0,0 +1,50 @@ +DROP TABLE IF EXISTS s2_indexes; + +CREATE TABLE s2_indexes (s2_index UInt64, longitude Float64, latitude Float64) ENGINE = Memory; + +-- Random geo coordinates were generated using S2Testing::RandomPoint() method from s2 API. 
+ +INSERT INTO s2_indexes VALUES (3814912406305146967, 125.938503, 25.519362); +INSERT INTO s2_indexes VALUES (10654167528317613967, -64.364998, -13.206226); +INSERT INTO s2_indexes VALUES (1913723177026859705, 8.774109, -3.271374); +INSERT INTO s2_indexes VALUES (13606307743304496111, -89.810962, -57.013984); +INSERT INTO s2_indexes VALUES (8094352344009072761,-170.423649, -10.102188); +INSERT INTO s2_indexes VALUES (2414200527355011659, 54.724353, -19.210608); +INSERT INTO s2_indexes VALUES (4590287096029015693, 51.390374, 29.368252); +INSERT INTO s2_indexes VALUES (10173921221664598133, 5.161979, -46.718242); +INSERT INTO s2_indexes VALUES (525948609053546189, -41.564128, -16.777073); +INSERT INTO s2_indexes VALUES (2446780491369950853, 49.94229, -18.633856); +INSERT INTO s2_indexes VALUES (1723620528513492581, 40.768274, 2.853563); +INSERT INTO s2_indexes VALUES (8295275405228383207, -168.258979, -38.271170); +INSERT INTO s2_indexes VALUES (7280210779810727639, 170.145748, 7.606449); +INSERT INTO s2_indexes VALUES (10670400906708524495, -61.761938, -24.969589); +INSERT INTO s2_indexes VALUES (10868726821406046149, -79.245460, -22.940849); +INSERT INTO s2_indexes VALUES (13202270384266773545, 10.610774, -64.184103); +INSERT INTO s2_indexes VALUES (145638248314527629, -19.826140, -41.192912); +INSERT INTO s2_indexes VALUES (12793606480989360601, 74.006104, -68.321240); +INSERT INTO s2_indexes VALUES (6317132534461540391, -165.907973, 54.205178); +INSERT INTO s2_indexes VALUES (6944470717485986643, 140.428834, 28.399755); + +SELECT 'Checking s2 index generation.'; + +SELECT s2ToGeo(s2_index), geoToS2(longitude, latitude) FROM s2_indexes ORDER BY s2_index; + +SELECT first, second, result FROM ( + SELECT + s2ToGeo(geoToS2(longitude, latitude)) AS output_geo, + tuple(roundBankers(longitude, 5), roundBankers(latitude, 5)) AS first, + tuple(roundBankers(output_geo.1, 5), roundBankers(output_geo.2, 5)) AS second, + if(first = second, 'ok', 'fail') AS result + FROM s2_indexes + ORDER BY s2_index + ); + +SELECT s2ToGeo(toUInt64(-1)); -- { serverError 36 } +SELECT s2ToGeo(nan); -- { serverError 43 } +SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); +SELECT geoToS2(nan, nan); -- { serverError 43 } +SELECT geoToS2(-inf, 1.1754943508222875e-38); -- { serverError 43 } + + + +DROP TABLE IF EXISTS s2_indexes; diff --git a/tests/queries/0_stateless/01851_s2_to_geo.reference b/tests/queries/0_stateless/01851_s2_to_geo.reference new file mode 100644 index 00000000000..75b182ebd1f --- /dev/null +++ b/tests/queries/0_stateless/01851_s2_to_geo.reference @@ -0,0 +1,2 @@ +(55.779227241803866,37.63098046233757) +(55.76324102676383,37.660183005258276) diff --git a/tests/queries/0_stateless/01851_s2_to_geo.sql b/tests/queries/0_stateless/01851_s2_to_geo.sql new file mode 100644 index 00000000000..76e4b2a5346 --- /dev/null +++ b/tests/queries/0_stateless/01851_s2_to_geo.sql @@ -0,0 +1,2 @@ +select s2ToGeo(4573520603753570041); +select s2ToGeo(4573517609713934091); diff --git a/tests/queries/0_stateless/01852_s2_get_neighbors.reference b/tests/queries/0_stateless/01852_s2_get_neighbors.reference new file mode 100644 index 00000000000..3182a1c5e00 --- /dev/null +++ b/tests/queries/0_stateless/01852_s2_get_neighbors.reference @@ -0,0 +1 @@ +[5074766987100422144,5074766712222515200,5074767536856236032,5074767261978329088] diff --git a/tests/queries/0_stateless/01852_s2_get_neighbours.reference b/tests/queries/0_stateless/01852_s2_get_neighbours.reference new file mode 100644 index 
00000000000..3182a1c5e00 --- /dev/null +++ b/tests/queries/0_stateless/01852_s2_get_neighbours.reference @@ -0,0 +1 @@ +[5074766987100422144,5074766712222515200,5074767536856236032,5074767261978329088] diff --git a/tests/queries/0_stateless/01852_s2_get_neighbours.sql b/tests/queries/0_stateless/01852_s2_get_neighbours.sql new file mode 100644 index 00000000000..8163f827697 --- /dev/null +++ b/tests/queries/0_stateless/01852_s2_get_neighbours.sql @@ -0,0 +1 @@ +select s2GetNeighbors(5074766849661468672); diff --git a/tests/queries/0_stateless/01853_s2_cells_intersect.reference b/tests/queries/0_stateless/01853_s2_cells_intersect.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/01853_s2_cells_intersect.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/01853_s2_cells_intersect.sql b/tests/queries/0_stateless/01853_s2_cells_intersect.sql new file mode 100644 index 00000000000..2a033a67d58 --- /dev/null +++ b/tests/queries/0_stateless/01853_s2_cells_intersect.sql @@ -0,0 +1,5 @@ +select s2CellsIntersect(9926595209846587392, 9926594385212866560); +select s2CellsIntersect(9926595209846587392, 9937259648002293760); + + +SELECT s2CellsIntersect(9926595209846587392, 9223372036854775806); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01854_s2_cap_contains.reference b/tests/queries/0_stateless/01854_s2_cap_contains.reference new file mode 100644 index 00000000000..16db301bb51 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_contains.reference @@ -0,0 +1,3 @@ +1 +0 +1 diff --git a/tests/queries/0_stateless/01854_s2_cap_contains.sql b/tests/queries/0_stateless/01854_s2_cap_contains.sql new file mode 100644 index 00000000000..1a8d2548352 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_contains.sql @@ -0,0 +1,11 @@ +select s2CapContains(1157339245694594829, 1.0, 1157347770437378819); +select s2CapContains(1157339245694594829, 1.0, 1152921504606846977); +select s2CapContains(1157339245694594829, 3.14, 1157339245694594829); + +select s2CapContains(nan, 3.14, 1157339245694594829); -- { serverError 43 } +select s2CapContains(1157339245694594829, nan, 1157339245694594829); -- { serverError 43 } +select s2CapContains(1157339245694594829, 3.14, nan); -- { serverError 43 } + + +select s2CapContains(toUInt64(-1), -1.0, toUInt64(-1)); -- { serverError 36 } +select s2CapContains(toUInt64(-1), 9999.9999, toUInt64(-1)); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01854_s2_cap_union.reference b/tests/queries/0_stateless/01854_s2_cap_union.reference new file mode 100644 index 00000000000..8be71d7ba28 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_union.reference @@ -0,0 +1,3 @@ +(4534655147792050737,60.2088283994957) +(1157339245694594829,-57.29577951308232) +(1157339245694594829,180) diff --git a/tests/queries/0_stateless/01854_s2_cap_union.sql b/tests/queries/0_stateless/01854_s2_cap_union.sql new file mode 100644 index 00000000000..921a00ac663 --- /dev/null +++ b/tests/queries/0_stateless/01854_s2_cap_union.sql @@ -0,0 +1,9 @@ +select s2CapUnion(3814912406305146967, 1.0, 1157347770437378819, 1.0); +select s2CapUnion(1157339245694594829, -1.0, 1152921504606846977, -1.0); +select s2CapUnion(1157339245694594829, toFloat64(toUInt64(-1)), 1157339245694594829, toFloat64(toUInt64(-1))); + + +select s2CapUnion(nan, 3.14, 1157339245694594829, 3.14); -- { serverError 43 } +select s2CapUnion(1157339245694594829, nan, 1157339245694594829, 3.14); -- { serverError 43 } +select 
s2CapUnion(1157339245694594829, 3.14, nan, 3.14); -- { serverError 43 } +select s2CapUnion(1157339245694594829, 3.14, 1157339245694594829, nan); -- { serverError 43 } diff --git a/tests/queries/0_stateless/01882_total_rows_approx.reference b/tests/queries/0_stateless/01882_total_rows_approx.reference index 7f2070fc9cb..fd1fb9b7231 100644 --- a/tests/queries/0_stateless/01882_total_rows_approx.reference +++ b/tests/queries/0_stateless/01882_total_rows_approx.reference @@ -1,8 +1 @@ -Waiting for query to be started... -Query started. -Checking total_rows_approx. -10 -10 -10 -10 -10 +"total_rows_to_read":"10" diff --git a/tests/queries/0_stateless/01882_total_rows_approx.sh b/tests/queries/0_stateless/01882_total_rows_approx.sh index f51e95b15c0..26333f61692 100755 --- a/tests/queries/0_stateless/01882_total_rows_approx.sh +++ b/tests/queries/0_stateless/01882_total_rows_approx.sh @@ -1,23 +1,12 @@ #!/usr/bin/env bash -# Check that total_rows_approx (via system.processes) includes all rows from +# Check that total_rows_approx (via http headers) includes all rows from # all parts at the query start. # # At some point total_rows_approx was accounted only when the query starts # reading the part, and so total_rows_approx wasn't reliable, even for simple # SELECT FROM MergeTree() # It was fixed by take total_rows_approx into account as soon as possible. -# -# To check total_rows_approx this query starts the query in background, -# that sleep's 1 second for each part, and by using max_threads=1 the query -# reads parts sequentially and sleeps 1 second between parts. -# Also the test spawns background process to check total_rows_approx for this -# query. -# It checks multiple times since at first few iterations the query may not -# start yet (since there are 3 excessive sleep calls - 1 for primary key -# analysis and 2 for partition pruning), and get only last 5 total_rows_approx -# rows (one row is not enough since when the query finishes total_rows_approx -# will be set to 10 anyway, regardless proper accounting). CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -25,31 +14,14 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "drop table if exists data_01882" $CLICKHOUSE_CLIENT -q "create table data_01882 (key Int) Engine=MergeTree() partition by key order by key as select * from numbers(10)" -QUERY_ID="$CLICKHOUSE_TEST_NAME-$(tr -cd '[:lower:]' < /dev/urandom | head -c10)" - -function check_background_query() -{ - echo "Waiting for query to be started..." - while [[ $($CLICKHOUSE_CLIENT --param_query_id="$QUERY_ID" -q 'select count() from system.processes where query_id = {query_id:String}') != 1 ]]; do - sleep 0.01 - done - echo "Query started." - - echo "Checking total_rows_approx." - # check total_rows_approx multiple times - # (to make test more reliable to what it covers) - local i=0 - for ((i = 0; i < 20; ++i)); do - $CLICKHOUSE_CLIENT --param_query_id="$QUERY_ID" -q 'select total_rows_approx from system.processes where query_id = {query_id:String}' - (( ++i )) - sleep 1 - done | tail -n5 -} -check_background_query & - -# this query will sleep 10 seconds in total, 1 seconds for each part (10 parts). -$CLICKHOUSE_CLIENT -q "select *, sleepEachRow(1) from data_01882" --max_threads=1 --format Null --query_id="$QUERY_ID" --max_block_size=1 - -wait - -$CLICKHOUSE_CLIENT -q "drop table data_01882" +# send_progress_in_http_headers will periodically send the progress +# but this is not stable, i.e. 
it can be dumped on query end, +# thus check few times to be sure that this is not coincidence. +for _ in {1..30}; do + $CLICKHOUSE_CURL -vsS "${CLICKHOUSE_URL}&max_threads=1&default_format=Null&send_progress_in_http_headers=1&http_headers_progress_interval_ms=1" --data-binary @- <<< "select * from data_01882" |& { + grep -o -F '"total_rows_to_read":"10"' + } | { + # grep out final result + grep -v -F '"read_rows":"10"' + } +done | uniq diff --git a/tests/queries/0_stateless/01889_clickhouse_client_config_format.reference b/tests/queries/0_stateless/01889_clickhouse_client_config_format.reference index aa7748928f1..202e32a583e 100644 --- a/tests/queries/0_stateless/01889_clickhouse_client_config_format.reference +++ b/tests/queries/0_stateless/01889_clickhouse_client_config_format.reference @@ -13,4 +13,4 @@ yml yaml 2 ini -Code: 347. Unknown format of '/config_default.ini' config +Code: 347. Unknown format of '/config_default.ini' config. (CANNOT_LOAD_CONFIG) diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.reference b/tests/queries/0_stateless/01889_sqlite_read_write.reference new file mode 100644 index 00000000000..e979b5816c5 --- /dev/null +++ b/tests/queries/0_stateless/01889_sqlite_read_write.reference @@ -0,0 +1,42 @@ +create database engine +show database tables: +table1 +table2 +table3 +table4 +table5 +show creare table: +CREATE TABLE SQLite.table1\n(\n `col1` Nullable(String),\n `col2` Nullable(Int16)\n)\nENGINE = SQLite +CREATE TABLE SQLite.table2\n(\n `col1` Nullable(Int32),\n `col2` Nullable(String)\n)\nENGINE = SQLite +describe table: +col1 Nullable(String) +col2 Nullable(Int16) +col1 Nullable(Int32) +col2 Nullable(String) +select *: +line1 1 +line2 2 +line3 3 +1 text1 +2 text2 +3 text3 +test types +CREATE TABLE SQLite.table4\n(\n `a` Nullable(Int32),\n `b` Nullable(Int32),\n `c` Nullable(Int8),\n `d` Nullable(Int16),\n `e` Nullable(Int32),\n `bigint` Nullable(String),\n `int2` Nullable(String),\n `int8` Nullable(String)\n)\nENGINE = SQLite +CREATE TABLE SQLite.table5\n(\n `a` Nullable(String),\n `b` Nullable(String),\n `c` Nullable(Float64),\n `d` Nullable(Float64),\n `e` Nullable(Float64),\n `f` Nullable(Float32)\n)\nENGINE = SQLite +create table engine with table3 +CREATE TABLE default.sqlite_table3\n(\n `col1` String,\n `col2` Int32\n)\nENGINE = SQLite + 1 +not a null 2 + 3 + 4 +line6 6 + 7 +test table function +line1 1 +line2 2 +line3 3 +line4 4 +test path in clickhouse-local +line1 1 +line2 2 +line3 3 diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.sh b/tests/queries/0_stateless/01889_sqlite_read_write.sh new file mode 100755 index 00000000000..73b106e9eb4 --- /dev/null +++ b/tests/queries/0_stateless/01889_sqlite_read_write.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +mkdir -p ${user_files_path}/ +chmod 777 ${user_files_path} +DB_PATH=${user_files_path}/db1 + + +sqlite3 ${DB_PATH} 'DROP TABLE IF EXISTS table1' +sqlite3 ${DB_PATH} 'DROP TABLE IF EXISTS table2' +sqlite3 ${DB_PATH} 'DROP TABLE IF EXISTS table3' +sqlite3 ${DB_PATH} 'DROP TABLE IF EXISTS table4' +sqlite3 ${DB_PATH} 'DROP TABLE IF EXISTS table5' + +sqlite3 ${DB_PATH} 'CREATE TABLE table1 (col1 text, col2 smallint);' +sqlite3 ${DB_PATH} 'CREATE TABLE table2 (col1 int, col2 text);' + +chmod ugo+w ${DB_PATH} + +sqlite3 ${DB_PATH} "INSERT INTO table1 VALUES ('line1', 1), ('line2', 2), ('line3', 3)" +sqlite3 ${DB_PATH} "INSERT INTO table2 VALUES (1, 'text1'), (2, 'text2'), (3, 'text3')" + +sqlite3 ${DB_PATH} 'CREATE TABLE table3 (col1 text, col2 int);' +sqlite3 ${DB_PATH} 'INSERT INTO table3 VALUES (NULL, 1)' +sqlite3 ${DB_PATH} "INSERT INTO table3 VALUES ('not a null', 2)" +sqlite3 ${DB_PATH} 'INSERT INTO table3 VALUES (NULL, 3)' +sqlite3 ${DB_PATH} "INSERT INTO table3 VALUES ('', 4)" + +sqlite3 ${DB_PATH} 'CREATE TABLE table4 (a int, b integer, c tinyint, d smallint, e mediumint, bigint, int2, int8)' +sqlite3 ${DB_PATH} 'CREATE TABLE table5 (a character(20), b varchar(10), c real, d double, e double precision, f float)' + + +${CLICKHOUSE_CLIENT} --query='DROP DATABASE IF EXISTS sqlite_database' + +${CLICKHOUSE_CLIENT} --query="select 'create database engine'"; +${CLICKHOUSE_CLIENT} --query="CREATE DATABASE sqlite_database ENGINE = SQLite('${DB_PATH}')" + +${CLICKHOUSE_CLIENT} --query="select 'show database tables:'"; +${CLICKHOUSE_CLIENT} --query='SHOW TABLES FROM sqlite_database;' + +${CLICKHOUSE_CLIENT} --query="select 'show creare table:'"; +${CLICKHOUSE_CLIENT} --query='SHOW CREATE TABLE sqlite_database.table1;' | sed -r 's/(.*SQLite)(.*)/\1/' +${CLICKHOUSE_CLIENT} --query='SHOW CREATE TABLE sqlite_database.table2;' | sed -r 's/(.*SQLite)(.*)/\1/' + +${CLICKHOUSE_CLIENT} --query="select 'describe table:'"; +${CLICKHOUSE_CLIENT} --query='DESCRIBE TABLE sqlite_database.table1;' +${CLICKHOUSE_CLIENT} --query='DESCRIBE TABLE sqlite_database.table2;' + +${CLICKHOUSE_CLIENT} --query="select 'select *:'"; +${CLICKHOUSE_CLIENT} --query='SELECT * FROM sqlite_database.table1 ORDER BY col2' +${CLICKHOUSE_CLIENT} --query='SELECT * FROM sqlite_database.table2 ORDER BY col1;' + +${CLICKHOUSE_CLIENT} --query="select 'test types'"; +${CLICKHOUSE_CLIENT} --query='SHOW CREATE TABLE sqlite_database.table4;' | sed -r 's/(.*SQLite)(.*)/\1/' +${CLICKHOUSE_CLIENT} --query='SHOW CREATE TABLE sqlite_database.table5;' | sed -r 's/(.*SQLite)(.*)/\1/' + +${CLICKHOUSE_CLIENT} --query='DROP DATABASE IF EXISTS sqlite_database' + + +${CLICKHOUSE_CLIENT} --query="select 'create table engine with table3'"; +${CLICKHOUSE_CLIENT} --query='DROP TABLE IF EXISTS sqlite_table3' +${CLICKHOUSE_CLIENT} --query="CREATE TABLE sqlite_table3 (col1 String, col2 Int32) ENGINE = SQLite('${DB_PATH}', 'table3')" + +${CLICKHOUSE_CLIENT} --query='SHOW CREATE TABLE sqlite_table3;' | sed -r 's/(.*SQLite)(.*)/\1/' +${CLICKHOUSE_CLIENT} --query="INSERT INTO sqlite_table3 VALUES ('line6', 6);" +${CLICKHOUSE_CLIENT} --query="INSERT INTO sqlite_table3 VALUES (NULL, 7);" + +${CLICKHOUSE_CLIENT} --query='SELECT * FROM sqlite_table3 ORDER BY col2' + + +${CLICKHOUSE_CLIENT} --query="select 'test 
table function'"; +${CLICKHOUSE_CLIENT} --query="INSERT INTO TABLE FUNCTION sqlite('${DB_PATH}', 'table1') SELECT 'line4', 4" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM sqlite('${DB_PATH}', 'table1') ORDER BY col2" + + +sqlite3 $CUR_DIR/db2 'DROP TABLE IF EXISTS table1' +sqlite3 $CUR_DIR/db2 'CREATE TABLE table1 (col1 text, col2 smallint);' +sqlite3 $CUR_DIR/db2 "INSERT INTO table1 VALUES ('line1', 1), ('line2', 2), ('line3', 3)" + +${CLICKHOUSE_CLIENT} --query="select 'test path in clickhouse-local'"; +${CLICKHOUSE_LOCAL} --query="SELECT * FROM sqlite('$CUR_DIR/db2', 'table1') ORDER BY col2" + +rm -r ${DB_PATH} diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference new file mode 100644 index 00000000000..b70a1cb7c75 --- /dev/null +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference @@ -0,0 +1,3 @@ +8 +4 +4 diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql new file mode 100644 index 00000000000..7aa1b0112a6 --- /dev/null +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql @@ -0,0 +1,11 @@ +CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=("[^"]*?"|[^",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); + +INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); + +ALTER TABLE test_extract ADD COLUMN `15Id` Nullable(UInt16) DEFAULT toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1])); + +SELECT uniq(15Id) FROM test_extract SETTINGS max_threads=1, max_memory_usage=100000000; + +SELECT uniq(15Id) FROM test_extract PREWHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; + +SELECT uniq(15Id) FROM test_extract WHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; diff --git a/tests/queries/0_stateless/01906_h3_to_geo.reference b/tests/queries/0_stateless/01906_h3_to_geo.reference new file mode 100644 index 00000000000..93e8600576c --- /dev/null +++ b/tests/queries/0_stateless/01906_h3_to_geo.reference @@ -0,0 +1,32 @@ +(-173.6412167681162,-14.130272474941535) +(59.48137613600854,58.020407687755686) +(172.68095885060296,-83.6576608516349) +(-94.46556851304558,-69.1999982492279) +(-8.188263637093279,-55.856179102736284) +(77.25594891852249,47.39278564360122) +(135.11348004704536,36.60778126579667) +(39.28534828967223,49.07710003066973) +(124.71163478198051,-27.481172161567258) +(-147.4887686066785,76.73237945824442) +(86.63291906118863,-25.52526285188784) +(23.27751790712118,13.126101362212724) +(-70.40163237204142,-63.12562536833242) +(15.642428355535966,40.285813505163574) +(-76.53411447979884,54.5560449693637) +(8.19906334981474,67.69370966550179) +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok +ok diff --git a/tests/queries/0_stateless/01906_h3_to_geo.sql b/tests/queries/0_stateless/01906_h3_to_geo.sql new file mode 100644 index 00000000000..aa6ecca1754 --- /dev/null +++ b/tests/queries/0_stateless/01906_h3_to_geo.sql @@ -0,0 +1,61 @@ +DROP TABLE IF EXISTS h3_indexes; + +CREATE TABLE h3_indexes (h3_index UInt64) ENGINE = Memory; 
+ +-- Random geo coordinates were generated using the H3 tool: https://github.com/ClickHouse-Extras/h3/blob/master/src/apps/testapps/mkRandGeo.c at various resolutions from 0 to 15. +-- Corresponding H3 index values were in turn generated with those geo coordinates using `geoToH3(lon, lat, res)` ClickHouse function for the following test. + +INSERT INTO h3_indexes VALUES (579205133326352383); +INSERT INTO h3_indexes VALUES (581263419093549055); +INSERT INTO h3_indexes VALUES (589753847883235327); +INSERT INTO h3_indexes VALUES (594082350283882495); +INSERT INTO h3_indexes VALUES (598372386957426687); +INSERT INTO h3_indexes VALUES (599542359671177215); +INSERT INTO h3_indexes VALUES (604296355086598143); +INSERT INTO h3_indexes VALUES (608785214872748031); +INSERT INTO h3_indexes VALUES (615732192485572607); +INSERT INTO h3_indexes VALUES (617056794467368959); +INSERT INTO h3_indexes VALUES (624586477873168383); +INSERT INTO h3_indexes VALUES (627882919484481535); +INSERT INTO h3_indexes VALUES (634600058503392255); +INSERT INTO h3_indexes VALUES (635544851677385791); +INSERT INTO h3_indexes VALUES (639763125756281263); +INSERT INTO h3_indexes VALUES (644178757620501158); + + +SELECT h3ToGeo(h3_index) FROM h3_indexes ORDER BY h3_index; + +DROP TABLE h3_indexes; + +DROP TABLE IF EXISTS h3_geo; + +-- compare if the results of h3ToGeo and geoToH3 are the same + +CREATE TABLE h3_geo(lat Float64, lon Float64, res UInt8) ENGINE = Memory; + +INSERT INTO h3_geo VALUES (-173.6412167681162, -14.130272474941535, 0); +INSERT INTO h3_geo VALUES (59.48137613600854, 58.020407687755686, 1); +INSERT INTO h3_geo VALUES (172.68095885060296, -83.6576608516349, 2); +INSERT INTO h3_geo VALUES (-94.46556851304558, -69.1999982492279, 3); +INSERT INTO h3_geo VALUES (-8.188263637093279, -55.856179102736284, 4); +INSERT INTO h3_geo VALUES (77.25594891852249, 47.39278564360122, 5); +INSERT INTO h3_geo VALUES (135.11348004704536, 36.60778126579667, 6); +INSERT INTO h3_geo VALUES (39.28534828967223, 49.07710003066973, 7); +INSERT INTO h3_geo VALUES (124.71163478198051, -27.481172161567258, 8); +INSERT INTO h3_geo VALUES (-147.4887686066785, 76.73237945824442, 9); +INSERT INTO h3_geo VALUES (86.63291906118863, -25.52526285188784, 10); +INSERT INTO h3_geo VALUES (23.27751790712118, 13.126101362212724, 11); +INSERT INTO h3_geo VALUES (-70.40163237204142, -63.12562536833242, 12); +INSERT INTO h3_geo VALUES (15.642428355535966, 40.285813505163574, 13); +INSERT INTO h3_geo VALUES (-76.53411447979884, 54.5560449693637, 14); +INSERT INTO h3_geo VALUES (8.19906334981474, 67.69370966550179, 15); + +SELECT result FROM ( + SELECT + (lat, lon) AS input_geo, + h3ToGeo(geoToH3(lat, lon, res)) AS output_geo, + if(input_geo = output_geo, 'ok', 'fail') AS result + FROM h3_geo +); + +DROP TABLE h3_geo; diff --git a/tests/queries/0_stateless/01921_datatype_date32.reference b/tests/queries/0_stateless/01921_datatype_date32.reference index 3efe9079cc2..da9d93b2644 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.reference +++ b/tests/queries/0_stateless/01921_datatype_date32.reference @@ -280,3 +280,6 @@ -------toDate32--------- 1925-01-01 2000-01-01 1925-01-01 1925-01-01 +1925-01-01 \N +1925-01-01 +\N diff --git a/tests/queries/0_stateless/01921_datatype_date32.sql b/tests/queries/0_stateless/01921_datatype_date32.sql index 5431736fab3..18fb97b699f 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.sql +++ b/tests/queries/0_stateless/01921_datatype_date32.sql @@ -23,7 +23,7 @@ select toMinute(x1) from t1; -- { 
serverError 43 } select '-------toSecond---------'; select toSecond(x1) from t1; -- { serverError 43 } select '-------toStartOfDay---------'; -select toStartOfDay(x1) from t1; +select toStartOfDay(x1, 'Europe/Moscow') from t1; select '-------toMonday---------'; select toMonday(x1) from t1; select '-------toISOWeek---------'; @@ -57,21 +57,21 @@ select toStartOfHour(x1) from t1; -- { serverError 43 } select '-------toStartOfISOYear---------'; select toStartOfISOYear(x1) from t1; select '-------toRelativeYearNum---------'; -select toRelativeYearNum(x1) from t1; +select toRelativeYearNum(x1, 'Europe/Moscow') from t1; select '-------toRelativeQuarterNum---------'; -select toRelativeQuarterNum(x1) from t1; +select toRelativeQuarterNum(x1, 'Europe/Moscow') from t1; select '-------toRelativeMonthNum---------'; -select toRelativeMonthNum(x1) from t1; +select toRelativeMonthNum(x1, 'Europe/Moscow') from t1; select '-------toRelativeWeekNum---------'; -select toRelativeWeekNum(x1) from t1; +select toRelativeWeekNum(x1, 'Europe/Moscow') from t1; select '-------toRelativeDayNum---------'; -select toRelativeDayNum(x1) from t1; +select toRelativeDayNum(x1, 'Europe/Moscow') from t1; select '-------toRelativeHourNum---------'; -select toRelativeHourNum(x1) from t1; +select toRelativeHourNum(x1, 'Europe/Moscow') from t1; select '-------toRelativeMinuteNum---------'; -select toRelativeMinuteNum(x1) from t1; +select toRelativeMinuteNum(x1, 'Europe/Moscow') from t1; select '-------toRelativeSecondNum---------'; -select toRelativeSecondNum(x1) from t1; +select toRelativeSecondNum(x1, 'Europe/Moscow') from t1; select '-------toTime---------'; select toTime(x1) from t1; -- { serverError 43 } select '-------toYYYYMM---------'; @@ -115,4 +115,7 @@ select subtractYears(x1, 1) from t1; select '-------toDate32---------'; select toDate32('1925-01-01'), toDate32(toDate('2000-01-01')); select toDate32OrZero('1924-01-01'), toDate32OrNull('1924-01-01'); +select toDate32OrZero(''), toDate32OrNull(''); +select (select toDate32OrZero('')); +select (select toDate32OrNull('')); diff --git a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference new file mode 100644 index 00000000000..b856b079327 --- /dev/null +++ b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference @@ -0,0 +1,132 @@ +-- { echoOn } + +-- Int8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +2 1 +4 127 +-- Int8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toUInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +2 1 +4 127 +-- UInt8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), toUInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +1 128 +2 1 +4 127 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 +-- UInt8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; +1 0 +1 0 +1 0 +1 0 +2 1 +4 127 +-- Int16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +1 0 +1 0 +1 0 +2 1 +4 32767 +-- Int16, UInt16 +select _shard_num, * from 
remote('127.{1..4}', view(select toInt16(id) id from data), toUInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +1 0 +1 0 +1 0 +2 1 +4 32767 +-- UInt16, UInt16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toUInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; +1 0 +1 0 +1 0 +1 32768 +2 1 +4 32767 +4 65535 +4 65535 +4 65535 +4 65535 +4 65535 +-- UInt16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; +1 0 +1 0 +1 0 +2 1 +4 32767 +-- Int32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +1 0 +1 0 +2 1 +4 2147483647 +-- Int32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +1 0 +1 0 +2 1 +4 2147483647 +-- UInt32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; +1 0 +1 0 +1 2147483648 +2 1 +4 2147483647 +4 4294967295 +4 4294967295 +4 4294967295 +-- UInt32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; +1 0 +1 0 +2 1 +4 2147483647 +-- Int64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +1 0 +2 1 +4 9223372036854775807 +-- Int64, UInt64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +1 0 +2 1 +4 9223372036854775807 +-- UInt64, UInt64 +select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; +1 0 +1 9223372036854775808 +2 1 +4 9223372036854775807 +4 18446744073709551615 +-- UInt64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; +1 0 +2 1 +4 9223372036854775807 +-- modulo(Int8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt8(id)%255) where id in (-1) order by _shard_num, id; +4 -1 +-- modulo(UInt8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toUInt8(id)%255) where id in (-1) order by _shard_num, id; +1 -1 diff --git a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.sql new file mode 100644 index 00000000000..7e53c0c2db7 --- /dev/null +++ b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.sql @@ -0,0 +1,63 @@ +set optimize_skip_unused_shards=1; +set force_optimize_skip_unused_shards=2; + +create temporary table data (id UInt64) engine=Memory() as with [ + 0, + 1, + 0x7f, 0x80, 0xff, + 0x7fff, 0x8000, 0xffff, + 0x7fffffff, 0x80000000, 0xffffffff, + 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff +] as values 
select arrayJoin(values) id; + +-- { echoOn } + +-- Int8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +-- Int8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toInt8(id) id from data), toUInt8(id)) where id in (0, 1, 0x7f) order by _shard_num, id; +-- UInt8, UInt8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), toUInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; +-- UInt8, Int8 +select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from data), toInt8(id)) where id in (0, 1, 0x7f, 0x80, 0xff) order by _shard_num, id; + +-- Int16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +-- Int16, UInt16 +select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toUInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; +-- UInt16, UInt16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toUInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; +-- UInt16, Int16 +select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff, 0x8000, 0xffff) order by _shard_num, id; + +-- Int32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +-- Int32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; +-- UInt32, UInt32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toUInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; +-- UInt32, Int32 +select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; + +-- Int64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +-- Int64, UInt64 +select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; +-- UInt64, UInt64 +select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toUInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; +-- UInt64, Int64 +select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; + +-- modulo(Int8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt8(id)%255) where id in (-1) order by _shard_num, id; +-- modulo(UInt8) +select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toUInt8(id)%255) where id in (-1) order by _shard_num, id; + +-- { echoOff } + +-- those two had been reported initially by amosbird: +-- (the problem is that murmurHash3_32() returns different value to toInt64(1) and toUInt64(1)) +---- error for local 
node +select * from remote('127.{1..4}', view(select number id from numbers(0)), bitAnd(murmurHash3_32(id), 2147483647)) where id in (2, 3); +---- error for remote node +select * from remote('127.{1..8}', view(select number id from numbers(0)), bitAnd(murmurHash3_32(id), 2147483647)) where id in (2, 3); diff --git a/tests/queries/0_stateless/01932_global_in_function.reference b/tests/queries/0_stateless/01932_global_in_function.reference new file mode 100644 index 00000000000..44e0be8e356 --- /dev/null +++ b/tests/queries/0_stateless/01932_global_in_function.reference @@ -0,0 +1,4 @@ +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/01932_global_in_function.sql b/tests/queries/0_stateless/01932_global_in_function.sql new file mode 100644 index 00000000000..467bf6c3495 --- /dev/null +++ b/tests/queries/0_stateless/01932_global_in_function.sql @@ -0,0 +1,2 @@ +select number from cluster(test_cluster_two_shards_localhost, numbers(1)) where number global in tuple(0, 1, 2, 3); +select number from cluster(test_cluster_two_shards_localhost, numbers(1)) where number global in array(0, 1, 2, 3); diff --git a/tests/queries/0_stateless/01940_totimezone_operator_monotonicity.reference b/tests/queries/0_stateless/01940_totimezone_operator_monotonicity.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01940_totimezone_operator_monotonicity.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01940_totimezone_operator_monotonicity.sql b/tests/queries/0_stateless/01940_totimezone_operator_monotonicity.sql new file mode 100644 index 00000000000..b8065947ead --- /dev/null +++ b/tests/queries/0_stateless/01940_totimezone_operator_monotonicity.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS totimezone_op_mono; +CREATE TABLE totimezone_op_mono(i int, tz String, create_time DateTime) ENGINE MergeTree PARTITION BY toDate(create_time) ORDER BY i; +INSERT INTO totimezone_op_mono VALUES (1, 'UTC', toDateTime('2020-09-01 00:00:00', 'UTC')), (2, 'UTC', toDateTime('2020-09-02 00:00:00', 'UTC')); +SET max_rows_to_read = 1; +SELECT count() FROM totimezone_op_mono WHERE toTimeZone(create_time, 'UTC') = '2020-09-01 00:00:00'; +DROP TABLE IF EXISTS totimezone_op_mono; diff --git a/tests/queries/0_stateless/01942_create_table_with_sample.reference b/tests/queries/0_stateless/01942_create_table_with_sample.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01942_create_table_with_sample.sql b/tests/queries/0_stateless/01942_create_table_with_sample.sql new file mode 100644 index 00000000000..6320edd7a31 --- /dev/null +++ b/tests/queries/0_stateless/01942_create_table_with_sample.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS sample_incorrect +(`x` UUID) +ENGINE = MergeTree +ORDER BY tuple(x) +SAMPLE BY x; -- { serverError 59 } + +DROP TABLE IF EXISTS sample_correct; +CREATE TABLE IF NOT EXISTS sample_correct +(`x` String) +ENGINE = MergeTree +ORDER BY tuple(sipHash64(x)) +SAMPLE BY sipHash64(x); + +DROP TABLE sample_correct; diff --git a/tests/queries/0_stateless/01943_log_column_sizes.reference b/tests/queries/0_stateless/01943_log_column_sizes.reference new file mode 100644 index 00000000000..91ae12e38ce --- /dev/null +++ b/tests/queries/0_stateless/01943_log_column_sizes.reference @@ -0,0 +1,6 @@ +27 +33 +105 +27 +33 +105 diff --git a/tests/queries/0_stateless/01943_log_column_sizes.sql b/tests/queries/0_stateless/01943_log_column_sizes.sql new file mode 100644 index 00000000000..c6cd48c33d9 --- /dev/null 
+++ b/tests/queries/0_stateless/01943_log_column_sizes.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test_log; +DROP TABLE IF EXISTS test_tiny_log; + +CREATE TABLE test_log (x UInt8, s String, a Array(Nullable(String))) ENGINE = Log; +CREATE TABLE test_tiny_log (x UInt8, s String, a Array(Nullable(String))) ENGINE = TinyLog; + +INSERT INTO test_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); +INSERT INTO test_tiny_log VALUES (64, 'Value1', ['Value2', 'Value3', NULL]); + +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_log' AND database = currentDatabase(); +SELECT data_compressed_bytes FROM system.columns WHERE table = 'test_tiny_log' AND database = currentDatabase(); + +DROP TABLE test_log; +DROP TABLE test_tiny_log; \ No newline at end of file diff --git a/tests/queries/0_stateless/01943_non_deterministic_order_key.reference b/tests/queries/0_stateless/01943_non_deterministic_order_key.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01943_non_deterministic_order_key.sql b/tests/queries/0_stateless/01943_non_deterministic_order_key.sql new file mode 100644 index 00000000000..200a88ec677 --- /dev/null +++ b/tests/queries/0_stateless/01943_non_deterministic_order_key.sql @@ -0,0 +1,3 @@ +CREATE TABLE a (number UInt64) ENGINE = MergeTree ORDER BY if(now() > toDateTime('2020-06-01 13:31:40'), toInt64(number), -number); -- { serverError 36 } +CREATE TABLE b (number UInt64) ENGINE = MergeTree ORDER BY now() > toDateTime(number); -- { serverError 36 } +CREATE TABLE c (number UInt64) ENGINE = MergeTree ORDER BY now(); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference new file mode 100644 index 00000000000..58501cbd0fc --- /dev/null +++ b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference @@ -0,0 +1,16 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql new file mode 100644 index 00000000000..ad7331ee2db --- /dev/null +++ b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql @@ -0,0 +1,19 @@ +SET max_block_size = 6, join_algorithm = 'partial_merge'; + +SELECT count() == 4 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 5 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 6 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 7 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 8 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3]) AS s) AS js2 USING (s); +SELECT count() == 9 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 10 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 11 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 12 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 13 FROM (SELECT 1 AS s) AS js1 
ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 14 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 15 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); + +SELECT count() == 8 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 9 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3]) AS s) AS js2 USING (s); +SELECT count() == 10 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 11 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3]) AS s) AS js2 USING (s); diff --git a/tests/queries/0_stateless/01944_range_max_elements.reference b/tests/queries/0_stateless/01944_range_max_elements.reference new file mode 100644 index 00000000000..7763ac4ce96 --- /dev/null +++ b/tests/queries/0_stateless/01944_range_max_elements.reference @@ -0,0 +1,33 @@ +[] +[0] +[0,1] +[] +[0] +[0,1] +[] +[0] +[0,1] +[] +[] +[0] +[0,1] +[] +[0] +[0,1] +[] +[0] +[0,1] +[] +[0] +[] +[0] +[0,1] +[] +[0] +[0,1] +[] +[0] +[0,1] +[] +[0] +[0,1] diff --git a/tests/queries/0_stateless/01944_range_max_elements.sql b/tests/queries/0_stateless/01944_range_max_elements.sql new file mode 100644 index 00000000000..c18f61e3190 --- /dev/null +++ b/tests/queries/0_stateless/01944_range_max_elements.sql @@ -0,0 +1,7 @@ +SET function_range_max_elements_in_block = 10; +SELECT range(number % 3) FROM numbers(10); +SELECT range(number % 3) FROM numbers(11); +SELECT range(number % 3) FROM numbers(12); -- { serverError 69 } + +SET function_range_max_elements_in_block = 12; +SELECT range(number % 3) FROM numbers(12); diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect new file mode 100755 index 00000000000..7f14fdfbc96 --- /dev/null +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -0,0 +1,50 @@ +#!/usr/bin/expect -f + +# This is a test for system.warnings. Testing in interactive mode is necessary, +# as we want to see certain warnings from client + +log_user 0 +set timeout 60 +match_max 100000 + +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +set Debug_type 0 + +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" +expect ":) " + +# Check debug type +send -- "SELECT value FROM system.build_options WHERE name='BUILD_TYPE'\r" +expect { +"Debug" { + set Debug_type 1 + expect ":) " + } +"RelWithDebInfo" +} + +send -- "q\r" +expect eof + +if { $Debug_type > 0} { + +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" +expect "Warnings:" +expect " * Server was built in debug mode. It will work slowly." +expect ":) " + +# Check debug message in system.warnings +send -- "SELECT message FROM system.warnings WHERE message='Server was built in debug mode. It will work slowly.'\r" +expect "Server was built in debug mode. It will work slowly." 
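The warnings banner checked above is printed only by the interactive client, which is why this test is driven through expect; the underlying data, however, lives in ordinary system tables and can be inspected with plain queries. A minimal sketch of the same checks outside the interactive session (illustrative, not part of the patch; on a non-debug build the second query is expected to return no rows):

```sql
-- Which build type the server reports (e.g. Debug vs RelWithDebInfo).
SELECT value FROM system.build_options WHERE name = 'BUILD_TYPE';

-- The warning the expect script looks for; empty on a release build.
SELECT message FROM system.warnings WHERE message LIKE '%debug mode%';
```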
+expect ":) " + +send -- "q\r" +expect eof +} diff --git a/tests/queries/0_stateless/01945_show_debug_warning.reference b/tests/queries/0_stateless/01945_show_debug_warning.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01945_system_warnings.expect b/tests/queries/0_stateless/01945_system_warnings.expect new file mode 100755 index 00000000000..01a314429f8 --- /dev/null +++ b/tests/queries/0_stateless/01945_system_warnings.expect @@ -0,0 +1,40 @@ +#!/usr/bin/expect -f + +# This is a test for system.warnings. Testing in interactive mode is necessary, +# as we want to see certain warnings from client + +log_user 0 +set timeout 60 +match_max 100000 + +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" +expect ":) " + +#find out BUILD TYPE +send -- "SELECT value FROM system.build_options WHERE name='BUILD_TYPE'\r" +expect { + "Debug" { + # Check debug message in system.warnings + send -- "SELECT message FROM system.warnings WHERE message='Server was built in debug mode. It will work slowly.'\r" + expect "Server was built in debug mode. It will work slowly." + expect ":) " + } + "RelWithDebInfo" { + # Check empty to find out existence + send -- "SELECT message FROM system.warnings WHERE 0=1\r" + expect "Ok." + expect ":) " + } +} + +# Finish test +send -- "q\r" +expect eof diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01946_profile_sleep.reference b/tests/queries/0_stateless/01946_profile_sleep.reference new file mode 100644 index 00000000000..cc2d9ab80f9 --- /dev/null +++ b/tests/queries/0_stateless/01946_profile_sleep.reference @@ -0,0 +1,6 @@ +{"'SLEEP #1 CHECK'":"SLEEP #1 CHECK","calls":"1","microseconds":"1000"} +{"'SLEEP #2 CHECK'":"SLEEP #2 CHECK","calls":"1","microseconds":"1000"} +{"'SLEEP #3 CHECK'":"SLEEP #3 CHECK","calls":"1","microseconds":"1000"} +{"'SLEEP #4 CHECK'":"SLEEP #4 CHECK","calls":"2","microseconds":"2000"} +{"'SLEEP #5 CHECK'":"SLEEP #5 CHECK","calls":"0","microseconds":"0"} +{"'SLEEP #6 CHECK'":"SLEEP #6 CHECK","calls":"10","microseconds":"10000"} diff --git a/tests/queries/0_stateless/01946_profile_sleep.sql b/tests/queries/0_stateless/01946_profile_sleep.sql new file mode 100644 index 00000000000..01c203fb73e --- /dev/null +++ b/tests/queries/0_stateless/01946_profile_sleep.sql @@ -0,0 +1,65 @@ +SET log_queries=1; +SET log_profile_events=true; + +SELECT 'SLEEP #1 TEST', sleep(0.001) FORMAT Null; +SYSTEM FLUSH LOGS; +SELECT 'SLEEP #1 CHECK', ProfileEvents['SleepFunctionCalls'] as calls, ProfileEvents['SleepFunctionMicroseconds'] as microseconds +FROM system.query_log +WHERE query like '%SELECT ''SLEEP #1 TEST''%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT 'SLEEP #2 TEST', sleep(0.001) FROM numbers(2) FORMAT Null; +SYSTEM FLUSH LOGS; +SELECT 'SLEEP #2 CHECK', ProfileEvents['SleepFunctionCalls'] as calls, ProfileEvents['SleepFunctionMicroseconds'] as microseconds +FROM system.query_log +WHERE query like '%SELECT ''SLEEP #2 TEST''%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT 
'SLEEP #3 TEST', sleepEachRow(0.001) FORMAT Null; +SYSTEM FLUSH LOGS; +SELECT 'SLEEP #3 CHECK', ProfileEvents['SleepFunctionCalls'] as calls, ProfileEvents['SleepFunctionMicroseconds'] as microseconds +FROM system.query_log +WHERE query like '%SELECT ''SLEEP #3 TEST''%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT 'SLEEP #4 TEST', sleepEachRow(0.001) FROM numbers(2) FORMAT Null; +SYSTEM FLUSH LOGS; +SELECT 'SLEEP #4 CHECK', ProfileEvents['SleepFunctionCalls'] as calls, ProfileEvents['SleepFunctionMicroseconds'] as microseconds +FROM system.query_log +WHERE query like '%SELECT ''SLEEP #4 TEST''%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + + +CREATE VIEW sleep_view AS SELECT sleepEachRow(0.001) FROM system.numbers; +SYSTEM FLUSH LOGS; +SELECT 'SLEEP #5 CHECK', ProfileEvents['SleepFunctionCalls'] as calls, ProfileEvents['SleepFunctionMicroseconds'] as microseconds +FROM system.query_log +WHERE query like '%CREATE VIEW sleep_view AS%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT 'SLEEP #6 TEST', sleepEachRow(0.001) FROM sleep_view LIMIT 10 FORMAT Null; +SYSTEM FLUSH LOGS; +SELECT 'SLEEP #6 CHECK', ProfileEvents['SleepFunctionCalls'] as calls, ProfileEvents['SleepFunctionMicroseconds'] as microseconds +FROM system.query_log +WHERE query like '%SELECT ''SLEEP #6 TEST''%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +DROP TABLE sleep_view; diff --git a/tests/queries/0_stateless/01946_test.reference b/tests/queries/0_stateless/01946_test.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference b/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh new file mode 100755 index 00000000000..288a3438dc9 --- /dev/null +++ b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +MYHOSTNAME=$(hostname -f) + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query " + DROP USER IF EXISTS dns_fail_1, dns_fail_2; + CREATE USER dns_fail_1 HOST NAME 'non.existing.host.name', '${MYHOSTNAME}'; + CREATE USER dns_fail_2 HOST NAME '${MYHOSTNAME}', 'non.existing.host.name';" + +${CLICKHOUSE_CLIENT} --query "SELECT 1" --user dns_fail_1 --host ${MYHOSTNAME} + +${CLICKHOUSE_CLIENT} --query "SELECT 2" --user dns_fail_2 --host ${MYHOSTNAME} + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS dns_fail_1, dns_fail_2" diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.reference b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh new file mode 100755 index 00000000000..abca5cdfa3b --- /dev/null +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${user_files_path}/ +cp $CUR_DIR/data_zstd/test_01946.zstd ${user_files_path}/ + +${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +set max_read_buffer_size = 65536; +set input_format_parallel_parsing = 0; +select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; +set input_format_parallel_parsing = 1; +select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; +" + diff --git a/tests/queries/0_stateless/01946_tskv.reference b/tests/queries/0_stateless/01946_tskv.reference new file mode 100644 index 00000000000..5a3b19fa88f --- /dev/null +++ b/tests/queries/0_stateless/01946_tskv.reference @@ -0,0 +1 @@ +can contain = symbol diff --git a/tests/queries/0_stateless/01946_tskv.sh b/tests/queries/0_stateless/01946_tskv.sh new file mode 100755 index 00000000000..ecc18d205d2 --- /dev/null +++ b/tests/queries/0_stateless/01946_tskv.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS tskv"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE tskv (text String) ENGINE = Memory"; + +# shellcheck disable=SC2028 +echo -n 'tskv text=can contain \= symbol +' | $CLICKHOUSE_CLIENT --query="INSERT INTO tskv FORMAT TSKV"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM tskv"; +$CLICKHOUSE_CLIENT --query="DROP TABLE tskv"; diff --git a/tests/queries/0_stateless/01947_mv_subquery.reference b/tests/queries/0_stateless/01947_mv_subquery.reference new file mode 100644 index 00000000000..fe65b417907 --- /dev/null +++ b/tests/queries/0_stateless/01947_mv_subquery.reference @@ -0,0 +1,6 @@ +{"test":"1947 #1 CHECK - TRUE","sleep_calls":"0","sleep_microseconds":"0"} +{"test":"1947 #2 CHECK - TRUE","sleep_calls":"2","sleep_microseconds":"2000"} +{"test":"1947 #3 CHECK - TRUE","sleep_calls":"0","sleep_microseconds":"0"} +{"test":"1947 #1 CHECK - FALSE","sleep_calls":"0","sleep_microseconds":"0"} +{"test":"1947 #2 CHECK - FALSE","sleep_calls":"2","sleep_microseconds":"2000"} +{"test":"1947 #3 CHECK - FALSE","sleep_calls":"0","sleep_microseconds":"0"} diff --git a/tests/queries/0_stateless/01947_mv_subquery.sql b/tests/queries/0_stateless/01947_mv_subquery.sql new file mode 100644 index 00000000000..ae67e46e0ae --- /dev/null +++ b/tests/queries/0_stateless/01947_mv_subquery.sql @@ -0,0 +1,145 @@ +SET log_queries=1; +SET log_profile_events=true; + +CREATE TABLE src Engine=MergeTree ORDER BY id AS SELECT number as id, toInt32(1) as value FROM numbers(1); +CREATE TABLE dst (id UInt64, delta Int64) Engine=MergeTree ORDER BY id; + +-- First we try with default values (https://github.com/ClickHouse/ClickHouse/issues/9587) +SET use_index_for_in_with_subqueries = 1; + +CREATE MATERIALIZED VIEW src2dst_true TO dst AS +SELECT + id, + src.value - deltas_sum as delta +FROM src +LEFT JOIN +( + SELECT id, sum(delta) as deltas_sum FROM dst + WHERE id IN (SELECT id FROM src WHERE not sleepEachRow(0.001)) + GROUP BY id +) _a +USING (id); + +-- Inserting 2 numbers should require 2 calls to sleep +INSERT into src SELECT number + 100 as id, 1 FROM numbers(2); + +-- Describe should not need to call sleep +DESCRIBE ( SELECT '1947 #3 QUERY - TRUE', + id, + src.value - deltas_sum as delta + FROM src + LEFT JOIN + ( + SELECT id, sum(delta) as deltas_sum FROM dst + WHERE id IN (SELECT id FROM src WHERE not sleepEachRow(0.001)) + GROUP BY id + ) _a + USING (id) + ) FORMAT Null; + + +SYSTEM FLUSH LOGS; + +SELECT '1947 #1 CHECK - TRUE' as test, + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds +FROM system.query_log +WHERE query like '%CREATE MATERIALIZED VIEW src2dst_true%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT '1947 #2 CHECK - TRUE' as test, + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds +FROM system.query_log +WHERE query like '%INSERT into src SELECT number + 100 as id, 1 FROM numbers(2)%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT '1947 #3 CHECK - TRUE' as test, + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds +FROM system.query_log +WHERE query like '%DESCRIBE ( SELECT ''1947 #3 QUERY - TRUE'',%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= 
yesterday() + FORMAT JSONEachRow; + +DROP TABLE src2dst_true; + + +-- Retry the same but using use_index_for_in_with_subqueries = 0 + +SET use_index_for_in_with_subqueries = 0; + +CREATE MATERIALIZED VIEW src2dst_false TO dst AS +SELECT + id, + src.value - deltas_sum as delta +FROM src +LEFT JOIN +( + SELECT id, sum(delta) as deltas_sum FROM dst + WHERE id IN (SELECT id FROM src WHERE not sleepEachRow(0.001)) + GROUP BY id +) _a +USING (id); + +-- Inserting 2 numbers should require 2 calls to sleep +INSERT into src SELECT number + 200 as id, 1 FROM numbers(2); + +-- Describe should not need to call sleep +DESCRIBE ( SELECT '1947 #3 QUERY - FALSE', + id, + src.value - deltas_sum as delta + FROM src + LEFT JOIN + ( + SELECT id, sum(delta) as deltas_sum FROM dst + WHERE id IN (SELECT id FROM src WHERE not sleepEachRow(0.001)) + GROUP BY id + ) _a + USING (id) + ) FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT '1947 #1 CHECK - FALSE' as test, + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds +FROM system.query_log +WHERE query like '%CREATE MATERIALIZED VIEW src2dst_false%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT '1947 #2 CHECK - FALSE' as test, + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds +FROM system.query_log +WHERE query like '%INSERT into src SELECT number + 200 as id, 1 FROM numbers(2)%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +SELECT '1947 #3 CHECK - FALSE' as test, + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds +FROM system.query_log +WHERE query like '%DESCRIBE ( SELECT ''1947 #3 QUERY - FALSE'',%' + AND type > 1 + AND current_database = currentDatabase() + AND event_date >= yesterday() + FORMAT JSONEachRow; + +DROP TABLE src2dst_false; + +DROP TABLE src; +DROP TABLE dst; diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 838a2da9aff..d7581cc4e07 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -215,6 +215,7 @@ 01747_join_view_filter_dictionary 01748_dictionary_table_dot 01755_client_highlight_multi_line_comment_regression +01756_optimize_skip_unused_shards_rewrite_in 00950_dict_get 01683_flat_dictionary 01681_cache_dictionary_simple_key @@ -251,6 +252,16 @@ 01924_argmax_bitmap_state 01914_exchange_dictionaries 01923_different_expression_name_alias +01930_optimize_skip_unused_shards_rewrite_in 01932_null_valid_identifier 00918_json_functions 01889_sql_json_functions +01849_geoToS2 +01851_s2_to_geo +01852_s2_get_neighbours +01853_s2_cells_intersect +01854_s2_cap_contains +01854_s2_cap_union +01428_h3_range_check +01442_h3kring_range_check +01906_h3_to_geo diff --git a/tests/queries/0_stateless/data_zstd/test_01946.zstd b/tests/queries/0_stateless/data_zstd/test_01946.zstd new file mode 100644 index 00000000000..c021b112dad Binary files /dev/null and b/tests/queries/0_stateless/data_zstd/test_01946.zstd differ diff --git a/tests/queries/1_stateful/00166_explain_estimate.reference b/tests/queries/1_stateful/00166_explain_estimate.reference new file mode 100644 index 00000000000..71ddd681581 --- /dev/null +++ b/tests/queries/1_stateful/00166_explain_estimate.reference @@ -0,0 +1,5 @@ 
+test hits 1 57344 7 +test hits 1 8839168 1079 +test hits 1 835584 102 +test hits 1 8003584 977 +test hits 2 581632 71 diff --git a/tests/queries/1_stateful/00166_explain_estimate.sql b/tests/queries/1_stateful/00166_explain_estimate.sql new file mode 100644 index 00000000000..06725ff7f9f --- /dev/null +++ b/tests/queries/1_stateful/00166_explain_estimate.sql @@ -0,0 +1,5 @@ +EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID = 29103473; +EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID != 29103473; +EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID > 29103473; +EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID < 29103473; +EXPLAIN ESTIMATE SELECT count() FROM test.hits WHERE CounterID = 29103473 UNION ALL SELECT count() FROM test.hits WHERE CounterID = 1704509; diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 1b544ea20d1..f2bdd8ae52d 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -176,7 +176,8 @@ /// Requires investigation "00953_zookeeper_suetin_deduplication_bug", "01783_http_chunk_size", - "01943_query_id_check" + "01943_query_id_check", + "00166_explain_estimate" ], "polymorphic-parts": [ "01508_partition_pruning_long", /// bug, shoud be fixed @@ -485,6 +486,7 @@ "01702_system_query_log", // It's ok to execute in parallel with oter tests but not several instances of the same test. "01748_dictionary_table_dot", // creates database "00950_dict_get", + "01615_random_one_shard_insertion", "01683_flat_dictionary", "01681_cache_dictionary_simple_key", "01682_cache_dictionary_complex_key", @@ -505,6 +507,7 @@ "01824_prefer_global_in_and_join", "01870_modulo_partition_key", "01870_buffer_flush", // creates database + "01889_sqlite_read_write", "01889_postgresql_protocol_null_fields", "01889_check_row_policy_defined_using_user_function", "01921_concurrent_ttl_and_normal_merges_zookeeper_long", // heavy test, better to run sequentially @@ -513,6 +516,8 @@ "01915_create_or_replace_dictionary", "01925_test_storage_merge_aliases", "01933_client_replxx_convert_history", /// Uses non unique history file - "01902_table_function_merge_db_repr" + "01902_table_function_merge_db_repr", + "01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer", + "01946_test_wrong_host_name_access" ] } diff --git a/tests/testflows/kerberos/configs/clickhouse1/config.d/kerberos.xml b/tests/testflows/kerberos/configs/clickhouse1/config.d/kerberos.xml index ceaa497c561..e45c4519c73 100644 --- a/tests/testflows/kerberos/configs/clickhouse1/config.d/kerberos.xml +++ b/tests/testflows/kerberos/configs/clickhouse1/config.d/kerberos.xml @@ -1,5 +1,6 @@ + EXAMPLE.COM - \ No newline at end of file + diff --git a/tests/testflows/kerberos/configs/clickhouse3/config.d/kerberos.xml b/tests/testflows/kerberos/configs/clickhouse3/config.d/kerberos.xml new file mode 100644 index 00000000000..e45c4519c73 --- /dev/null +++ b/tests/testflows/kerberos/configs/clickhouse3/config.d/kerberos.xml @@ -0,0 +1,6 @@ + + + + EXAMPLE.COM + + diff --git a/tests/testflows/kerberos/configs/kerberos/etc/krb5.conf b/tests/testflows/kerberos/configs/kerberos/etc/krb5.conf index b963fc25daa..602ca76abbe 100644 --- a/tests/testflows/kerberos/configs/kerberos/etc/krb5.conf +++ b/tests/testflows/kerberos/configs/kerberos/etc/krb5.conf @@ -3,17 +3,14 @@ [libdefaults] default_realm = EXAMPLE.COM - ticket_lifetime = 24000 - dns_lookup_realm = false - dns_lookup_kdc = false - dns_fallback = false - rdns = false + ticket_lifetime = 
36000 + dns_lookup_kdc = false [realms] - EXAMPLE.COM = { - kdc = kerberos - admin_server = kerberos - } + EXAMPLE.COM = { + kdc = kerberos_env_kerberos_1.krbnet + admin_server = kerberos_env_kerberos_1.krbnet + } OTHER.COM = { kdc = kerberos admin_server = kerberos @@ -22,6 +19,10 @@ [domain_realm] docker-compose_default = EXAMPLE.COM .docker-compose_default = EXAMPLE.COM + krbnet = EXAMPLE.COM + .krbnet = EXAMPLE.COM + kerberos_env_default = EXAMPLE.COM + .kerberos_env_default = EXAMPLE.COM [appdefaults] validate = false diff --git a/tests/testflows/kerberos/kerberos_env/docker-compose.yml b/tests/testflows/kerberos/kerberos_env/docker-compose.yml index d1a74662a83..e89d18a5299 100644 --- a/tests/testflows/kerberos/kerberos_env/docker-compose.yml +++ b/tests/testflows/kerberos/kerberos_env/docker-compose.yml @@ -73,3 +73,8 @@ services: condition: service_healthy kerberos: condition: service_healthy + +networks: + default: + name: krbnet + driver: bridge diff --git a/tests/testflows/kerberos/kerberos_env/kerberos-service.yml b/tests/testflows/kerberos/kerberos_env/kerberos-service.yml index 3f21e93e0b6..b34751258da 100644 --- a/tests/testflows/kerberos/kerberos_env/kerberos-service.yml +++ b/tests/testflows/kerberos/kerberos_env/kerberos-service.yml @@ -3,7 +3,6 @@ version: '2.3' services: kerberos: image: zvonand/docker-krb5-server:1.0.0 - restart: always expose: - "88" - "464" @@ -17,7 +16,7 @@ services: environment: KRB5_PASS: pwd KRB5_REALM: EXAMPLE.COM - KRB5_KDC: localhost + KRB5_KDC: 0.0.0.0 volumes: - "${CLICKHOUSE_TESTS_DIR}/configs/kerberos/etc/krb5kdc/kdc.conf:/etc/krb5kdc/kdc.conf" - "${CLICKHOUSE_TESTS_DIR}/_instances/kerberos/krb5kdc/log/kdc.log:/usr/local/var/krb5kdc/kdc.log" diff --git a/tests/testflows/kerberos/regression.py b/tests/testflows/kerberos/regression.py index ca174aaff08..0e8b0a55c2e 100755 --- a/tests/testflows/kerberos/regression.py +++ b/tests/testflows/kerberos/regression.py @@ -10,6 +10,7 @@ from helpers.argparser import argparser from kerberos.requirements.requirements import * xfails = { + "config/principal and realm specified/:": [(Fail, "https://github.com/ClickHouse/ClickHouse/issues/26197")], } @@ -43,5 +44,6 @@ def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): Feature(run=load("kerberos.tests.parallel", "parallel"), flags=TE) + if main(): regression() diff --git a/tests/testflows/kerberos/requirements/requirements.md b/tests/testflows/kerberos/requirements/requirements.md index 2121dd343b8..8f2b3b7e11e 100644 --- a/tests/testflows/kerberos/requirements/requirements.md +++ b/tests/testflows/kerberos/requirements/requirements.md @@ -9,38 +9,41 @@ * 4 [Requirements](#requirements) * 4.1 [Generic](#generic) * 4.1.1 [RQ.SRS-016.Kerberos](#rqsrs-016kerberos) - * 4.2 [Configuration](#configuration) - * 4.2.1 [RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods](#rqsrs-016kerberosconfigurationmultipleauthmethods) - * 4.2.2 [RQ.SRS-016.Kerberos.Configuration.KerberosNotEnabled](#rqsrs-016kerberosconfigurationkerberosnotenabled) - * 4.2.3 [RQ.SRS-016.Kerberos.Configuration.MultipleKerberosSections](#rqsrs-016kerberosconfigurationmultiplekerberossections) - * 4.2.4 [RQ.SRS-016.Kerberos.Configuration.WrongUserRealm](#rqsrs-016kerberosconfigurationwronguserrealm) - * 4.2.5 [RQ.SRS-016.Kerberos.Configuration.PrincipalAndRealmSpecified](#rqsrs-016kerberosconfigurationprincipalandrealmspecified) - * 4.2.6 [RQ.SRS-016.Kerberos.Configuration.MultiplePrincipalSections](#rqsrs-016kerberosconfigurationmultipleprincipalsections) - * 
4.2.7 [RQ.SRS-016.Kerberos.Configuration.MultipleRealmSections](#rqsrs-016kerberosconfigurationmultiplerealmsections) - * 4.3 [Valid User](#valid-user) - * 4.3.1 [RQ.SRS-016.Kerberos.ValidUser.XMLConfiguredUser](#rqsrs-016kerberosvaliduserxmlconfigureduser) - * 4.3.2 [RQ.SRS-016.Kerberos.ValidUser.RBACConfiguredUser](#rqsrs-016kerberosvaliduserrbacconfigureduser) - * 4.3.3 [RQ.SRS-016.Kerberos.ValidUser.KerberosNotConfigured](#rqsrs-016kerberosvaliduserkerberosnotconfigured) - * 4.4 [Invalid User](#invalid-user) - * 4.4.1 [RQ.SRS-016.Kerberos.InvalidUser](#rqsrs-016kerberosinvaliduser) - * 4.4.2 [RQ.SRS-016.Kerberos.InvalidUser.UserDeleted](#rqsrs-016kerberosinvaliduseruserdeleted) - * 4.5 [Kerberos Not Available](#kerberos-not-available) - * 4.5.1 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidServerTicket](#rqsrs-016kerberoskerberosnotavailableinvalidserverticket) - * 4.5.2 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidClientTicket](#rqsrs-016kerberoskerberosnotavailableinvalidclientticket) - * 4.5.3 [RQ.SRS-016.Kerberos.KerberosNotAvailable.ValidTickets](#rqsrs-016kerberoskerberosnotavailablevalidtickets) - * 4.6 [Kerberos Restarted](#kerberos-restarted) - * 4.6.1 [RQ.SRS-016.Kerberos.KerberosServerRestarted](#rqsrs-016kerberoskerberosserverrestarted) - * 4.7 [Performance](#performance) - * 4.7.1 [RQ.SRS-016.Kerberos.Performance](#rqsrs-016kerberosperformance) - * 4.8 [Parallel Requests processing](#parallel-requests-processing) - * 4.8.1 [RQ.SRS-016.Kerberos.Parallel](#rqsrs-016kerberosparallel) - * 4.8.2 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.KerberosAndNonKerberos](#rqsrs-016kerberosparallelvalidrequestskerberosandnonkerberos) - * 4.8.3 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.SameCredentials](#rqsrs-016kerberosparallelvalidrequestssamecredentials) - * 4.8.4 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.DifferentCredentials](#rqsrs-016kerberosparallelvalidrequestsdifferentcredentials) - * 4.8.5 [RQ.SRS-016.Kerberos.Parallel.ValidInvalid](#rqsrs-016kerberosparallelvalidinvalid) - * 4.8.6 [RQ.SRS-016.Kerberos.Parallel.Deletion](#rqsrs-016kerberosparalleldeletion) + * 4.2 [Ping](#ping) + * 4.2.1 [RQ.SRS-016.Kerberos.Ping](#rqsrs-016kerberosping) + * 4.3 [Configuration](#configuration) + * 4.3.1 [RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods](#rqsrs-016kerberosconfigurationmultipleauthmethods) + * 4.3.2 [RQ.SRS-016.Kerberos.Configuration.KerberosNotEnabled](#rqsrs-016kerberosconfigurationkerberosnotenabled) + * 4.3.3 [RQ.SRS-016.Kerberos.Configuration.MultipleKerberosSections](#rqsrs-016kerberosconfigurationmultiplekerberossections) + * 4.3.4 [RQ.SRS-016.Kerberos.Configuration.WrongUserRealm](#rqsrs-016kerberosconfigurationwronguserrealm) + * 4.3.5 [RQ.SRS-016.Kerberos.Configuration.PrincipalAndRealmSpecified](#rqsrs-016kerberosconfigurationprincipalandrealmspecified) + * 4.3.6 [RQ.SRS-016.Kerberos.Configuration.MultiplePrincipalSections](#rqsrs-016kerberosconfigurationmultipleprincipalsections) + * 4.3.7 [RQ.SRS-016.Kerberos.Configuration.MultipleRealmSections](#rqsrs-016kerberosconfigurationmultiplerealmsections) + * 4.4 [Valid User](#valid-user) + * 4.4.1 [RQ.SRS-016.Kerberos.ValidUser.XMLConfiguredUser](#rqsrs-016kerberosvaliduserxmlconfigureduser) + * 4.4.2 [RQ.SRS-016.Kerberos.ValidUser.RBACConfiguredUser](#rqsrs-016kerberosvaliduserrbacconfigureduser) + * 4.4.3 [RQ.SRS-016.Kerberos.ValidUser.KerberosNotConfigured](#rqsrs-016kerberosvaliduserkerberosnotconfigured) + * 4.5 [Invalid User](#invalid-user) + * 4.5.1 
[RQ.SRS-016.Kerberos.InvalidUser](#rqsrs-016kerberosinvaliduser) + * 4.5.2 [RQ.SRS-016.Kerberos.InvalidUser.UserDeleted](#rqsrs-016kerberosinvaliduseruserdeleted) + * 4.6 [Kerberos Not Available](#kerberos-not-available) + * 4.6.1 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidServerTicket](#rqsrs-016kerberoskerberosnotavailableinvalidserverticket) + * 4.6.2 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidClientTicket](#rqsrs-016kerberoskerberosnotavailableinvalidclientticket) + * 4.6.3 [RQ.SRS-016.Kerberos.KerberosNotAvailable.ValidTickets](#rqsrs-016kerberoskerberosnotavailablevalidtickets) + * 4.7 [Kerberos Restarted](#kerberos-restarted) + * 4.7.1 [RQ.SRS-016.Kerberos.KerberosServerRestarted](#rqsrs-016kerberoskerberosserverrestarted) + * 4.8 [Performance](#performance) + * 4.8.1 [RQ.SRS-016.Kerberos.Performance](#rqsrs-016kerberosperformance) + * 4.9 [Parallel Requests processing](#parallel-requests-processing) + * 4.9.1 [RQ.SRS-016.Kerberos.Parallel](#rqsrs-016kerberosparallel) + * 4.9.2 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.KerberosAndNonKerberos](#rqsrs-016kerberosparallelvalidrequestskerberosandnonkerberos) + * 4.9.3 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.SameCredentials](#rqsrs-016kerberosparallelvalidrequestssamecredentials) + * 4.9.4 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.DifferentCredentials](#rqsrs-016kerberosparallelvalidrequestsdifferentcredentials) + * 4.9.5 [RQ.SRS-016.Kerberos.Parallel.ValidInvalid](#rqsrs-016kerberosparallelvalidinvalid) + * 4.9.6 [RQ.SRS-016.Kerberos.Parallel.Deletion](#rqsrs-016kerberosparalleldeletion) * 5 [References](#references) + ## Revision History This document is stored in an electronic form using [Git] source control management software @@ -85,6 +88,13 @@ version: 1.0 [ClickHouse] SHALL support user authentication using [Kerberos] server. +### Ping + +#### RQ.SRS-016.Kerberos.Ping +version: 1.0 + +Docker containers SHALL be able to ping each other. + ### Configuration #### RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods @@ -278,4 +288,3 @@ version: 1.0 [Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/kerberos/requirements/requirements.md [Git]: https://git-scm.com/ [Kerberos terminology]: https://web.mit.edu/kerberos/kfw-4.1/kfw-4.1/kfw-4.1-help/html/kerberos_terminology.htm - diff --git a/tests/testflows/kerberos/requirements/requirements.py b/tests/testflows/kerberos/requirements/requirements.py index 5c49e7d127f..418c51ca8b3 100644 --- a/tests/testflows/kerberos/requirements/requirements.py +++ b/tests/testflows/kerberos/requirements/requirements.py @@ -1,6 +1,6 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.201216.1172002. +# document by TestFlows v1.6.210312.1172513. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. 
from testflows.core import Specification @@ -23,6 +23,21 @@ RQ_SRS_016_Kerberos = Requirement( level=3, num='4.1.1') +RQ_SRS_016_Kerberos_Ping = Requirement( + name='RQ.SRS-016.Kerberos.Ping', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + 'Docker containers SHALL be able to ping each other.\n' + '\n' + ), + link=None, + level=3, + num='4.2.1') + RQ_SRS_016_Kerberos_Configuration_MultipleAuthMethods = Requirement( name='RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods', version='1.0', @@ -36,7 +51,7 @@ RQ_SRS_016_Kerberos_Configuration_MultipleAuthMethods = Requirement( ), link=None, level=3, - num='4.2.1') + num='4.3.1') RQ_SRS_016_Kerberos_Configuration_KerberosNotEnabled = Requirement( name='RQ.SRS-016.Kerberos.Configuration.KerberosNotEnabled', @@ -74,7 +89,7 @@ RQ_SRS_016_Kerberos_Configuration_KerberosNotEnabled = Requirement( ), link=None, level=3, - num='4.2.2') + num='4.3.2') RQ_SRS_016_Kerberos_Configuration_MultipleKerberosSections = Requirement( name='RQ.SRS-016.Kerberos.Configuration.MultipleKerberosSections', @@ -89,7 +104,7 @@ RQ_SRS_016_Kerberos_Configuration_MultipleKerberosSections = Requirement( ), link=None, level=3, - num='4.2.3') + num='4.3.3') RQ_SRS_016_Kerberos_Configuration_WrongUserRealm = Requirement( name='RQ.SRS-016.Kerberos.Configuration.WrongUserRealm', @@ -104,7 +119,7 @@ RQ_SRS_016_Kerberos_Configuration_WrongUserRealm = Requirement( ), link=None, level=3, - num='4.2.4') + num='4.3.4') RQ_SRS_016_Kerberos_Configuration_PrincipalAndRealmSpecified = Requirement( name='RQ.SRS-016.Kerberos.Configuration.PrincipalAndRealmSpecified', @@ -119,7 +134,7 @@ RQ_SRS_016_Kerberos_Configuration_PrincipalAndRealmSpecified = Requirement( ), link=None, level=3, - num='4.2.5') + num='4.3.5') RQ_SRS_016_Kerberos_Configuration_MultiplePrincipalSections = Requirement( name='RQ.SRS-016.Kerberos.Configuration.MultiplePrincipalSections', @@ -134,7 +149,7 @@ RQ_SRS_016_Kerberos_Configuration_MultiplePrincipalSections = Requirement( ), link=None, level=3, - num='4.2.6') + num='4.3.6') RQ_SRS_016_Kerberos_Configuration_MultipleRealmSections = Requirement( name='RQ.SRS-016.Kerberos.Configuration.MultipleRealmSections', @@ -149,7 +164,7 @@ RQ_SRS_016_Kerberos_Configuration_MultipleRealmSections = Requirement( ), link=None, level=3, - num='4.2.7') + num='4.3.7') RQ_SRS_016_Kerberos_ValidUser_XMLConfiguredUser = Requirement( name='RQ.SRS-016.Kerberos.ValidUser.XMLConfiguredUser', @@ -179,7 +194,7 @@ RQ_SRS_016_Kerberos_ValidUser_XMLConfiguredUser = Requirement( ), link=None, level=3, - num='4.3.1') + num='4.4.1') RQ_SRS_016_Kerberos_ValidUser_RBACConfiguredUser = Requirement( name='RQ.SRS-016.Kerberos.ValidUser.RBACConfiguredUser', @@ -204,7 +219,7 @@ RQ_SRS_016_Kerberos_ValidUser_RBACConfiguredUser = Requirement( ), link=None, level=3, - num='4.3.2') + num='4.4.2') RQ_SRS_016_Kerberos_ValidUser_KerberosNotConfigured = Requirement( name='RQ.SRS-016.Kerberos.ValidUser.KerberosNotConfigured', @@ -219,7 +234,7 @@ RQ_SRS_016_Kerberos_ValidUser_KerberosNotConfigured = Requirement( ), link=None, level=3, - num='4.3.3') + num='4.4.3') RQ_SRS_016_Kerberos_InvalidUser = Requirement( name='RQ.SRS-016.Kerberos.InvalidUser', @@ -234,7 +249,7 @@ RQ_SRS_016_Kerberos_InvalidUser = Requirement( ), link=None, level=3, - num='4.4.1') + num='4.5.1') RQ_SRS_016_Kerberos_InvalidUser_UserDeleted = Requirement( name='RQ.SRS-016.Kerberos.InvalidUser.UserDeleted', @@ -249,7 +264,7 @@ RQ_SRS_016_Kerberos_InvalidUser_UserDeleted = Requirement( ), link=None, 
level=3, - num='4.4.2') + num='4.5.2') RQ_SRS_016_Kerberos_KerberosNotAvailable_InvalidServerTicket = Requirement( name='RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidServerTicket', @@ -264,7 +279,7 @@ RQ_SRS_016_Kerberos_KerberosNotAvailable_InvalidServerTicket = Requirement( ), link=None, level=3, - num='4.5.1') + num='4.6.1') RQ_SRS_016_Kerberos_KerberosNotAvailable_InvalidClientTicket = Requirement( name='RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidClientTicket', @@ -279,7 +294,7 @@ RQ_SRS_016_Kerberos_KerberosNotAvailable_InvalidClientTicket = Requirement( ), link=None, level=3, - num='4.5.2') + num='4.6.2') RQ_SRS_016_Kerberos_KerberosNotAvailable_ValidTickets = Requirement( name='RQ.SRS-016.Kerberos.KerberosNotAvailable.ValidTickets', @@ -294,7 +309,7 @@ RQ_SRS_016_Kerberos_KerberosNotAvailable_ValidTickets = Requirement( ), link=None, level=3, - num='4.5.3') + num='4.6.3') RQ_SRS_016_Kerberos_KerberosServerRestarted = Requirement( name='RQ.SRS-016.Kerberos.KerberosServerRestarted', @@ -309,7 +324,7 @@ RQ_SRS_016_Kerberos_KerberosServerRestarted = Requirement( ), link=None, level=3, - num='4.6.1') + num='4.7.1') RQ_SRS_016_Kerberos_Performance = Requirement( name='RQ.SRS-016.Kerberos.Performance', @@ -324,7 +339,7 @@ RQ_SRS_016_Kerberos_Performance = Requirement( ), link=None, level=3, - num='4.7.1') + num='4.8.1') RQ_SRS_016_Kerberos_Parallel = Requirement( name='RQ.SRS-016.Kerberos.Parallel', @@ -339,7 +354,7 @@ RQ_SRS_016_Kerberos_Parallel = Requirement( ), link=None, level=3, - num='4.8.1') + num='4.9.1') RQ_SRS_016_Kerberos_Parallel_ValidRequests_KerberosAndNonKerberos = Requirement( name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.KerberosAndNonKerberos', @@ -354,7 +369,7 @@ RQ_SRS_016_Kerberos_Parallel_ValidRequests_KerberosAndNonKerberos = Requirement( ), link=None, level=3, - num='4.8.2') + num='4.9.2') RQ_SRS_016_Kerberos_Parallel_ValidRequests_SameCredentials = Requirement( name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.SameCredentials', @@ -369,7 +384,7 @@ RQ_SRS_016_Kerberos_Parallel_ValidRequests_SameCredentials = Requirement( ), link=None, level=3, - num='4.8.3') + num='4.9.3') RQ_SRS_016_Kerberos_Parallel_ValidRequests_DifferentCredentials = Requirement( name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.DifferentCredentials', @@ -384,7 +399,7 @@ RQ_SRS_016_Kerberos_Parallel_ValidRequests_DifferentCredentials = Requirement( ), link=None, level=3, - num='4.8.4') + num='4.9.4') RQ_SRS_016_Kerberos_Parallel_ValidInvalid = Requirement( name='RQ.SRS-016.Kerberos.Parallel.ValidInvalid', @@ -399,7 +414,7 @@ RQ_SRS_016_Kerberos_Parallel_ValidInvalid = Requirement( ), link=None, level=3, - num='4.8.5') + num='4.9.5') RQ_SRS_016_Kerberos_Parallel_Deletion = Requirement( name='RQ.SRS-016.Kerberos.Parallel.Deletion', @@ -414,17 +429,17 @@ RQ_SRS_016_Kerberos_Parallel_Deletion = Requirement( ), link=None, level=3, - num='4.8.6') + num='4.9.6') QA_SRS016_ClickHouse_Kerberos_Authentication = Specification( name='QA-SRS016 ClickHouse Kerberos Authentication', description=None, - author='Andrey Zvonov', - date='December 14, 2020', - status='-', - approved_by='-', - approved_date='-', - approved_version='-', + author=None, + date=None, + status=None, + approved_by=None, + approved_date=None, + approved_version=None, version=None, group=None, type=None, @@ -439,40 +454,43 @@ QA_SRS016_ClickHouse_Kerberos_Authentication = Specification( Heading(name='Requirements', level=1, num='4'), Heading(name='Generic', level=2, num='4.1'), Heading(name='RQ.SRS-016.Kerberos', level=3, 
num='4.1.1'), - Heading(name='Configuration', level=2, num='4.2'), - Heading(name='RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods', level=3, num='4.2.1'), - Heading(name='RQ.SRS-016.Kerberos.Configuration.KerberosNotEnabled', level=3, num='4.2.2'), - Heading(name='RQ.SRS-016.Kerberos.Configuration.MultipleKerberosSections', level=3, num='4.2.3'), - Heading(name='RQ.SRS-016.Kerberos.Configuration.WrongUserRealm', level=3, num='4.2.4'), - Heading(name='RQ.SRS-016.Kerberos.Configuration.PrincipalAndRealmSpecified', level=3, num='4.2.5'), - Heading(name='RQ.SRS-016.Kerberos.Configuration.MultiplePrincipalSections', level=3, num='4.2.6'), - Heading(name='RQ.SRS-016.Kerberos.Configuration.MultipleRealmSections', level=3, num='4.2.7'), - Heading(name='Valid User', level=2, num='4.3'), - Heading(name='RQ.SRS-016.Kerberos.ValidUser.XMLConfiguredUser', level=3, num='4.3.1'), - Heading(name='RQ.SRS-016.Kerberos.ValidUser.RBACConfiguredUser', level=3, num='4.3.2'), - Heading(name='RQ.SRS-016.Kerberos.ValidUser.KerberosNotConfigured', level=3, num='4.3.3'), - Heading(name='Invalid User', level=2, num='4.4'), - Heading(name='RQ.SRS-016.Kerberos.InvalidUser', level=3, num='4.4.1'), - Heading(name='RQ.SRS-016.Kerberos.InvalidUser.UserDeleted', level=3, num='4.4.2'), - Heading(name='Kerberos Not Available', level=2, num='4.5'), - Heading(name='RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidServerTicket', level=3, num='4.5.1'), - Heading(name='RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidClientTicket', level=3, num='4.5.2'), - Heading(name='RQ.SRS-016.Kerberos.KerberosNotAvailable.ValidTickets', level=3, num='4.5.3'), - Heading(name='Kerberos Restarted', level=2, num='4.6'), - Heading(name='RQ.SRS-016.Kerberos.KerberosServerRestarted', level=3, num='4.6.1'), - Heading(name='Performance', level=2, num='4.7'), - Heading(name='RQ.SRS-016.Kerberos.Performance', level=3, num='4.7.1'), - Heading(name='Parallel Requests processing', level=2, num='4.8'), - Heading(name='RQ.SRS-016.Kerberos.Parallel', level=3, num='4.8.1'), - Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.KerberosAndNonKerberos', level=3, num='4.8.2'), - Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.SameCredentials', level=3, num='4.8.3'), - Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.DifferentCredentials', level=3, num='4.8.4'), - Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidInvalid', level=3, num='4.8.5'), - Heading(name='RQ.SRS-016.Kerberos.Parallel.Deletion', level=3, num='4.8.6'), + Heading(name='Ping', level=2, num='4.2'), + Heading(name='RQ.SRS-016.Kerberos.Ping', level=3, num='4.2.1'), + Heading(name='Configuration', level=2, num='4.3'), + Heading(name='RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods', level=3, num='4.3.1'), + Heading(name='RQ.SRS-016.Kerberos.Configuration.KerberosNotEnabled', level=3, num='4.3.2'), + Heading(name='RQ.SRS-016.Kerberos.Configuration.MultipleKerberosSections', level=3, num='4.3.3'), + Heading(name='RQ.SRS-016.Kerberos.Configuration.WrongUserRealm', level=3, num='4.3.4'), + Heading(name='RQ.SRS-016.Kerberos.Configuration.PrincipalAndRealmSpecified', level=3, num='4.3.5'), + Heading(name='RQ.SRS-016.Kerberos.Configuration.MultiplePrincipalSections', level=3, num='4.3.6'), + Heading(name='RQ.SRS-016.Kerberos.Configuration.MultipleRealmSections', level=3, num='4.3.7'), + Heading(name='Valid User', level=2, num='4.4'), + Heading(name='RQ.SRS-016.Kerberos.ValidUser.XMLConfiguredUser', level=3, num='4.4.1'), + 
Heading(name='RQ.SRS-016.Kerberos.ValidUser.RBACConfiguredUser', level=3, num='4.4.2'), + Heading(name='RQ.SRS-016.Kerberos.ValidUser.KerberosNotConfigured', level=3, num='4.4.3'), + Heading(name='Invalid User', level=2, num='4.5'), + Heading(name='RQ.SRS-016.Kerberos.InvalidUser', level=3, num='4.5.1'), + Heading(name='RQ.SRS-016.Kerberos.InvalidUser.UserDeleted', level=3, num='4.5.2'), + Heading(name='Kerberos Not Available', level=2, num='4.6'), + Heading(name='RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidServerTicket', level=3, num='4.6.1'), + Heading(name='RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidClientTicket', level=3, num='4.6.2'), + Heading(name='RQ.SRS-016.Kerberos.KerberosNotAvailable.ValidTickets', level=3, num='4.6.3'), + Heading(name='Kerberos Restarted', level=2, num='4.7'), + Heading(name='RQ.SRS-016.Kerberos.KerberosServerRestarted', level=3, num='4.7.1'), + Heading(name='Performance', level=2, num='4.8'), + Heading(name='RQ.SRS-016.Kerberos.Performance', level=3, num='4.8.1'), + Heading(name='Parallel Requests processing', level=2, num='4.9'), + Heading(name='RQ.SRS-016.Kerberos.Parallel', level=3, num='4.9.1'), + Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.KerberosAndNonKerberos', level=3, num='4.9.2'), + Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.SameCredentials', level=3, num='4.9.3'), + Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidRequests.DifferentCredentials', level=3, num='4.9.4'), + Heading(name='RQ.SRS-016.Kerberos.Parallel.ValidInvalid', level=3, num='4.9.5'), + Heading(name='RQ.SRS-016.Kerberos.Parallel.Deletion', level=3, num='4.9.6'), Heading(name='References', level=1, num='5'), ), requirements=( RQ_SRS_016_Kerberos, + RQ_SRS_016_Kerberos_Ping, RQ_SRS_016_Kerberos_Configuration_MultipleAuthMethods, RQ_SRS_016_Kerberos_Configuration_KerberosNotEnabled, RQ_SRS_016_Kerberos_Configuration_MultipleKerberosSections, @@ -501,25 +519,6 @@ QA_SRS016_ClickHouse_Kerberos_Authentication = Specification( # QA-SRS016 ClickHouse Kerberos Authentication # Software Requirements Specification -(c) 2020 Altinity LTD. All Rights Reserved. 
- -**Document status:** Confidential - -**Author:** Andrey Zvonov - -**Date:** December 14, 2020 - -## Approval - -**Status:** - - -**Version:** - - -**Approved by:** - - -**Date:** - - - ## Table of Contents * 1 [Revision History](#revision-history) @@ -528,47 +527,50 @@ QA_SRS016_ClickHouse_Kerberos_Authentication = Specification( * 4 [Requirements](#requirements) * 4.1 [Generic](#generic) * 4.1.1 [RQ.SRS-016.Kerberos](#rqsrs-016kerberos) - * 4.2 [Configuration](#configuration) - * 4.2.1 [RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods](#rqsrs-016kerberosconfigurationmultipleauthmethods) - * 4.2.2 [RQ.SRS-016.Kerberos.Configuration.KerberosNotEnabled](#rqsrs-016kerberosconfigurationkerberosnotenabled) - * 4.2.3 [RQ.SRS-016.Kerberos.Configuration.MultipleKerberosSections](#rqsrs-016kerberosconfigurationmultiplekerberossections) - * 4.2.4 [RQ.SRS-016.Kerberos.Configuration.WrongUserRealm](#rqsrs-016kerberosconfigurationwronguserrealm) - * 4.2.5 [RQ.SRS-016.Kerberos.Configuration.PrincipalAndRealmSpecified](#rqsrs-016kerberosconfigurationprincipalandrealmspecified) - * 4.2.6 [RQ.SRS-016.Kerberos.Configuration.MultiplePrincipalSections](#rqsrs-016kerberosconfigurationmultipleprincipalsections) - * 4.2.7 [RQ.SRS-016.Kerberos.Configuration.MultipleRealmSections](#rqsrs-016kerberosconfigurationmultiplerealmsections) - * 4.3 [Valid User](#valid-user) - * 4.3.1 [RQ.SRS-016.Kerberos.ValidUser.XMLConfiguredUser](#rqsrs-016kerberosvaliduserxmlconfigureduser) - * 4.3.2 [RQ.SRS-016.Kerberos.ValidUser.RBACConfiguredUser](#rqsrs-016kerberosvaliduserrbacconfigureduser) - * 4.3.3 [RQ.SRS-016.Kerberos.ValidUser.KerberosNotConfigured](#rqsrs-016kerberosvaliduserkerberosnotconfigured) - * 4.4 [Invalid User](#invalid-user) - * 4.4.1 [RQ.SRS-016.Kerberos.InvalidUser](#rqsrs-016kerberosinvaliduser) - * 4.4.2 [RQ.SRS-016.Kerberos.InvalidUser.UserDeleted](#rqsrs-016kerberosinvaliduseruserdeleted) - * 4.5 [Kerberos Not Available](#kerberos-not-available) - * 4.5.1 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidServerTicket](#rqsrs-016kerberoskerberosnotavailableinvalidserverticket) - * 4.5.2 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidClientTicket](#rqsrs-016kerberoskerberosnotavailableinvalidclientticket) - * 4.5.3 [RQ.SRS-016.Kerberos.KerberosNotAvailable.ValidTickets](#rqsrs-016kerberoskerberosnotavailablevalidtickets) - * 4.6 [Kerberos Restarted](#kerberos-restarted) - * 4.6.1 [RQ.SRS-016.Kerberos.KerberosServerRestarted](#rqsrs-016kerberoskerberosserverrestarted) - * 4.7 [Performance](#performance) - * 4.7.1 [RQ.SRS-016.Kerberos.Performance](#rqsrs-016kerberosperformance) - * 4.8 [Parallel Requests processing](#parallel-requests-processing) - * 4.8.1 [RQ.SRS-016.Kerberos.Parallel](#rqsrs-016kerberosparallel) - * 4.8.2 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.KerberosAndNonKerberos](#rqsrs-016kerberosparallelvalidrequestskerberosandnonkerberos) - * 4.8.3 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.SameCredentials](#rqsrs-016kerberosparallelvalidrequestssamecredentials) - * 4.8.4 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.DifferentCredentials](#rqsrs-016kerberosparallelvalidrequestsdifferentcredentials) - * 4.8.5 [RQ.SRS-016.Kerberos.Parallel.ValidInvalid](#rqsrs-016kerberosparallelvalidinvalid) - * 4.8.6 [RQ.SRS-016.Kerberos.Parallel.Deletion](#rqsrs-016kerberosparalleldeletion) + * 4.2 [Ping](#ping) + * 4.2.1 [RQ.SRS-016.Kerberos.Ping](#rqsrs-016kerberosping) + * 4.3 [Configuration](#configuration) + * 4.3.1 
[RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods](#rqsrs-016kerberosconfigurationmultipleauthmethods) + * 4.3.2 [RQ.SRS-016.Kerberos.Configuration.KerberosNotEnabled](#rqsrs-016kerberosconfigurationkerberosnotenabled) + * 4.3.3 [RQ.SRS-016.Kerberos.Configuration.MultipleKerberosSections](#rqsrs-016kerberosconfigurationmultiplekerberossections) + * 4.3.4 [RQ.SRS-016.Kerberos.Configuration.WrongUserRealm](#rqsrs-016kerberosconfigurationwronguserrealm) + * 4.3.5 [RQ.SRS-016.Kerberos.Configuration.PrincipalAndRealmSpecified](#rqsrs-016kerberosconfigurationprincipalandrealmspecified) + * 4.3.6 [RQ.SRS-016.Kerberos.Configuration.MultiplePrincipalSections](#rqsrs-016kerberosconfigurationmultipleprincipalsections) + * 4.3.7 [RQ.SRS-016.Kerberos.Configuration.MultipleRealmSections](#rqsrs-016kerberosconfigurationmultiplerealmsections) + * 4.4 [Valid User](#valid-user) + * 4.4.1 [RQ.SRS-016.Kerberos.ValidUser.XMLConfiguredUser](#rqsrs-016kerberosvaliduserxmlconfigureduser) + * 4.4.2 [RQ.SRS-016.Kerberos.ValidUser.RBACConfiguredUser](#rqsrs-016kerberosvaliduserrbacconfigureduser) + * 4.4.3 [RQ.SRS-016.Kerberos.ValidUser.KerberosNotConfigured](#rqsrs-016kerberosvaliduserkerberosnotconfigured) + * 4.5 [Invalid User](#invalid-user) + * 4.5.1 [RQ.SRS-016.Kerberos.InvalidUser](#rqsrs-016kerberosinvaliduser) + * 4.5.2 [RQ.SRS-016.Kerberos.InvalidUser.UserDeleted](#rqsrs-016kerberosinvaliduseruserdeleted) + * 4.6 [Kerberos Not Available](#kerberos-not-available) + * 4.6.1 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidServerTicket](#rqsrs-016kerberoskerberosnotavailableinvalidserverticket) + * 4.6.2 [RQ.SRS-016.Kerberos.KerberosNotAvailable.InvalidClientTicket](#rqsrs-016kerberoskerberosnotavailableinvalidclientticket) + * 4.6.3 [RQ.SRS-016.Kerberos.KerberosNotAvailable.ValidTickets](#rqsrs-016kerberoskerberosnotavailablevalidtickets) + * 4.7 [Kerberos Restarted](#kerberos-restarted) + * 4.7.1 [RQ.SRS-016.Kerberos.KerberosServerRestarted](#rqsrs-016kerberoskerberosserverrestarted) + * 4.8 [Performance](#performance) + * 4.8.1 [RQ.SRS-016.Kerberos.Performance](#rqsrs-016kerberosperformance) + * 4.9 [Parallel Requests processing](#parallel-requests-processing) + * 4.9.1 [RQ.SRS-016.Kerberos.Parallel](#rqsrs-016kerberosparallel) + * 4.9.2 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.KerberosAndNonKerberos](#rqsrs-016kerberosparallelvalidrequestskerberosandnonkerberos) + * 4.9.3 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.SameCredentials](#rqsrs-016kerberosparallelvalidrequestssamecredentials) + * 4.9.4 [RQ.SRS-016.Kerberos.Parallel.ValidRequests.DifferentCredentials](#rqsrs-016kerberosparallelvalidrequestsdifferentcredentials) + * 4.9.5 [RQ.SRS-016.Kerberos.Parallel.ValidInvalid](#rqsrs-016kerberosparallelvalidinvalid) + * 4.9.6 [RQ.SRS-016.Kerberos.Parallel.Deletion](#rqsrs-016kerberosparalleldeletion) * 5 [References](#references) + ## Revision History This document is stored in an electronic form using [Git] source control management software -hosted in a [GitLab Repository]. +hosted in a [GitHub Repository]. All the updates are tracked using the [Git]'s [Revision History]. ## Introduction -This document specifies the behavior for authenticating existing users using [Kerberos] authentication protocol. +This document specifies the behavior for authenticating existing users via [Kerberos] authentication protocol. Existing [ClickHouse] users, that are properly configured, have an ability to authenticate using [Kerberos]. 
Kerberos authentication is only supported for HTTP requests, and users configured to authenticate via Kerberos cannot be authenticated by any other means of authentication. In order to use Kerberos authentication, Kerberos needs to be properly configured in the environment: Kerberos server must be present and user's and server's credentials must be set up. Configuring the Kerberos environment is outside the scope of this document. @@ -604,6 +606,13 @@ version: 1.0 [ClickHouse] SHALL support user authentication using [Kerberos] server. +### Ping + +#### RQ.SRS-016.Kerberos.Ping +version: 1.0 + +Docker containers SHALL be able to ping each other. + ### Configuration #### RQ.SRS-016.Kerberos.Configuration.MultipleAuthMethods @@ -784,17 +793,17 @@ version: 1.0 ## References * **ClickHouse:** https://clickhouse.tech -* **Gitlab Repository:** https://gitlab.com/altinity-qa/documents/qa-srs016-clickhouse-kerberos-authentication/-/blob/master/QA_SRS016_ClickHouse_Kerberos_Authentication.md -* **Revision History:** https://gitlab.com/altinity-qa/documents/qa-srs016-clickhouse-kerberos-authentication/-/commits/master/QA_SRS016_ClickHouse_Kerberos_Authentication.md +* **GitHub Repository:** https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/kerberos/requirements/requirements.md +* **Revision History:** https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/kerberos/requirements/requirements.md * **Git:** https://git-scm.com/ * **Kerberos terminology:** https://web.mit.edu/kerberos/kfw-4.1/kfw-4.1/kfw-4.1-help/html/kerberos_terminology.htm [Kerberos]: https://en.wikipedia.org/wiki/Kerberos_(protocol) [SPNEGO]: https://en.wikipedia.org/wiki/SPNEGO [ClickHouse]: https://clickhouse.tech -[GitLab]: https://gitlab.com -[GitLab Repository]: https://gitlab.com/altinity-qa/documents/qa-srs016-clickhouse-kerberos-authentication/-/blob/master/QA_SRS016_ClickHouse_Kerberos_Authentication.md -[Revision History]: https://gitlab.com/altinity-qa/documents/qa-srs016-clickhouse-kerberos-authentication/-/commits/master/QA_SRS016_ClickHouse_Kerberos_Authentication.md +[GitHub]: https://gitlab.com +[GitHub Repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/kerberos/requirements/requirements.md +[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/kerberos/requirements/requirements.md [Git]: https://git-scm.com/ [Kerberos terminology]: https://web.mit.edu/kerberos/kfw-4.1/kfw-4.1/kfw-4.1-help/html/kerberos_terminology.htm ''') diff --git a/tests/testflows/kerberos/tests/common.py b/tests/testflows/kerberos/tests/common.py index e768a78cad5..8b72f1c2ffd 100644 --- a/tests/testflows/kerberos/tests/common.py +++ b/tests/testflows/kerberos/tests/common.py @@ -68,8 +68,8 @@ def create_server_principal(self, node): """ try: node.cmd("echo pwd | kinit admin/admin") - node.cmd(f"kadmin -w pwd -q \"add_principal -randkey HTTP/docker-compose_{node.name}_1.docker-compose_default\"") - node.cmd(f"kadmin -w pwd -q \"ktadd -k /etc/krb5.keytab HTTP/docker-compose_{node.name}_1.docker-compose_default\"") + node.cmd(f"kadmin -w pwd -q \"add_principal -randkey HTTP/kerberos_env_{node.name}_1.krbnet\"") + node.cmd(f"kadmin -w pwd -q \"ktadd -k /etc/krb5.keytab HTTP/kerberos_env_{node.name}_1.krbnet\"") yield finally: node.cmd("kdestroy") @@ -170,7 +170,7 @@ def check_wrong_config(self, node, client, config_path, modify_file, log_error=" config_contents = xmltree.tostring(root, encoding='utf8', method='xml').decode('utf-8') command = 
f"cat < {full_config_path}\n{config_contents}\nHEREDOC" node.command(command, steps=False, exitcode=0) - # time.sleep(1) + time.sleep(1) with Then(f"{preprocessed_name} should be updated", description=f"timeout {timeout}"): started = time.time() @@ -183,11 +183,14 @@ def check_wrong_config(self, node, client, config_path, modify_file, log_error=" assert exitcode == 0, error() with When("I restart ClickHouse to apply the config changes"): + node.cmd("kdestroy") + # time.sleep(1) if output: node.restart(safe=False, wait_healthy=True) else: node.restart(safe=False, wait_healthy=False) + if output != "": with Then(f"check {output} is in output"): time.sleep(5) @@ -201,7 +204,7 @@ def check_wrong_config(self, node, client, config_path, modify_file, log_error=" break time.sleep(1) else: - assert False, error() + assert output in r.output, error() finally: with Finally("I restore original config"): @@ -223,3 +226,19 @@ def check_wrong_config(self, node, client, config_path, modify_file, log_error=" assert exitcode == 0, error() +@TestStep(Given) +def instrument_clickhouse_server_log(self, clickhouse_server_log="/var/log/clickhouse-server/clickhouse-server.log"): + """Instrument clickhouse-server.log for the current test + by adding start and end messages that include + current test name to the clickhouse-server.log of the specified node and + if the test fails then dump the messages from + the clickhouse-server.log for this test. + """ + all_nodes = self.context.ch_nodes + [self.context.krb_server] + + for node in all_nodes: + if node.name != "kerberos": + with When(f"output stats for {node.repr()}"): + node.command(f"echo -e \"\\n-- {current().name} -- top --\\n\" && top -bn1") + node.command(f"echo -e \"\\n-- {current().name} -- df --\\n\" && df -h") + node.command(f"echo -e \"\\n-- {current().name} -- free --\\n\" && free -mh") diff --git a/tests/testflows/kerberos/tests/config.py b/tests/testflows/kerberos/tests/config.py index 3f4bf15deb5..85af0b3214e 100644 --- a/tests/testflows/kerberos/tests/config.py +++ b/tests/testflows/kerberos/tests/config.py @@ -145,12 +145,8 @@ def multiple_principal(self): log_error="Multiple principal sections are not allowed") - - - - - @TestFeature +@Name("config") def config(self): """Perform ClickHouse Kerberos authentication testing for incorrect configuration files """ diff --git a/tests/testflows/kerberos/tests/generic.py b/tests/testflows/kerberos/tests/generic.py index 3276fd5ec5f..642b99b4fc3 100644 --- a/tests/testflows/kerberos/tests/generic.py +++ b/tests/testflows/kerberos/tests/generic.py @@ -3,8 +3,22 @@ from kerberos.tests.common import * from kerberos.requirements.requirements import * import time -import datetime -import itertools + + +@TestScenario +@Requirements( + RQ_SRS_016_Kerberos_Ping("1.0") +) +def ping(self): + """Containers should be reachable + """ + ch_nodes = self.context.ch_nodes + + for i in range(3): + with When(f"curl ch_{i} kerberos"): + r = ch_nodes[i].command(f"curl kerberos -c 1") + with Then(f"return code should be 0"): + assert r.exitcode == 7, error() @TestScenario @@ -84,8 +98,10 @@ def invalid_server_ticket(self): ch_nodes[2].cmd("kdestroy") while True: kinit_no_keytab(node=ch_nodes[2]) + create_server_principal(node=ch_nodes[0]) if ch_nodes[2].cmd(test_select_query(node=ch_nodes[0])).output == "kerberos_user": break + debug(test_select_query(node=ch_nodes[0])) ch_nodes[2].cmd("kdestroy") with And("I expect the user to be default"): @@ -97,8 +113,8 @@ def invalid_server_ticket(self): 
RQ_SRS_016_Kerberos_KerberosNotAvailable_InvalidClientTicket("1.0") ) def invalid_client_ticket(self): - """ClickHouse SHALL reject Kerberos authentication no Kerberos server is reachable - and client has no valid ticket (or the existing ticket is outdated). + """ClickHouse SHALL reject Kerberos authentication in case client has + no valid ticket (or the existing ticket is outdated). """ ch_nodes = self.context.ch_nodes @@ -108,8 +124,8 @@ def invalid_client_ticket(self): with And("setting up server principal"): create_server_principal(node=ch_nodes[0]) - with And("I kill kerberos-server"): - self.context.krb_server.stop() + # with And("I kill kerberos-server"): + # self.context.krb_server.stop() with And("I wait until client ticket is expired"): time.sleep(10) @@ -120,17 +136,18 @@ def invalid_client_ticket(self): with Then("I expect the user to be default"): assert r.output == "default", error() - with Finally("I start kerberos server again"): - self.context.krb_server.start() - ch_nodes[2].cmd("kdestroy") + with Finally(""): + # self.context.krb_server.start() + time.sleep(1) + ch_nodes[2].cmd(f"echo pwd | kinit -l 10:00 kerberos_user") while True: - kinit_no_keytab(node=ch_nodes[2]) + time.sleep(1) if ch_nodes[2].cmd(test_select_query(node=ch_nodes[0])).output == "kerberos_user": break ch_nodes[2].cmd("kdestroy") -@TestScenario +@TestCase @Requirements( RQ_SRS_016_Kerberos_KerberosNotAvailable_ValidTickets("1.0") ) @@ -316,9 +333,6 @@ def authentication_performance(self): ch_nodes[0].query("DROP USER pwd_user") - - - @TestFeature def generic(self): """Perform ClickHouse Kerberos authentication testing @@ -329,4 +343,4 @@ def generic(self): self.context.clients = [self.context.cluster.node(f"krb-client{i}") for i in range(1, 6)] for scenario in loads(current_module(), Scenario, Suite): - Scenario(run=scenario, flags=TE) + Scenario(run=scenario, flags=TE) #, setup=instrument_clickhouse_server_log) diff --git a/tests/testflows/rbac/helper/tables.py b/tests/testflows/rbac/helper/tables.py index 5d14bb34a83..ee6289bcbb5 100755 --- a/tests/testflows/rbac/helper/tables.py +++ b/tests/testflows/rbac/helper/tables.py @@ -3,39 +3,39 @@ from collections import namedtuple table_tuple = namedtuple("table_tuple", "create_statement cluster") table_types = { - "MergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8) ENGINE = MergeTree() PARTITION BY y ORDER BY d", None), - "ReplacingMergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8) ENGINE = ReplacingMergeTree() PARTITION BY y ORDER BY d", None), - "SummingMergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8 DEFAULT 1, x String, y Int8) ENGINE = SummingMergeTree() PARTITION BY y ORDER BY d", None), - "AggregatingMergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8) ENGINE = AggregatingMergeTree() PARTITION BY y ORDER BY d", None), - "CollapsingMergeTree": table_tuple("CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, sign Int8 DEFAULT 1) ENGINE = CollapsingMergeTree(sign) PARTITION BY y ORDER BY d", None), - "VersionedCollapsingMergeTree": table_tuple("CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, version UInt64, sign Int8 DEFAULT 1) ENGINE = VersionedCollapsingMergeTree(sign, version) PARTITION BY y ORDER BY d", None), - "GraphiteMergeTree": table_tuple("CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, Path String, Time DateTime, Value Float64, col UInt64, Timestamp 
Int64) ENGINE = GraphiteMergeTree('graphite_rollup_example') PARTITION BY y ORDER by d", None), + "MergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8) ENGINE = MergeTree() PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", None), + "ReplacingMergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8) ENGINE = ReplacingMergeTree() PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", None), + "SummingMergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8 DEFAULT 1, x String, y Int8) ENGINE = SummingMergeTree() PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", None), + "AggregatingMergeTree": table_tuple("CREATE TABLE {name} (d DATE, a String, b UInt8, x String, y Int8) ENGINE = AggregatingMergeTree() PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", None), + "CollapsingMergeTree": table_tuple("CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, sign Int8 DEFAULT 1) ENGINE = CollapsingMergeTree(sign) PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", None), + "VersionedCollapsingMergeTree": table_tuple("CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, version UInt64, sign Int8 DEFAULT 1) ENGINE = VersionedCollapsingMergeTree(sign, version) PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", None), + "GraphiteMergeTree": table_tuple("CREATE TABLE {name} (d Date, a String, b UInt8, x String, y Int8, Path String, Time DateTime, Value Float64, col UInt64, Timestamp Int64) ENGINE = GraphiteMergeTree('graphite_rollup_example') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", None), "ReplicatedMergeTree-sharded_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER sharded_cluster (d DATE, a String, b UInt8, x String, y Int8) \ - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "sharded_cluster"), + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "sharded_cluster"), "ReplicatedMergeTree-one_shard_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER one_shard_cluster (d DATE, a String, b UInt8, x String, y Int8) \ - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "one_shard_cluster"), + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "one_shard_cluster"), "ReplicatedReplacingMergeTree-sharded_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER sharded_cluster (d DATE, a String, b UInt8, x String, y Int8) \ - ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "sharded_cluster"), + ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "sharded_cluster"), "ReplicatedReplacingMergeTree-one_shard_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER one_shard_cluster (d DATE, a String, b UInt8, x String, y Int8) \ - ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "one_shard_cluster"), + ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "one_shard_cluster"), "ReplicatedSummingMergeTree-sharded_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER sharded_cluster (d DATE, a String, b UInt8 DEFAULT 1, x String, y Int8) \ - 
ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "sharded_cluster"), + ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "sharded_cluster"), "ReplicatedSummingMergeTree-one_shard_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER one_shard_cluster (d DATE, a String, b UInt8 DEFAULT 1, x String, y Int8) \ - ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "one_shard_cluster"), + ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "one_shard_cluster"), "ReplicatedAggregatingMergeTree-sharded_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER sharded_cluster (d DATE, a String, b UInt8, x String, y Int8) \ - ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "sharded_cluster"), + ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "sharded_cluster"), "ReplicatedAggregatingMergeTree-one_shard_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER one_shard_cluster (d DATE, a String, b UInt8, x String, y Int8) \ - ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY d", "one_shard_cluster"), + ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "one_shard_cluster"), "ReplicatedCollapsingMergeTree-sharded_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER sharded_cluster (d Date, a String, b UInt8, x String, y Int8, sign Int8 DEFAULT 1) \ - ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign) PARTITION BY y ORDER BY d", "sharded_cluster"), + ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign) PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "sharded_cluster"), "ReplicatedCollapsingMergeTree-one_shard_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER one_shard_cluster (d Date, a String, b UInt8, x String, y Int8, sign Int8 DEFAULT 1) \ - ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign) PARTITION BY y ORDER BY d", "one_shard_cluster"), + ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign) PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "one_shard_cluster"), "ReplicatedVersionedCollapsingMergeTree-sharded_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER sharded_cluster (d Date, a String, b UInt8, x String, y Int8, version UInt64, sign Int8 DEFAULT 1) \ - ENGINE = ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign, version) PARTITION BY y ORDER BY d", "sharded_cluster"), + ENGINE = ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign, version) PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "sharded_cluster"), "ReplicatedVersionedCollapsingMergeTree-one_shard_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER one_shard_cluster (d Date, a String, b UInt8, x String, y Int8, version UInt64, sign Int8 DEFAULT 1) \ - ENGINE = 
ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign, version) PARTITION BY y ORDER BY d", "one_shard_cluster"), + ENGINE = ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', sign, version) PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "one_shard_cluster"), "ReplicatedGraphiteMergeTree-sharded_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER sharded_cluster (d Date, a String, b UInt8, x String, y Int8, Path String, Time DateTime, Value Float64, col UInt64, Timestamp Int64) \ - ENGINE = ReplicatedGraphiteMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', 'graphite_rollup_example') PARTITION BY y ORDER BY d", "sharded_cluster"), + ENGINE = ReplicatedGraphiteMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', 'graphite_rollup_example') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "sharded_cluster"), "ReplicatedGraphiteMergeTree-one_shard_cluster": table_tuple("CREATE TABLE {name} ON CLUSTER one_shard_cluster (d Date, a String, b UInt8, x String, y Int8, Path String, Time DateTime, Value Float64, col UInt64, Timestamp Int64) \ - ENGINE = ReplicatedGraphiteMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', 'graphite_rollup_example') PARTITION BY y ORDER BY d", "one_shard_cluster"), -} \ No newline at end of file + ENGINE = ReplicatedGraphiteMergeTree('/clickhouse/tables/{{shard}}/{name}', '{{replica}}', 'graphite_rollup_example') PARTITION BY y ORDER BY (b, d) PRIMARY KEY b", "one_shard_cluster"), +} diff --git a/tests/testflows/rbac/tests/privileges/alter/alter_index.py b/tests/testflows/rbac/tests/privileges/alter/alter_index.py index 379abd52d8c..78f7134a8b7 100755 --- a/tests/testflows/rbac/tests/privileges/alter/alter_index.py +++ b/tests/testflows/rbac/tests/privileges/alter/alter_index.py @@ -128,10 +128,10 @@ def check_order_by_when_privilege_is_granted(table, user, node): column = "order" with Given("I run sanity check"): - node.query(f"ALTER TABLE {table} MODIFY ORDER BY d", settings = [("user", user)]) + node.query(f"ALTER TABLE {table} MODIFY ORDER BY b", settings = [("user", user)]) with And("I add new column and modify order using that column"): - node.query(f"ALTER TABLE {table} ADD COLUMN {column} UInt32, MODIFY ORDER BY (d, {column})") + node.query(f"ALTER TABLE {table} ADD COLUMN {column} UInt32, MODIFY ORDER BY (b, {column})") with When(f"I insert random data into the ordered-by column {column}"): data = random.sample(range(1,1000),100) @@ -151,7 +151,7 @@ def check_order_by_when_privilege_is_granted(table, user, node): with And("I verify that the sorting key is present in the table"): output = json.loads(node.query(f"SHOW CREATE TABLE {table} FORMAT JSONEachRow").output) - assert f"ORDER BY (d, {column})" in output['statement'], error() + assert f"ORDER BY (b, {column})" in output['statement'], error() with But(f"I cannot drop the required column {column}"): exitcode, message = errors.missing_columns(column) @@ -163,21 +163,13 @@ def check_sample_by_when_privilege_is_granted(table, user, node): """ column = 'sample' - with Given(f"I add new column {column}"): - node.query(f"ALTER TABLE {table} ADD COLUMN {column} UInt32") - with When(f"I add sample by clause"): - node.query(f"ALTER TABLE {table} MODIFY SAMPLE BY (d, {column})", + node.query(f"ALTER TABLE {table} MODIFY SAMPLE BY b", settings = [("user", user)]) with Then("I verify that the sample is in the table"): output = json.loads(node.query(f"SHOW CREATE TABLE {table} FORMAT 
JSONEachRow").output) - assert f"SAMPLE BY (d, {column})" in output['statement'], error() - - with But(f"I cannot drop the required column {column}"): - exitcode, message = errors.missing_columns(column) - node.query(f"ALTER TABLE {table} DROP COLUMN {column}", - exitcode=exitcode, message=message) + assert f"SAMPLE BY b" in output['statement'], error() def check_add_index_when_privilege_is_granted(table, user, node): """Ensures ADD INDEX runs as expected when the privilege is granted to the specified user @@ -258,7 +250,7 @@ def check_order_by_when_privilege_is_not_granted(table, user, node): """ with When("I try to use privilege that has not been granted"): exitcode, message = errors.not_enough_privileges(user) - node.query(f"ALTER TABLE {table} MODIFY ORDER BY d", + node.query(f"ALTER TABLE {table} MODIFY ORDER BY b", settings = [("user", user)], exitcode=exitcode, message=message) def check_sample_by_when_privilege_is_not_granted(table, user, node): @@ -266,7 +258,7 @@ def check_sample_by_when_privilege_is_not_granted(table, user, node): """ with When("I try to use privilege that has not been granted"): exitcode, message = errors.not_enough_privileges(user) - node.query(f"ALTER TABLE {table} MODIFY SAMPLE BY d", + node.query(f"ALTER TABLE {table} MODIFY SAMPLE BY b", settings = [("user", user)], exitcode=exitcode, message=message) def check_add_index_when_privilege_is_not_granted(table, user, node): diff --git a/tests/testflows/regression.py b/tests/testflows/regression.py index 8932e6bcf8f..bcdde31e619 100755 --- a/tests/testflows/regression.py +++ b/tests/testflows/regression.py @@ -23,13 +23,13 @@ def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): with Pool(8) as pool: try: run_scenario(pool, tasks, Feature(test=load("example.regression", "regression")), args) - #run_scenario(pool, tasks, Feature(test=load("ldap.regression", "regression")), args) - run_scenario(pool, tasks, Feature(test=load("rbac.regression", "regression")), args) + # run_scenario(pool, tasks, Feature(test=load("ldap.regression", "regression")), args) + # run_scenario(pool, tasks, Feature(test=load("rbac.regression", "regression")), args) run_scenario(pool, tasks, Feature(test=load("aes_encryption.regression", "regression")), args) run_scenario(pool, tasks, Feature(test=load("map_type.regression", "regression")), args) run_scenario(pool, tasks, Feature(test=load("window_functions.regression", "regression")), args) run_scenario(pool, tasks, Feature(test=load("datetime64_extended_range.regression", "regression")), args) - #run_scenario(pool, tasks, Feature(test=load("kerberos.regression", "regression")), args) + run_scenario(pool, tasks, Feature(test=load("kerberos.regression", "regression")), args) run_scenario(pool, tasks, Feature(test=load("extended_precision_data_types.regression", "regression")), args) finally: join(tasks) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index afa6b9c8a25..a352102352d 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,4 +1,7 @@ +v21.7.4.18-stable 2021-07-17 +v21.7.3.14-stable 2021-07-13 v21.7.2.7-stable 2021-07-09 +v21.6.8.62-stable 2021-07-13 v21.6.7.57-stable 2021-07-09 v21.6.6.51-stable 2021-07-02 v21.6.5.37-stable 2021-06-19