diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 50f4f503f5d..c602a46d23c 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -272,7 +272,4 @@ jobs: cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - echo "::group::Workflow results" - python3 -m json.tool "$WORKFLOW_RESULT_FILE" - echo "::endgroup::" python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index b28d87ee31f..7c319da6045 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -138,7 +138,4 @@ jobs: cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - echo "::group::Workflow results" - python3 -m json.tool "$WORKFLOW_RESULT_FILE" - echo "::endgroup::" python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index db89825a99a..4b186241a0e 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -111,7 +111,4 @@ jobs: cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - echo "::group::Workflow results" - python3 -m json.tool "$WORKFLOW_RESULT_FILE" - echo "::endgroup::" python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index fd5b5eefcc4..84db3338065 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -57,7 +57,4 @@ jobs: cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - echo "::group::Workflow results" - python3 -m json.tool "$WORKFLOW_RESULT_FILE" - echo "::endgroup::" python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 9930cf6dde4..c7d7b28af38 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -171,9 +171,6 @@ jobs: 
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - echo "::group::Workflow results" - python3 -m json.tool "$WORKFLOW_RESULT_FILE" - echo "::endgroup::" python3 ./tests/ci/ci_buddy.py --check-wf-status ################################# Stage Final ################################# diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 50565112825..bca9ff33cd0 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -492,7 +492,5 @@ jobs: cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - echo "::group::Workflow results" - python3 -m json.tool "$WORKFLOW_RESULT_FILE" - echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index bb776fa9506..d69646d3694 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54488) +SET(VERSION_REVISION 54489) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9) -SET(VERSION_DESCRIBE v24.7.1.1-testing) -SET(VERSION_STRING 24.7.1.1) +SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) +SET(VERSION_DESCRIBE v24.8.1.1-testing) +SET(VERSION_STRING 24.8.1.1) # end of autochange diff --git a/contrib/datasketches-cpp-cmake/CMakeLists.txt b/contrib/datasketches-cpp-cmake/CMakeLists.txt index b12a88ad57b..497d6956d0e 100644 --- a/contrib/datasketches-cpp-cmake/CMakeLists.txt +++ b/contrib/datasketches-cpp-cmake/CMakeLists.txt @@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta) add_library(_datasketches INTERFACE) target_include_directories(_datasketches SYSTEM BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include" + "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include" "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include") add_library(ch_contrib::datasketches ALIAS _datasketches) diff --git a/contrib/libunwind b/contrib/libunwind index 8f28e64d158..fe854449e24 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 8f28e64d15819d2d096badd598c7d85bebddb1f2 +Subproject commit fe854449e24bedfa26e38465b84374312dbd587f diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 82fc2598397..3b919ffb3e3 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update --yes \ - && env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \ + && env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \ && apt-get clean \ && rm -rf 
/var/lib/apt/lists/* /var/cache/debconf /tmp/* diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 857385f4715..df85c047649 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -191,8 +191,8 @@ else ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" - clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" else @@ -200,7 +200,7 @@ else clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, 
UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, 
Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 86467394513..b21114e456f 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -209,9 +209,9 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'" -clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" -clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" -clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" +clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" +clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" +clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS 
enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC" clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC" diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 3826e4e9c94..7ffbd9a5bae 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. +- `count_min` + + [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. + ## Column-level Settings {#column-level-settings} Certain MergeTree settings can be override at column level: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9dcee2eb7b2..9934e71ece7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -543,7 +543,7 @@ if (TARGET ch_contrib::libpqxx) endif() if (TARGET ch_contrib::datasketches) - target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches) + dbms_target_link_libraries(PUBLIC ch_contrib::datasketches) endif () target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 16d5d1cccde..28bb861865a 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 5454cba8e2e..7b8b5036af0 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -57,7 +57,8 @@ static struct InitFiu 
PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \ PAUSEABLE(dummy_pausable_failpoint) \ ONCE(execute_query_calling_empty_set_result_func_on_exception) \ - ONCE(receive_timeout_on_table_status_response) + ONCE(receive_timeout_on_table_status_response) \ + REGULAR(keepermap_fail_drop_data) \ namespace FailPoints diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index 75d0e7c32d8..84ccffb8e90 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -10,6 +10,8 @@ #include #include +#include + #include #include #include @@ -33,41 +35,41 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -struct ICgroupsReader -{ - virtual ~ICgroupsReader() = default; - - virtual uint64_t readMemoryUsage() = 0; -}; - +#if defined(OS_LINUX) namespace { -#if defined(OS_LINUX) +using Metrics = std::map; + /// Format is /// kernel 5 /// rss 15 /// [...] -uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf) { + Metrics metrics; while (!buf.eof()) { std::string current_key; readStringUntilWhitespace(current_key, buf); - if (current_key != key) - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } assertChar(' ', buf); + uint64_t value = 0; readIntText(value, buf); - return value; - } + assertChar('\n', buf); + auto [_, inserted] = metrics.emplace(std::move(current_key), value); + chassert(inserted, "Duplicate keys in stat file"); + } + return metrics; +} + +uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + const auto all_metrics = readAllMetricsFromStatFile(buf); + if (const auto it = all_metrics.find(key); it != all_metrics.end()) + return it->second; LOG_ERROR(getLogger("CgroupsReader"), "Cannot find '{}' in '{}'", key, buf.getFileName()); return 0; } @@ -83,6 +85,13 @@ struct CgroupsV1Reader : ICgroupsReader return readMetricFromStatFile(buf, "rss"); } + std::string 
dumpAllStats() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return fmt::format("{}", readAllMetricsFromStatFile(buf)); + } + private: std::mutex mutex; ReadBufferFromFile buf TSA_GUARDED_BY(mutex); @@ -99,6 +108,13 @@ struct CgroupsV2Reader : ICgroupsReader return readMetricFromStatFile(stat_buf, "anon"); } + std::string dumpAllStats() override + { + std::lock_guard lock(mutex); + stat_buf.rewind(); + return fmt::format("{}", readAllMetricsFromStatFile(stat_buf)); + } + private: std::mutex mutex; ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); @@ -147,34 +163,23 @@ std::optional getCgroupsV1Path() return {default_cgroups_mount / "memory"}; } -enum class CgroupsVersion : uint8_t -{ - V1, - V2 -}; - -std::pair getCgroupsPath() +std::pair getCgroupsPath() { auto v2_path = getCgroupsV2Path(); if (v2_path.has_value()) - return {*v2_path, CgroupsVersion::V2}; + return {*v2_path, ICgroupsReader::CgroupsVersion::V2}; auto v1_path = getCgroupsV1Path(); if (v1_path.has_value()) - return {*v1_path, CgroupsVersion::V1}; + return {*v1_path, ICgroupsReader::CgroupsVersion::V1}; throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); } -std::shared_ptr createCgroupsReader() -{ - const auto [cgroup_path, version] = getCgroupsPath(); - LOG_INFO( - getLogger("CgroupsReader"), - "Will create cgroup reader from '{}' (cgroups version: {})", - cgroup_path, - (version == CgroupsVersion::V1) ? 
"v1" : "v2"); +} +std::shared_ptr ICgroupsReader::createCgroupsReader(ICgroupsReader::CgroupsVersion version, const std::filesystem::path & cgroup_path) +{ if (version == CgroupsVersion::V2) return std::make_shared(cgroup_path); else @@ -182,10 +187,12 @@ std::shared_ptr createCgroupsReader() chassert(version == CgroupsVersion::V1); return std::make_shared(cgroup_path); } - } #endif +namespace +{ + std::string_view sourceToString(MemoryWorker::MemoryUsageSource source) { switch (source) @@ -212,7 +219,14 @@ MemoryWorker::MemoryWorker(uint64_t period_ms_) { static constexpr uint64_t cgroups_memory_usage_tick_ms{50}; - cgroups_reader = createCgroupsReader(); + const auto [cgroup_path, version] = getCgroupsPath(); + LOG_INFO( + getLogger("CgroupsReader"), + "Will create cgroup reader from '{}' (cgroups version: {})", + cgroup_path, + (version == ICgroupsReader::CgroupsVersion::V1) ? "v1" : "v2"); + + cgroups_reader = ICgroupsReader::createCgroupsReader(version, cgroup_path); source = MemoryUsageSource::Cgroups; if (period_ms == 0) period_ms = cgroups_memory_usage_tick_ms; @@ -284,7 +298,7 @@ uint64_t MemoryWorker::getMemoryUsage() void MemoryWorker::backgroundThread() { std::chrono::milliseconds chrono_period_ms{period_ms}; - [[maybe_unused]] bool first_run = true; + bool first_run = true; std::unique_lock lock(mutex); while (true) { diff --git a/src/Common/MemoryWorker.h b/src/Common/MemoryWorker.h index b1b0495bf14..f4b0fed23ec 100644 --- a/src/Common/MemoryWorker.h +++ b/src/Common/MemoryWorker.h @@ -7,14 +7,31 @@ namespace DB { -struct ICgroupsReader; +struct ICgroupsReader +{ + enum class CgroupsVersion : uint8_t + { + V1, + V2 + }; -/// Correct MemoryTracker based on stats.resident read from jemalloc. -/// This requires jemalloc built with --enable-stats which we use. -/// The worker spawns a background thread which moves the jemalloc epoch (updates internal stats), -/// and fetches the current stats.resident whose value is sent to global MemoryTracker. 
-/// Additionally, if the current memory usage is higher than global hard limit, -/// jemalloc's dirty pages are forcefully purged. +#if defined(OS_LINUX) + static std::shared_ptr + createCgroupsReader(ICgroupsReader::CgroupsVersion version, const std::filesystem::path & cgroup_path); +#endif + + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; + + virtual std::string dumpAllStats() = 0; +}; + + +/// Correct MemoryTracker based on external information (e.g. Cgroups or stats.resident from jemalloc) +/// The worker spawns a background thread which periodically reads current resident memory from the source, +/// whose value is sent to global MemoryTracker. +/// It can do additional things like purging jemalloc dirty pages if the current memory usage is higher than global hard limit. class MemoryWorker { public: diff --git a/src/Common/tests/gtest_cgroups_reader.cpp b/src/Common/tests/gtest_cgroups_reader.cpp new file mode 100644 index 00000000000..e24b91a59b8 --- /dev/null +++ b/src/Common/tests/gtest_cgroups_reader.cpp @@ -0,0 +1,178 @@ +#if defined(OS_LINUX) + +#include + +#include +#include + +#include +#include +#include + +using namespace DB; + + +const std::string SAMPLE_FILE[2] = { + R"(cache 4673703936 +rss 2232029184 +rss_huge 0 +shmem 0 +mapped_file 344678400 +dirty 4730880 +writeback 135168 +swap 0 +pgpgin 2038569918 +pgpgout 2036883790 +pgfault 2055373287 +pgmajfault 0 +inactive_anon 2156335104 +active_anon 0 +inactive_file 2841305088 +active_file 1653915648 +unevictable 256008192 +hierarchical_memory_limit 8589934592 +hierarchical_memsw_limit 8589934592 +total_cache 4673703936 +total_rss 2232029184 +total_rss_huge 0 +total_shmem 0 +total_mapped_file 344678400 +total_dirty 4730880 +total_writeback 135168 +total_swap 0 +total_pgpgin 2038569918 +total_pgpgout 2036883790 +total_pgfault 2055373287 +total_pgmajfault 0 +total_inactive_anon 2156335104 +total_active_anon 0 +total_inactive_file 2841305088 +total_active_file 
1653915648 +total_unevictable 256008192 +)", + R"(anon 10429399040 +file 17410793472 +kernel 1537789952 +kernel_stack 3833856 +pagetables 65441792 +sec_pagetables 0 +percpu 15232 +sock 0 +vmalloc 0 +shmem 0 +zswap 0 +zswapped 0 +file_mapped 344010752 +file_dirty 2060857344 +file_writeback 0 +swapcached 0 +anon_thp 0 +file_thp 0 +shmem_thp 0 +inactive_anon 0 +active_anon 10429370368 +inactive_file 8693084160 +active_file 8717561856 +unevictable 0 +slab_reclaimable 1460982504 +slab_unreclaimable 5152864 +slab 1466135368 +workingset_refault_anon 0 +workingset_refault_file 0 +workingset_activate_anon 0 +workingset_activate_file 0 +workingset_restore_anon 0 +workingset_restore_file 0 +workingset_nodereclaim 0 +pgscan 0 +pgsteal 0 +pgscan_kswapd 0 +pgscan_direct 0 +pgscan_khugepaged 0 +pgsteal_kswapd 0 +pgsteal_direct 0 +pgsteal_khugepaged 0 +pgfault 43026352 +pgmajfault 36762 +pgrefill 0 +pgactivate 0 +pgdeactivate 0 +pglazyfree 259 +pglazyfreed 0 +zswpin 0 +zswpout 0 +thp_fault_alloc 0 +thp_collapse_alloc 0 +)"}; + +const std::string EXPECTED[2] + = {"{\"active_anon\": 0, \"active_file\": 1653915648, \"cache\": 4673703936, \"dirty\": 4730880, \"hierarchical_memory_limit\": " + "8589934592, \"hierarchical_memsw_limit\": 8589934592, \"inactive_anon\": 2156335104, \"inactive_file\": 2841305088, " + "\"mapped_file\": 344678400, \"pgfault\": 2055373287, \"pgmajfault\": 0, \"pgpgin\": 2038569918, \"pgpgout\": 2036883790, \"rss\": " + "2232029184, \"rss_huge\": 0, \"shmem\": 0, \"swap\": 0, \"total_active_anon\": 0, \"total_active_file\": 1653915648, " + "\"total_cache\": 4673703936, \"total_dirty\": 4730880, \"total_inactive_anon\": 2156335104, \"total_inactive_file\": 2841305088, " + "\"total_mapped_file\": 344678400, \"total_pgfault\": 2055373287, \"total_pgmajfault\": 0, \"total_pgpgin\": 2038569918, " + "\"total_pgpgout\": 2036883790, \"total_rss\": 2232029184, \"total_rss_huge\": 0, \"total_shmem\": 0, \"total_swap\": 0, " + "\"total_unevictable\": 256008192, 
\"total_writeback\": 135168, \"unevictable\": 256008192, \"writeback\": 135168}", + "{\"active_anon\": 10429370368, \"active_file\": 8717561856, \"anon\": 10429399040, \"anon_thp\": 0, \"file\": 17410793472, " + "\"file_dirty\": 2060857344, \"file_mapped\": 344010752, \"file_thp\": 0, \"file_writeback\": 0, \"inactive_anon\": 0, " + "\"inactive_file\": 8693084160, \"kernel\": 1537789952, \"kernel_stack\": 3833856, \"pagetables\": 65441792, \"percpu\": 15232, " + "\"pgactivate\": 0, \"pgdeactivate\": 0, \"pgfault\": 43026352, \"pglazyfree\": 259, \"pglazyfreed\": 0, \"pgmajfault\": 36762, " + "\"pgrefill\": 0, \"pgscan\": 0, \"pgscan_direct\": 0, \"pgscan_khugepaged\": 0, \"pgscan_kswapd\": 0, \"pgsteal\": 0, " + "\"pgsteal_direct\": 0, \"pgsteal_khugepaged\": 0, \"pgsteal_kswapd\": 0, \"sec_pagetables\": 0, \"shmem\": 0, \"shmem_thp\": 0, " + "\"slab\": 1466135368, \"slab_reclaimable\": 1460982504, \"slab_unreclaimable\": 5152864, \"sock\": 0, \"swapcached\": 0, " + "\"thp_collapse_alloc\": 0, \"thp_fault_alloc\": 0, \"unevictable\": 0, \"vmalloc\": 0, \"workingset_activate_anon\": 0, " + "\"workingset_activate_file\": 0, \"workingset_nodereclaim\": 0, \"workingset_refault_anon\": 0, \"workingset_refault_file\": 0, " + "\"workingset_restore_anon\": 0, \"workingset_restore_file\": 0, \"zswap\": 0, \"zswapped\": 0, \"zswpin\": 0, \"zswpout\": 0}"}; + + +class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam +{ + void SetUp() override + { + const uint8_t version = static_cast(GetParam()); + tmp_dir = fmt::format("./test_cgroups_{}", magic_enum::enum_name(GetParam())); + fs::create_directories(tmp_dir); + + auto stat_file = WriteBufferFromFile(tmp_dir + "/memory.stat"); + stat_file.write(SAMPLE_FILE[version].data(), SAMPLE_FILE[version].size()); + stat_file.sync(); + + if (GetParam() == ICgroupsReader::CgroupsVersion::V2) + { + auto current_file = WriteBufferFromFile(tmp_dir + "/memory.current"); + current_file.write("29645422592", 11); + 
current_file.sync(); + } + } + +protected: + std::string tmp_dir; +}; + + +TEST_P(CgroupsMemoryUsageObserverFixture, ReadMemoryUsageTest) +{ + const auto version = GetParam(); + auto reader = ICgroupsReader::createCgroupsReader(version, tmp_dir); + ASSERT_EQ( + reader->readMemoryUsage(), + version == ICgroupsReader::CgroupsVersion::V1 ? /* rss from memory.stat */ 2232029184 + : /* anon from memory.stat */ 10429399040); +} + + +TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest) +{ + const auto version = GetParam(); + auto reader = ICgroupsReader::createCgroupsReader(version, tmp_dir); + ASSERT_EQ(reader->dumpAllStats(), EXPECTED[static_cast(version)]); +} + + +INSTANTIATE_TEST_SUITE_P( + CgroupsMemoryUsageObserverTests, + CgroupsMemoryUsageObserverFixture, + ::testing::Values(ICgroupsReader::CgroupsVersion::V1, ICgroupsReader::CgroupsVersion::V2)); + +#endif diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 184c263dbdb..1a40b780e9a 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID catch (Exception & e) { if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get(), type.getName()); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get(), type.getName()); e.addMessage(fmt::format("while converting '{}' to {}", src.get(), type.getName())); throw; diff --git a/src/Interpreters/tests/gtest_convertFieldToType.cpp b/src/Interpreters/tests/gtest_convertFieldToType.cpp index c8a9d5aa2c0..0cac9a3b59d 100644 --- a/src/Interpreters/tests/gtest_convertFieldToType.cpp +++ b/src/Interpreters/tests/gtest_convertFieldToType.cpp @@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P( DecimalField(DateTime64(123 * Day * 1'000'000), 6) } }) - ); +); 
INSTANTIATE_TEST_SUITE_P( DateTimeToDateTime64, @@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P( }, }) ); + +INSTANTIATE_TEST_SUITE_P( + StringToNumber, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "String", + Field("1"), + "Int8", + Field(1) + }, + { + "String", + Field("256"), + "Int8", + Field() + }, + { + "String", + Field("not a number"), + "Int8", + {} + }, + { + "String", + Field("1.1"), + "Int8", + {} /// we can not convert '1.1' to Int8 + }, + { + "String", + Field("1.1"), + "Float64", + Field(1.1) + }, + }) +); + +INSTANTIATE_TEST_SUITE_P( + NumberToString, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "Int8", + Field(1), + "String", + Field("1") + }, + { + "Int8", + Field(-1), + "String", + Field("-1") + }, + { + "Float64", + Field(1.1), + "String", + Field("1.1") + }, + }) +); + +INSTANTIATE_TEST_SUITE_P( + StringToDate, + ConvertFieldToTypeTest, + ::testing::ValuesIn(std::initializer_list{ + { + "String", + Field("2024-07-12"), + "Date", + Field(static_cast(19916)) + }, + { + "String", + Field("not a date"), + "Date", + {} + }, + }) +); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index b04ec1c22b2..230d4c778e8 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -329,19 +329,23 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format const auto * literal = arguments->children[0]->as(); const auto * function = arguments->children[0]->as(); + const auto * subquery = arguments->children[0]->as(); bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple; - // do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` + /// Do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))` bool literal_need_parens = literal && !is_tuple; - // negate always requires parentheses, otherwise 
-(-1) will be printed as --1 - bool inside_parens = name == "negate" && (literal_need_parens || (function && function->name == "negate")); + /// Negate always requires parentheses, otherwise -(-1) will be printed as --1 + /// Also extra parentheses are needed for subqueries, because NOT can be parsed as a function: + /// not(SELECT 1) cannot be parsed, while not((SELECT 1)) can. + bool inside_parens = (name == "negate" && (literal_need_parens || (function && function->name == "negate"))) + || (subquery && name == "not"); /// We DO need parentheses around a single literal /// For example, SELECT (NOT 0) + (NOT 0) cannot be transformed into SELECT NOT 0 + NOT 0, since /// this is equal to SELECT NOT (0 + NOT 0) bool outside_parens = frame.need_parens && !inside_parens; - // do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) + /// Do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1))) if (inside_parens) nested_need_parens.need_parens = false; diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 4e3f29bfe0c..0209e785bff 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -9,7 +9,7 @@ namespace DB { -/** The SELECT subquery is in parenthesis. +/** The SELECT subquery, in parentheses. 
*/ class ParserSubquery : public IParserBase { diff --git a/src/Parsers/ParserDescribeTableQuery.cpp b/src/Parsers/ParserDescribeTableQuery.cpp index 92c0cfacd9b..22bbfdb03e1 100644 --- a/src/Parsers/ParserDescribeTableQuery.cpp +++ b/src/Parsers/ParserDescribeTableQuery.cpp @@ -11,15 +11,12 @@ namespace DB { - bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_describe(Keyword::DESCRIBE); ParserKeyword s_desc(Keyword::DESC); ParserKeyword s_table(Keyword::TABLE); ParserKeyword s_settings(Keyword::SETTINGS); - ParserToken s_dot(TokenType::Dot); - ParserIdentifier name_p; ParserSetQuery parser_settings(true); ASTPtr database; @@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return true; } - } diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 13394690227..aa8f51d5295 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -304,7 +304,7 @@ void RefreshTask::refreshTask() { PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); auto text = message.text; - message.text = fmt::format("Refresh failed: {}", message.text); + message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text); LOG_ERROR(log, message); exception = text; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 78a551591a6..4a4c1687a0d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP { auto stats = part->loadStatistics(); /// TODO: We only have one stats file for every part. 
+ result.addRows(part->rows_count); for (const auto & stat : stats) - result.merge(part->info.getPartNameV1(), part->rows_count, stat); + result.merge(part->info.getPartNameV1(), stat); } catch (...) { @@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP if (!partition_pruner.canBePruned(*part)) { auto stats = part->loadStatistics(); + result.addRows(part->rows_count); for (const auto & stat : stats) - result.merge(part->info.getPartNameV1(), part->rows_count, stat); + result.merge(part->info.getPartNameV1(), stat); } } catch (...) @@ -1144,7 +1146,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false); if (!filter_dag) return {}; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index b11cbf1e034..284d47ef9e7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -44,10 +44,12 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( const String & index_name_, const Block & index_sample_block_, size_t max_rows_, - MutableColumns && mutable_columns_) + MutableColumns && mutable_columns_, + std::vector && set_hyperrectangle_) : index_name(index_name_) , max_rows(max_rows_) , block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_))) + , set_hyperrectangle(std::move(set_hyperrectangle_)) { } @@ -106,6 +108,10 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { 
return &istr; }; settings.position_independent_encoding = false; + set_hyperrectangle.clear(); + Field min_val; + Field max_val; + for (size_t i = 0; i < num_columns; ++i) { auto & elem = block.getByPosition(i); @@ -116,6 +122,13 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr); + + if (const auto * column_nullable = typeid_cast(elem.column.get())) + column_nullable->getExtremesNullLast(min_val, max_val); + else + elem.column->getExtremes(min_val, max_val); + + set_hyperrectangle.emplace_back(min_val, true, max_val, true); } } @@ -182,10 +195,29 @@ void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size if (has_new_data) { + FieldRef field_min; + FieldRef field_max; for (size_t i = 0; i < columns.size(); ++i) { auto filtered_column = block.getByName(index_columns[i]).column->filter(filter, block.rows()); columns[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size()); + + if (const auto * column_nullable = typeid_cast(filtered_column.get())) + column_nullable->getExtremesNullLast(field_min, field_max); + else + filtered_column->getExtremes(field_min, field_max); + + if (set_hyperrectangle.size() <= i) + { + set_hyperrectangle.emplace_back(field_min, true, field_max, true); + } + else + { + set_hyperrectangle[i].left + = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].left, field_min) ? set_hyperrectangle[i].left : field_min; + set_hyperrectangle[i].right + = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].right, field_max) ? 
field_max : set_hyperrectangle[i].right; + } } } @@ -221,7 +253,7 @@ bool MergeTreeIndexAggregatorSet::buildFilter( MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() { - auto granule = std::make_shared(index_name, index_sample_block, max_rows, std::move(columns)); + auto granule = std::make_shared(index_name, index_sample_block, max_rows, std::move(columns), std::move(set_hyperrectangle)); switch (data.type) { @@ -240,17 +272,22 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() return granule; } +KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context) +{ + return KeyCondition{filter_actions_dag, context, index.column_names, index.expression}; +} MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( - const String & index_name_, - const Block & index_sample_block, size_t max_rows_, const ActionsDAGPtr & filter_dag, - ContextPtr context) - : index_name(index_name_) + ContextPtr context, + const IndexDescription & index_description) + : index_name(index_description.name) , max_rows(max_rows_) + , index_data_types(index_description.data_types) + , condition(buildCondition(index_description, filter_dag, context)) { - for (const auto & name : index_sample_block.getNames()) + for (const auto & name : index_description.sample_block.getNames()) if (!key_columns.contains(name)) key_columns.insert(name); @@ -293,6 +330,9 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx if (size == 0 || (max_rows != 0 && size > max_rows)) return true; + if (!condition.checkInHyperrectangle(granule.set_hyperrectangle, index_data_types).can_be_true) + return false; + Block result = granule.block; actions->execute(result); @@ -546,7 +586,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( const ActionsDAGPtr & filter_actions_dag, ContextPtr 
context) const { - return std::make_shared(index.name, index.sample_block, max_rows, filter_actions_dag, context); + return std::make_shared(max_rows, filter_actions_dag, context, index); } MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 6efc2effafd..168262360fc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -22,7 +22,8 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const String & index_name_, const Block & index_sample_block_, size_t max_rows_, - MutableColumns && columns_); + MutableColumns && columns_, + std::vector && set_hyperrectangle_); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -36,6 +37,7 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const size_t max_rows; Block block; + std::vector set_hyperrectangle; }; @@ -73,6 +75,7 @@ private: ClearableSetVariants data; Sizes key_sizes; MutableColumns columns; + std::vector set_hyperrectangle; }; @@ -80,11 +83,10 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition { public: MergeTreeIndexConditionSet( - const String & index_name_, - const Block & index_sample_block, size_t max_rows_, const ActionsDAGPtr & filter_dag, - ContextPtr context); + ContextPtr context, + const IndexDescription & index_description); bool alwaysUnknownOrTrue() const override; @@ -119,6 +121,9 @@ private: std::unordered_set key_columns; ExpressionActionsPtr actions; String actions_output_column_name; + + DataTypes index_data_types; + KeyCondition condition; }; diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp index 757136fdf42..57dff958b9a 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp 
+++ b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp @@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par part_statistics[part_name] = stats; } -Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const { if (part_statistics.empty()) return default_normal_cond_factor * rows; @@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess( return result * rows / part_rows; } -Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const { return rows - estimateLess(val, rows); } -Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const +Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const { if (part_statistics.empty()) { - if (val < - threshold || val > threshold) + auto float_val = StatisticsUtils::tryConvertToFloat64(val); + if (!float_val) + return default_unknown_cond_factor * rows; + else if (float_val.value() < - threshold || float_val.value() > threshold) return default_normal_cond_factor * rows; else return default_good_cond_factor * rows; @@ -87,7 +90,7 @@ static std::pair tryToExtractSingleColumn(const RPNBuilderTreeNod return result; } -std::pair ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const +std::pair ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const { if (!node.isFunction()) return {}; @@ -123,48 +126,35 @@ std::pair 
ConditionSelectivityEstimator::extractBinaryOp(const DataTypePtr output_type; if (!constant_node->tryGetConstant(output_value, output_type)) return {}; - - const auto type = output_value.getType(); - Float64 value; - if (type == Field::Types::Int64) - value = output_value.get(); - else if (type == Field::Types::UInt64) - value = output_value.get(); - else if (type == Field::Types::Float64) - value = output_value.get(); - else - return {}; - return std::make_pair(function_name, value); + return std::make_pair(function_name, output_value); } Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const { auto result = tryToExtractSingleColumn(node); if (result.second != 1) - { - return default_unknown_cond_factor; - } + return default_unknown_cond_factor * total_rows; + String col = result.first; auto it = column_estimators.find(col); /// If there the estimator of the column is not found or there are no data at all, /// we use dummy estimation. - bool dummy = total_rows == 0; + bool dummy = false; ColumnSelectivityEstimator estimator; if (it != column_estimators.end()) - { estimator = it->second; - } else - { dummy = true; - } + auto [op, val] = extractBinaryOp(node, col); + if (op == "equals") { if (dummy) { - if (val < - threshold || val > threshold) + auto float_val = StatisticsUtils::tryConvertToFloat64(val); + if (!float_val || (float_val < - threshold || float_val > threshold)) return default_normal_cond_factor * total_rows; else return default_good_cond_factor * total_rows; @@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode return default_unknown_cond_factor * total_rows; } -void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat) +void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat) { - if (!part_names.contains(part_name)) - { - total_rows += part_rows; - part_names.insert(part_name); 
- } if (column_stat != nullptr) column_estimators[column_stat->columnName()].merge(part_name, column_stat); } diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.h b/src/Storages/Statistics/ConditionSelectivityEstimator.h index f0599742276..ce7fdd12e92 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.h +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -10,6 +11,14 @@ class RPNBuilderTreeNode; /// It estimates the selectivity of a condition. class ConditionSelectivityEstimator { +public: + /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... + /// Right now we only support simple condition like col = val / col < val + Float64 estimateRowCount(const RPNBuilderTreeNode & node) const; + + void merge(String part_name, ColumnStatisticsPtr column_stat); + void addRows(UInt64 part_rows) { total_rows += part_rows; } + private: friend class ColumnStatistics; struct ColumnSelectivityEstimator @@ -20,13 +29,15 @@ private: void merge(String part_name, ColumnStatisticsPtr stats); - Float64 estimateLess(Float64 val, Float64 rows) const; + Float64 estimateLess(const Field & val, Float64 rows) const; - Float64 estimateGreater(Float64 val, Float64 rows) const; + Float64 estimateGreater(const Field & val, Float64 rows) const; - Float64 estimateEqual(Float64 val, Float64 rows) const; + Float64 estimateEqual(const Field & val, Float64 rows) const; }; + std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const; + static constexpr auto default_good_cond_factor = 0.1; static constexpr auto default_normal_cond_factor = 0.5; static constexpr auto default_unknown_cond_factor = 1.0; @@ -35,16 +46,7 @@ private: static constexpr auto threshold = 2; UInt64 total_rows = 0; - std::set part_names; std::map column_estimators; - std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const; 
- -public: - /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ... - /// Right now we only support simple condition like col = val / col < val - Float64 estimateRowCount(const RPNBuilderTreeNode & node) const; - - void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat); }; } diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 28e75c6d244..ade3326288a 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,15 +1,18 @@ #include +#include +#include +#include #include +#include #include #include #include -#include -#include -#include #include #include +#include "config.h" /// USE_DATASKETCHES + namespace DB { @@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; +std::optional StatisticsUtils::tryConvertToFloat64(const Field & field) +{ + switch (field.getType()) + { + case Field::Types::Int64: + return field.get(); + case Field::Types::UInt64: + return field.get(); + case Field::Types::Float64: + return field.get(); + case Field::Types::Int128: + return field.get(); + case Field::Types::UInt128: + return field.get(); + case Field::Types::Int256: + return field.get(); + case Field::Types::UInt256: + return field.get(); + default: + return {}; + } +} + +std::optional StatisticsUtils::tryConvertToString(const DB::Field & field) +{ + if (field.getType() == Field::Types::String) + return field.get(); + return {}; +} + IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) { @@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics"); } -Float64 IStatistics::estimateEqual(Float64 /*val*/) const +Float64 IStatistics::estimateEqual(const Field & /*val*/) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of 
statistics"); } -Float64 IStatistics::estimateLess(Float64 /*val*/) const +Float64 IStatistics::estimateLess(const Field & /*val*/) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); } @@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const /// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics /// object that supports it natively. -Float64 ColumnStatistics::estimateLess(Float64 val) const +Float64 ColumnStatistics::estimateLess(const Field & val) const { if (stats.contains(StatisticsType::TDigest)) return stats.at(StatisticsType::TDigest)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } -Float64 ColumnStatistics::estimateGreater(Float64 val) const +Float64 ColumnStatistics::estimateGreater(const Field & val) const { return rows - estimateLess(val); } -Float64 ColumnStatistics::estimateEqual(Float64 val) const +Float64 ColumnStatistics::estimateEqual(const Field & val) const { - if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) + auto float_val = StatisticsUtils::tryConvertToFloat64(val); + if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. 
if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) return stats.at(StatisticsType::TDigest)->estimateEqual(val); } - if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold) +#if USE_DATASKETCHES + if (stats.contains(StatisticsType::CountMinSketch)) + return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val); +#endif + if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold)) return rows * ConditionSelectivityEstimator::default_normal_cond_factor; else return rows * ConditionSelectivityEstimator::default_good_cond_factor; @@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerValidator(StatisticsType::TDigest, TDigestValidator); - registerCreator(StatisticsType::TDigest, TDigestCreator); + registerValidator(StatisticsType::TDigest, tdigestValidator); + registerCreator(StatisticsType::TDigest, tdigestCreator); - registerValidator(StatisticsType::Uniq, UniqValidator); - registerCreator(StatisticsType::Uniq, UniqCreator); + registerValidator(StatisticsType::Uniq, uniqValidator); + registerCreator(StatisticsType::Uniq, uniqCreator); + +#if USE_DATASKETCHES + registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator); + registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator); +#endif } MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance() @@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. 
Available types: 'tdigest' 'uniq' and 'count_min'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index d4364075d1c..16f0c67eabd 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -13,6 +14,14 @@ namespace DB constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; + +struct StatisticsUtils +{ + /// Returns std::nullopt if input Field cannot be converted to a concrete value + static std::optional tryConvertToFloat64(const Field & field); + static std::optional tryConvertToString(const Field & field); +}; + /// Statistics describe properties of the values in the column, /// e.g. how many unique values exist, /// what are the N most frequent values, @@ -34,8 +43,8 @@ public: /// Per-value estimations. /// Throws if the statistics object is not able to do a meaningful estimation. 
- virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column - virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column + virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column + virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column protected: SingleStatisticsDescription stat; @@ -58,9 +67,9 @@ public: void update(const ColumnPtr & column); - Float64 estimateLess(Float64 val) const; - Float64 estimateGreater(Float64 val) const; - Float64 estimateEqual(Float64 val) const; + Float64 estimateLess(const Field & val) const; + Float64 estimateGreater(const Field & val) const; + Float64 estimateEqual(const Field & val) const; private: friend class MergeTreeStatisticsFactory; diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp new file mode 100644 index 00000000000..e69bbc1515b --- /dev/null +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -0,0 +1,102 @@ + +#include +#include +#include +#include +#include +#include + +#if USE_DATASKETCHES + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +extern const int ILLEGAL_STATISTICS; +} + +/// Constants chosen based on rolling dices. +/// The values provides: +/// 1. an error tolerance of 0.1% (ε = 0.001) +/// 2. a confidence level of 99.9% (δ = 0.001). +/// And sketch the size is 152kb. +static constexpr auto num_hashes = 7uz; +static constexpr auto num_buckets = 2718uz; + +StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) + : IStatistics(stat_) + , sketch(num_hashes, num_buckets) + , data_type(data_type_) +{ +} + +Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const +{ + /// Try to convert field to data_type. 
Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc. + /// Return null if val larger than the range of data_type + /// + /// For example: if data_type is Int32: + /// 1. For 1.0, 1, '1', return Field(1) + /// 2. For 1.1, max_value_int64, return null + Field val_converted = convertFieldToType(val, *data_type); + if (val_converted.isNull()) + return 0; + + if (data_type->isValueRepresentedByNumber()) + return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory()); + + if (isStringOrFixedString(data_type)) + return sketch.get_estimate(val.get()); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); +} + +void StatisticsCountMinSketch::update(const ColumnPtr & column) +{ + for (size_t row = 0; row < column->size(); ++row) + { + if (column->isNullAt(row)) + continue; + auto data = column->getDataAt(row); + sketch.update(data.data, data.size, 1); + } +} + +void StatisticsCountMinSketch::serialize(WriteBuffer & buf) +{ + Sketch::vector_bytes bytes = sketch.serialize(); + writeIntBinary(static_cast(bytes.size()), buf); + buf.write(reinterpret_cast(bytes.data()), bytes.size()); +} + +void StatisticsCountMinSketch::deserialize(ReadBuffer & buf) +{ + UInt64 size; + readIntBinary(size, buf); + + Sketch::vector_bytes bytes; + bytes.resize(size); /// To avoid 'container-overflow' in AddressSanitizer checking + buf.readStrict(reinterpret_cast(bytes.data()), size); + + sketch = Sketch::deserialize(bytes.data(), size); +} + + +void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + data_type = removeLowCardinalityAndNullable(data_type); + if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type)) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); +} + 
+StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +{ + return std::make_shared(stat, data_type); +} + +} + +#endif diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.h b/src/Storages/Statistics/StatisticsCountMinSketch.h new file mode 100644 index 00000000000..6c8b74f8c35 --- /dev/null +++ b/src/Storages/Statistics/StatisticsCountMinSketch.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +#include "config.h" + +#if USE_DATASKETCHES + +#include + +namespace DB +{ + +class StatisticsCountMinSketch : public IStatistics +{ +public: + StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_); + + Float64 estimateEqual(const Field & val) const override; + + void update(const ColumnPtr & column) override; + + void serialize(WriteBuffer & buf) override; + void deserialize(ReadBuffer & buf) override; + +private: + using Sketch = datasketches::count_min_sketch; + Sketch sketch; + + DataTypePtr data_type; +}; + + +void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr); + +} + +#endif diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index 0747197370c..66150e00fdb 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -1,11 +1,13 @@ #include #include +#include namespace DB { namespace ErrorCodes { - extern const int ILLEGAL_STATISTICS; +extern const int ILLEGAL_STATISTICS; +extern const int LOGICAL_ERROR; } StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) @@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) void StatisticsTDigest::update(const ColumnPtr & column) { size_t rows = column->size(); - for (size_t row = 0; row < rows; ++row) { - /// TODO: support more types. 
- Float64 value = column->getFloat64(row); - t_digest.add(value, 1); + Field field; + column->get(row, field); + + if (field.isNull()) + continue; + + if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field)) + t_digest.add(*field_as_float, 1); } } @@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf) t_digest.deserialize(buf); } -Float64 StatisticsTDigest::estimateLess(Float64 val) const +Float64 StatisticsTDigest::estimateLess(const Field & val) const { - return t_digest.getCountLessThan(val); + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); + if (val_as_float) + return t_digest.getCountLessThan(*val_as_float); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); } -Float64 StatisticsTDigest::estimateEqual(Float64 val) const +Float64 StatisticsTDigest::estimateEqual(const Field & val) const { - return t_digest.getCountEqual(val); + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); + if (val_as_float) + return t_digest.getCountEqual(*val_as_float); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); } -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); + data_type = removeLowCardinalityAndNullable(data_type); if (!data_type->isValueRepresentedByNumber()) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) { return std::make_shared(stat); } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index 
d3a3bf115ee..614973e5d8b 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -16,14 +16,14 @@ public: void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - Float64 estimateLess(Float64 val) const override; - Float64 estimateEqual(Float64 val) const override; + Float64 estimateLess(const Field & val) const override; + Float64 estimateEqual(const Field & val) const override; private: QuantileTDigest t_digest; }; -void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index bf9a40ea8cb..8f60ffcf0b5 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const return column->getUInt(0); } -void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); + data_type = removeLowCardinalityAndNullable(data_type); if (!data_type->isValueRepresentedByNumber()) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsUniq.h 
b/src/Storages/Statistics/StatisticsUniq.h index 5290585bd94..faabde8d47c 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -27,7 +27,7 @@ private: }; -void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } diff --git a/src/Storages/Statistics/tests/gtest_stats.cpp b/src/Storages/Statistics/tests/gtest_stats.cpp index c3c14632ba1..e55c52c49f3 100644 --- a/src/Storages/Statistics/tests/gtest_stats.cpp +++ b/src/Storages/Statistics/tests/gtest_stats.cpp @@ -1,6 +1,10 @@ #include #include +#include +#include + +using namespace DB; TEST(Statistics, TDigestLessThan) { @@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan) std::reverse(data.begin(), data.end()); test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001}); - - } diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index f10fb78f933..9c5fd3604b2 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -1,19 +1,14 @@ #include -#include #include #include #include #include -#include -#include #include #include #include -#include #include -#include namespace DB { @@ -54,7 +49,9 @@ static StatisticsType stringToStatisticsType(String type) return StatisticsType::TDigest; if (type == "uniq") return StatisticsType::Uniq; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type); + if (type == "count_min") + return StatisticsType::CountMinSketch; + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. 
Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type); } String SingleStatisticsDescription::getTypeName() const @@ -65,8 +62,10 @@ String SingleStatisticsDescription::getTypeName() const return "TDigest"; case StatisticsType::Uniq: return "Uniq"; + case StatisticsType::CountMinSketch: + return "count_min"; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type); } } @@ -99,10 +98,9 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe chassert(merging_column_type); if (column_name.empty()) - { column_name = merging_column_name; - data_type = merging_column_type; - } + + data_type = merging_column_type; for (const auto & [stats_type, stats_desc]: other.types_to_desc) { @@ -121,6 +119,7 @@ void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & oth throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name); types_to_desc = other.types_to_desc; + data_type = other.data_type; } void ColumnStatisticsDescription::clear() @@ -159,6 +158,7 @@ std::vector ColumnStatisticsDescription::fromAST(co const auto & column = columns.getPhysical(physical_column_name); stats.column_name = column.name; + stats.data_type = column.type; stats.types_to_desc = statistics_types; result.push_back(stats); } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 4862fb79d45..03b8fb0d583 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -13,6 +13,7 @@ enum class StatisticsType : UInt8 { TDigest = 0, Uniq = 1, + CountMinSketch = 2, Max = 63, }; diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 
587cb621362..b32a2d302a7 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -64,6 +65,11 @@ namespace DB { +namespace FailPoints +{ + extern const char keepermap_fail_drop_data[]; +} + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -411,18 +417,16 @@ StorageKeeperMap::StorageKeeperMap( auto code = client->tryCreate(zk_table_path, "", zkutil::CreateMode::Persistent); - // tables_path was removed with drop - if (code == Coordination::Error::ZNONODE) - { - LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); - continue; - } - else if (code != Coordination::Error::ZOK) - { - throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); - } + /// A table on the same Keeper path already exists, we just appended our table id to subscribe as a new replica + /// We still don't know if the table matches the expected metadata so table_is_valid is not changed + /// It will be checked lazily on the first operation + if (code == Coordination::Error::ZOK) + return; - return; + if (code != Coordination::Error::ZNONODE) + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path); + + /// ZNONODE means we dropped zk_tables_path but didn't finish drop completely } if (client->exists(zk_dropped_path)) @@ -473,6 +477,7 @@ StorageKeeperMap::StorageKeeperMap( table_is_valid = true; + /// we are the first table created for the specified Keeper path, i.e. 
we are the first replica return; } @@ -561,6 +566,10 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock) { + fiu_do_on(FailPoints::keepermap_fail_drop_data, + { + throw zkutil::KeeperException(Coordination::Error::ZOPERATIONTIMEOUT, "Manually triggered operation timeout"); + }); zookeeper->removeChildrenRecursive(zk_data_path); bool completely_removed = false; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 444b3fbae4c..527872d701e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -505,18 +505,18 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context additional_info = fmt::format(" (TID: {}; TIDH: {})", current_tid, current_tid.getHash()); } - Int64 version; + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); + Int64 version = increment.get(); + entry.commit(version); + String mutation_id = entry.file_name; + if (txn) + txn->addMutation(shared_from_this(), mutation_id); + + bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); + { std::lock_guard lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); - version = increment.get(); - entry.commit(version); - String mutation_id = entry.file_name; - if (txn) - txn->addMutation(shared_from_this(), mutation_id); - - bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second; if 
(!inserted) { @@ -527,9 +527,9 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context } throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); } - - LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } + + LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); background_operations_assignee.trigger(); return version; } diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 9201eef185f..35b9c0008c6 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,5 +1,6 @@ // autogenerated by tests/ci/version_helper.py const char * auto_contributors[] { + "0x01f", "0xflotus", "13DaGGeR", "1lann", @@ -167,6 +168,7 @@ const char * auto_contributors[] { "AnneClickHouse", "Anselmo D. Adams", "Anthony N. Simon", + "AntiTopQuark", "Anton Ivashkin", "Anton Kobzev", "Anton Kozlov", @@ -299,6 +301,7 @@ const char * auto_contributors[] { "Dan Wu", "DanRoscigno", "Dani Pozo", + "Daniel Anugerah", "Daniel Bershatsky", "Daniel Byta", "Daniel Dao", @@ -370,6 +373,7 @@ const char * auto_contributors[] { "Elena", "Elena Baskakova", "Elena Torró", + "Elena Torró Martínez", "Elghazal Ahmed", "Eliot Hautefeuille", "Elizaveta Mironyuk", @@ -415,6 +419,7 @@ const char * auto_contributors[] { "FgoDt", "Filatenkov Artur", "Filipe Caixeta", + "Filipp Bakanov", "Filipp Ozinov", "Filippov Denis", "Fille", @@ -451,6 +456,7 @@ const char * auto_contributors[] { "Gleb Novikov", "Gleb-Tretyakov", "GoGoWen2021", + "Gosha Letov", "Gregory", "Grigorii Sokolik", "Grigory", @@ -461,6 +467,7 @@ const char * auto_contributors[] { "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", + "Guspan Tanadi", "Haavard Kvaalen", "Habibullah Oladepo", "HaiBo Li", @@ -474,6 +481,7 @@ const char * auto_contributors[] { "HarryLeeIBM", "Hasitha 
Kanchana", "Hasnat", + "Haydn", "Heena Bansal", "HeenaBansal2009", "Hendrik M", @@ -606,6 +614,7 @@ const char * auto_contributors[] { "Kevin Chiang", "Kevin Michel", "Kevin Mingtarja", + "Kevin Song", "Kevin Zhang", "KevinyhZou", "KinderRiven", @@ -661,6 +670,7 @@ const char * auto_contributors[] { "Lewinma", "Li Shuai", "Li Yin", + "Linh Giang", "Lino Uruñuela", "Lirikl", "Liu Cong", @@ -690,6 +700,7 @@ const char * auto_contributors[] { "Maksim Alekseev", "Maksim Buren", "Maksim Fedotov", + "Maksim Galkin", "Maksim Kita", "Maksym Sobolyev", "Mal Curtis", @@ -724,6 +735,7 @@ const char * auto_contributors[] { "Max Akhmedov", "Max Bruce", "Max K", + "Max K.", "Max Kainov", "Max Vetrov", "MaxTheHuman", @@ -811,6 +823,7 @@ const char * auto_contributors[] { "Nataly Merezhuk", "Natalya Chizhonkova", "Natasha Murashkina", + "Nathan Clevenger", "NeZeD [Mac Pro]", "Neeke Gao", "Neng Liu", @@ -946,6 +959,7 @@ const char * auto_contributors[] { "Robert Coelho", "Robert Hodges", "Robert Schulze", + "Rodolphe Dugé de Bernonville", "RogerYK", "Rohit Agarwal", "Romain Neutron", @@ -1107,6 +1121,7 @@ const char * auto_contributors[] { "Timur Solodovnikov", "TiunovNN", "Tobias Adamson", + "Tobias Florek", "Tobias Lins", "Tom Bombadil", "Tom Risse", @@ -1231,11 +1246,13 @@ const char * auto_contributors[] { "Yingchun Lai", "Yingfan Chen", "Yinzheng-Sun", + "Yinzuo Jiang", "Yiğit Konur", "Yohann Jardin", "Yong Wang", "Yong-Hao Zou", "Youenn Lebras", + "Your Name", "Yu, Peng", "Yuko Takagi", "Yuntao Wu", @@ -1250,6 +1267,7 @@ const char * auto_contributors[] { "Yury Stankevich", "Yusuke Tanaka", "Zach Naimon", + "Zawa-II", "Zheng Miao", "ZhiHong Zhang", "ZhiYong Wang", @@ -1380,6 +1398,7 @@ const char * auto_contributors[] { "conicliu", "copperybean", "coraxster", + "cw5121", "cwkyaoyao", "d.v.semenov", "dalei2019", @@ -1460,12 +1479,14 @@ const char * auto_contributors[] { "fuzzERot", "fyu", "g-arslan", + "gabrielmcg44", "ggerogery", "giordyb", "glockbender", "glushkovds", 
"grantovsky", "gulige", + "gun9nir", "guoleiyi", "guomaolin", "guov100", @@ -1527,6 +1548,7 @@ const char * auto_contributors[] { "jferroal", "jiahui-97", "jianmei zhang", + "jiaosenvip", "jinjunzh", "jiyoungyoooo", "jktng", @@ -1541,6 +1563,7 @@ const char * auto_contributors[] { "jun won", "jus1096", "justindeguzman", + "jwoodhead", "jyz0309", "karnevil13", "kashwy", @@ -1633,10 +1656,12 @@ const char * auto_contributors[] { "mateng0915", "mateng915", "mauidude", + "max-vostrikov", "maxim", "maxim-babenko", "maxkuzn", "maxulan", + "maxvostrikov", "mayamika", "mehanizm", "melin", @@ -1677,6 +1702,7 @@ const char * auto_contributors[] { "nathanbegbie", "nauta", "nautaa", + "nauu", "ndchikin", "nellicus", "nemonlou", @@ -1975,6 +2001,7 @@ const char * auto_contributors[] { "张健", "张风啸", "徐炘", + "忒休斯~Theseus", "曲正鹏", "木木夕120", "未来星___费", diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 27c52124e9c..151079154b1 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -271,7 +271,8 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes) + ActionsDAG::Nodes & additional_nodes, + bool allow_non_deterministic_functions) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -280,8 +281,14 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) node_copy.children.push_back(child_copy); + /// Expression like (now_allowed AND allowed) is 
not allowed if allow_non_deterministic_functions = false. This is important for + /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is + /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply + /// trivial count. + else if (!allow_non_deterministic_functions) + return nullptr; if (node_copy.children.empty()) return nullptr; @@ -307,7 +314,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child) return nullptr; return &node_copy; @@ -321,7 +328,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto index_hint_dag = index_hint->getActions()->clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag->getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -355,13 +362,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) +ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions) { if (!predicate) return nullptr; ActionsDAG::Nodes additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); + const auto * res = splitFilterNodeForAllowedInputs(predicate,
allowed_inputs, additional_nodes, allow_non_deterministic_functions); if (!res) return nullptr; @@ -370,7 +377,7 @@ ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block); + auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false); if (dag) filterBlockWithDAG(dag, block, context); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 9045a2f5481..e5cfa47c8f6 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -32,7 +32,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); +/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic +/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr. +/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in +/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is +/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` +/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is +/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial +/// count optimization will be mistakenly applied to the query. 
+ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true); /// Extract from the input stream a set of `name` column values template diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index 688c7d59988..dfb5885270a 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -31,6 +31,7 @@ class CIBuddy: self.sha = pr_info.sha[:10] def check_workflow(self): + GHActions.print_workflow_results() res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig) if res != GHActions.ActionStatuses.SUCCESS: self.post_job_error("Workflow Configuration Failed", critical=True) diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 1963e3f39d0..d42091fb0da 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -92,15 +92,33 @@ class GHActions: PENDING = "pending" SUCCESS = "success" - @staticmethod - def get_workflow_job_result(wf_job_name: str) -> Optional[str]: + @classmethod + def _get_workflow_results(cls): if not Path(Envs.WORKFLOW_RESULT_FILE).exists(): print( f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]" ) - return None + return {} with open(Envs.WORKFLOW_RESULT_FILE, "r", encoding="utf-8") as json_file: - res = json.load(json_file) + try: + res = json.load(json_file) + except json.JSONDecodeError as e: + print(f"ERROR: json decoder exception {e}") + json_file.seek(0) + print(" File content:") + print(json_file.read()) + return {} + return res + + @classmethod + def print_workflow_results(cls): + res = cls._get_workflow_results() + results = [f"{job}: {data['result']}" for job, data in res.items()] + cls.print_in_group("Workflow results", results) + + @classmethod + def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]: + res = cls._get_workflow_results() if wf_job_name in res: return res[wf_job_name]["result"] # type: ignore else: diff --git a/tests/ci/clickhouse_helper.py 
b/tests/ci/clickhouse_helper.py index 91ea5c6d5d3..287970cce9a 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -197,6 +197,10 @@ def get_instance_id(): return _query_imds("latest/meta-data/instance-id") +def get_instance_lifecycle(): + return _query_imds("latest/meta-data/instance-life-cycle") + + def prepare_tests_results_for_clickhouse( pr_info: PRInfo, test_results: TestResults, @@ -233,7 +237,7 @@ def prepare_tests_results_for_clickhouse( "head_ref": head_ref, "head_repo": head_repo, "task_url": pr_info.task_url, - "instance_type": get_instance_type(), + "instance_type": ",".join([get_instance_type(), get_instance_lifecycle()]), "instance_id": get_instance_id(), } diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 21fc02ce02a..413c35cbebe 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -21,7 +21,7 @@ from env_helper import ( TEMP_PATH, ) from git_helper import Git -from pr_info import PRInfo, EventType +from pr_info import PRInfo from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen @@ -375,25 +375,23 @@ def main(): tags = gen_tags(args.version, args.release_type) repo_urls = {} direct_urls: Dict[str, List[str]] = {} - if pr_info.event_type == EventType.PULL_REQUEST: - release_or_pr = str(pr_info.number) - sha = pr_info.sha - elif pr_info.event_type == EventType.PUSH and pr_info.is_master: - release_or_pr = str(0) - sha = pr_info.sha - else: - release_or_pr = f"{args.version.major}.{args.version.minor}" - sha = args.sha - assert sha for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): - if not args.bucket_prefix: + if args.bucket_prefix: + assert not args.allow_build_reuse + repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" + elif args.sha: + # CreateRelease workflow only. 
TODO + version = args.version repo_urls[arch] = ( f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{release_or_pr}/{sha}/{build_name}" + f"{version.major}.{version.minor}/{args.sha}/{build_name}" ) else: - repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" + # In all other cases urls must be fetched from build reports. TODO: script needs refactoring + repo_urls[arch] = "" + assert args.allow_build_reuse + if args.allow_build_reuse: # read s3 urls from pre-downloaded build reports if "clickhouse-server" in image_repo: @@ -431,7 +429,6 @@ def main(): ) if test_results[-1].status != "OK": status = FAILURE - pr_info = pr_info or PRInfo() description = f"Processed tags: {', '.join(tags)}" JobReport( diff --git a/tests/ci/worker/.gitignore b/tests/ci/worker/.gitignore deleted file mode 100644 index 4ed18989e78..00000000000 --- a/tests/ci/worker/.gitignore +++ /dev/null @@ -1 +0,0 @@ -generated_*init_runner.sh diff --git a/tests/ci/worker/deploy-runner-init.sh b/tests/ci/worker/deploy-runner-init.sh deleted file mode 100755 index 96fbd82a99c..00000000000 --- a/tests/ci/worker/deploy-runner-init.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env bash - -set -e - -usage() { - echo "Usage: $0 ENVIRONMENT" >&2 - echo "Valid values for ENVIRONMENT: staging, production" >&2 - exit 1 -} - -case "$1" in - staging|production) - ENVIRONMENT="$1" ;; - --help) - usage ;; - *) - echo "Invalid argument" >&2 - usage ;; -esac - -cd "$(dirname "$0")" || exit 1 -SOURCE_SCRIPT='init_runner.sh' - -check_response() { - # Are we even in the interactive shell? 
- [ -t 1 ] || return 1 - local request - request="$1" - read -rp "$request (y/N): " response - case "$response" in - [Yy]) - return 0 - # Your code to continue goes here - ;; - *) - return 1 - ;; - esac -} - -check_dirty() { - if [ -n "$(git status --porcelain=v2 "$SOURCE_SCRIPT")" ]; then - echo "The $SOURCE_SCRIPT has uncommited changes, won't deploy it" >&2 - exit 1 - fi -} -GIT_HASH=$(git log -1 --format=format:%H) - -header() { - cat << EOF -#!/usr/bin/env bash - -echo 'The $ENVIRONMENT script is generated from $SOURCE_SCRIPT, commit $GIT_HASH' - -EOF -} - -body() { - local first_line - first_line=$(sed -n '/^# THE SCRIPT START$/{=;q;}' "$SOURCE_SCRIPT") - if [ -z "$first_line" ]; then - echo "The pattern '# THE SCRIPT START' is not found in $SOURCE_SCRIPT" >&2 - exit 1 - fi - tail "+$first_line" "$SOURCE_SCRIPT" -} - -GENERATED_FILE="generated_${ENVIRONMENT}_${SOURCE_SCRIPT}" - -{ header && body; } > "$GENERATED_FILE" - -echo "The file $GENERATED_FILE is generated" - -if check_response "Display the content of $GENERATED_FILE?"; then - if [ -z "$PAGER" ]; then - less "$GENERATED_FILE" - else - $PAGER "$GENERATED_FILE" - fi -fi - -check_dirty - -S3_OBJECT=${S3_OBJECT:-s3://github-runners-data/cloud-init/${ENVIRONMENT}.sh} -if check_response "Deploy the generated script to $S3_OBJECT?"; then - aws s3 mv "$GENERATED_FILE" "$S3_OBJECT" -fi diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh deleted file mode 100644 index 5177e112edd..00000000000 --- a/tests/ci/worker/init_runner.sh +++ /dev/null @@ -1,406 +0,0 @@ -#!/usr/bin/env bash - -cat > /dev/null << 'EOF' -The following content is embedded into the s3 object via the script -deploy-runner-init.sh {staging,production} -with additional helping information - -In the `user data` you should define as the following text -between `### COPY BELOW` and `### COPY ABOVE` - -### COPY BELOW -Content-Type: multipart/mixed; boundary="//" -MIME-Version: 1.0 - ---// -Content-Type: text/cloud-config; 
charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="cloud-config.txt" - -#cloud-config -cloud_final_modules: -- [scripts-user, always] - ---// -Content-Type: text/x-shellscript; charset="us-ascii" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="userdata.txt" - -#!/bin/bash -INSTANCE_ID=$(ec2metadata --instance-id) -INIT_ENVIRONMENT=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:init-environment'].Value" --output text) -echo "Downloading and using $INIT_ENVIRONMENT cloud-init.sh" -aws s3 cp "s3://github-runners-data/cloud-init/${INIT_ENVIRONMENT:-production}.sh" /tmp/cloud-init.sh -chmod 0700 /tmp/cloud-init.sh -exec bash /tmp/cloud-init.sh ---// -### COPY ABOVE -EOF - -# THE SCRIPT START - -set -uo pipefail - -#################################### -# IMPORTANT! # -# EC2 instance should have # -# `github:runner-type` tag # -# set accordingly to a runner role # -#################################### - -echo "Running init v1.1" -export DEBIAN_FRONTEND=noninteractive -export RUNNER_HOME=/home/ubuntu/actions-runner - -export RUNNER_ORG="ClickHouse" -export RUNNER_URL="https://github.com/${RUNNER_ORG}" -# Funny fact, but metadata service has fixed IP -INSTANCE_ID=$(ec2metadata --instance-id) -export INSTANCE_ID - -bash /usr/local/share/scripts/init-network.sh - -# combine labels -RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) -LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE" -export LABELS -echo "Instance Labels: $LABELS" - -LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle) -export LIFE_CYCLE -echo "Instance lifecycle: $LIFE_CYCLE" - -INSTANCE_TYPE=$(ec2metadata --instance-type) -echo "Instance type: $INSTANCE_TYPE" - -# Refresh CloudWatch 
agent config -aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json -systemctl restart amazon-cloudwatch-agent.service - -# Refresh teams ssh keys -TEAM_KEYS_URL=$(aws ssm get-parameter --region us-east-1 --name team-keys-url --query 'Parameter.Value' --output=text) -curl -s "${TEAM_KEYS_URL}" > /home/ubuntu/.ssh/authorized_keys2 -chown ubuntu: /home/ubuntu/.ssh -R - - -# Create a pre-run script that will provide diagnostics info -mkdir -p /tmp/actions-hooks -cat > /tmp/actions-hooks/common.sh << 'EOF' -#!/bin/bash -EOF - -terminate_delayed() { - # The function for post hook to gracefully finish the job and then tear down - # The very specific sleep time is used later to determine in the main loop if - # the instance is tearing down - # IF `sleep` IS CHANGED, CHANGE ANOTHER VALUE IN `pgrep` - sleep=13.14159265358979323846 - echo "Going to terminate the runner's instance in $sleep seconds" - # We execute it with `at` to not have it as an orphan process, but launched independently - # GH Runners kill all remain processes - echo "sleep '$sleep'; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now || \ - aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" # workaround for complete out of space or non-installed `at` - exit 0 -} - -detect_delayed_termination() { - # The function look for very specific sleep with pi - if pgrep 'sleep 13.14159265358979323846'; then - echo 'The instance has delayed termination, sleep the same time to wait if it goes down' - sleep 14 - fi -} - -declare -f terminate_delayed >> /tmp/actions-hooks/common.sh - -terminate_and_exit() { - # Terminate instance and exit from the script instantly - echo "Going to terminate the runner's instance" - aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" - exit 0 -} - -terminate_decrease_and_exit() { - # Terminate instance and exit from the script 
instantly - echo "Going to terminate the runner's instance and decrease asg capacity" - aws autoscaling terminate-instance-in-auto-scaling-group --instance-id "$INSTANCE_ID" --should-decrement-desired-capacity - exit 0 -} - -declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh - -check_spot_instance_is_old() { - if [ "$LIFE_CYCLE" == "spot" ]; then - local UPTIME - UPTIME=$(< /proc/uptime) - UPTIME=${UPTIME%%.*} - if (( 3600 < UPTIME )); then - echo "The spot instance has uptime $UPTIME, it's time to shut it down" - return 0 - fi - fi - return 1 -} - -check_proceed_spot_termination() { - # The function checks and proceeds spot instance termination if exists - # The event for spot instance termination - local FORCE - FORCE=${1:-} - if TERMINATION_DATA=$(curl -s --fail http://169.254.169.254/latest/meta-data/spot/instance-action); then - # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-instance-termination-notices.html#instance-action-metadata - _action=$(jq '.action' -r <<< "$TERMINATION_DATA") - _time=$(jq '.time | fromdate' <<< "$TERMINATION_DATA") - _until_action=$((_time - $(date +%s))) - echo "Received the '$_action' event that will be effective in $_until_action seconds" - if (( _until_action <= 30 )) || [ "$FORCE" == "force" ]; then - echo "The action $_action will be done in $_until_action, killing the runner and exit" - local runner_pid - runner_pid=$(pgrep Runner.Listener) - if [ -n "$runner_pid" ]; then - # Kill the runner to not allow it cancelling the job - # shellcheck disable=SC2046 - kill -9 "$runner_pid" $(list_children "$runner_pid") - fi - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" - terminate_and_exit - fi - fi -} - -no_terminating_metadata() { - # The function check that instance could continue work - # Returns 1 if any of termination events are received - - # The event for rebalance recommendation. 
Not strict, so we have some room to make a decision here - if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then - echo 'Received recommendation to rebalance, checking the uptime' - local UPTIME - UPTIME=$(< /proc/uptime) - UPTIME=${UPTIME%%.*} - # We don't shutdown the instances younger than 30m - if (( 1800 < UPTIME )); then - # To not shutdown everything at once, use the 66% to survive - if (( $((RANDOM % 3)) == 0 )); then - echo 'The instance is older than 30m and won the roulette' - return 1 - fi - echo 'The instance is older than 30m, but is not chosen for rebalance' - else - echo 'The instance is younger than 30m, do not shut it down' - fi - fi - - # Checks if the ASG in a lifecycle hook state - local ASG_STATUS - ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state) - if [ "$ASG_STATUS" == "Terminated" ]; then - echo 'The instance in ASG status Terminating:Wait' - return 1 - fi -} - -terminate_on_event() { - # If there is a rebalance event, then the instance could die soon - # Let's don't wait for it and terminate proactively - if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then - terminate_and_exit - fi - - # Here we check if the autoscaling group marked the instance for termination, and it's wait for the job to finish - ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state) - if [ "$ASG_STATUS" == "Terminated" ]; then - INSTANCE_ID=$(ec2metadata --instance-id) - ASG_NAME=$(aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='aws:autoscaling:groupName'].Value" --output text) - LIFECYCLE_HOOKS=$(aws autoscaling describe-lifecycle-hooks --auto-scaling-group-name "$ASG_NAME" --query "LifecycleHooks[].LifecycleHookName" --output text) - for LCH in $LIFECYCLE_HOOKS; do - aws autoscaling complete-lifecycle-action --lifecycle-action-result CONTINUE \ - 
--lifecycle-hook-name "$LCH" --auto-scaling-group-name "$ASG_NAME" \ - --instance-id "$INSTANCE_ID" - true # autoformat issue - done - echo 'The runner is marked as "Terminated" by the autoscaling group, we are terminating' - terminate_and_exit - fi -} - -cat > /tmp/actions-hooks/pre-run.sh << EOF -#!/bin/bash -set -uo pipefail - -echo "Runner's public DNS: $(ec2metadata --public-hostname)" -echo "Runner's labels: ${LABELS}" -echo "Runner's instance type: $(ec2metadata --instance-type)" -EOF - -# Create a post-run script that will restart docker daemon before the job started -cat > /tmp/actions-hooks/post-run.sh << 'EOF' -#!/bin/bash -set -xuo pipefail - -source /tmp/actions-hooks/common.sh - -# Free KiB, free percents -ROOT_STAT=($(df / | awk '/\// {print $4 " " int($4/$2 * 100)}')) -if [[ ${ROOT_STAT[0]} -lt 3000000 ]] || [[ ${ROOT_STAT[1]} -lt 5 ]]; then - echo "The runner has ${ROOT_STAT[0]}KiB and ${ROOT_STAT[1]}% of free space on /" - terminate_delayed -fi - -# shellcheck disable=SC2046 -docker ps --quiet | xargs --no-run-if-empty docker kill ||: -# shellcheck disable=SC2046 -docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - -# If we have hanged containers after the previous commands, than we have a hanged one -# and should restart the daemon -if [ "$(docker ps --all --quiet)" ]; then - # Systemd service of docker has StartLimitBurst=3 and StartLimitInterval=60s, - # that's why we try restarting it for long - for i in {1..25}; - do - sudo systemctl restart docker && break || sleep 5 - done - - for i in {1..10} - do - docker info && break || sleep 2 - done - # Last chance, otherwise we have to terminate poor instance - docker info 1>/dev/null || { echo Docker unable to start; terminate_delayed ; } -fi -EOF - -get_runner_token() { - /usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value -} - -is_job_assigned() { - local runner_pid - runner_pid=$(pgrep 
Runner.Listener) - if [ -z "$runner_pid" ]; then - # if runner has finished, it's fine - return 0 - fi - local log_file - log_file=$(lsof -p "$runner_pid" 2>/dev/null | grep -o "$RUNNER_HOME/_diag/Runner.*log") - if [ -z "$log_file" ]; then - # assume, the process is over or just started - return 0 - fi - # So far it's the only solid way to determine that the job is starting - grep -q 'Terminal] .* Running job:' "$log_file" \ - && return 0 \ - || return 1 -} - -list_children () { - local children - children=$(ps --ppid "$1" -o pid=) - if [ -z "$children" ]; then - return - fi - - for pid in $children; do - list_children "$pid" - done - echo "$children" -} - -# There's possibility that it fails because the runner's version is outdated, -# so after the first failure we'll try to launch it with enabled autoupdate. -# -# We'll fail and terminate after 10 consequent failures. -ATTEMPT=0 -# In `kill` 0 means "all processes in process group", -1 is "all but PID 1" -# We use `-2` to get an error -RUNNER_PID=-2 - -while true; do - # Does not send signal, but checks that the process $RUNNER_PID is running - if kill -0 -- $RUNNER_PID; then - ATTEMPT=0 - echo "Runner is working with pid $RUNNER_PID, checking the metadata in background" - check_proceed_spot_termination - - if ! 
is_job_assigned; then - RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) )) - echo "The runner is launched $RUNNER_AGE seconds ago and still hasn't received a job" - if (( 60 < RUNNER_AGE )); then - echo "Attempt to delete the runner for a graceful shutdown" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ - || continue - echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" - terminate_decrease_and_exit - fi - fi - else - if [ "$RUNNER_PID" != "-2" ]; then - wait $RUNNER_PID \ - && echo "Runner with PID $RUNNER_PID successfully finished" \ - || echo "Attempt $((++ATTEMPT)) to start the runner" - fi - if (( ATTEMPT > 10 )); then - echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it" - terminate_and_exit - fi - - cd $RUNNER_HOME || terminate_and_exit - detect_delayed_termination - # If runner is not active, check that it needs to terminate itself - echo "Checking if the instance suppose to terminate" - no_terminating_metadata || terminate_on_event - check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination force - - echo "Going to configure runner" - token_args=(--token "$(get_runner_token)") - config_args=( - "${token_args[@]}" --url "$RUNNER_URL" - --ephemeral --unattended --replace --runnergroup Default - --labels "$LABELS" --work _work --name "$INSTANCE_ID" - ) - if (( ATTEMPT > 1 )); then - echo 'The runner failed to start at least once. Removing it and then configuring with autoupdate enabled.' 
- sudo -u ubuntu ./config.sh remove "${token_args[@]}" - sudo -u ubuntu ./config.sh "${config_args[@]}" - else - echo "Configure runner with disabled autoupdate" - config_args+=("--disableupdate") - sudo -u ubuntu ./config.sh "${config_args[@]}" - fi - - echo "Another one check to avoid race between runner and infrastructure" - no_terminating_metadata || terminate_on_event - check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination force - - # There were some failures to start the Job because of trash in _work - rm -rf _work - - # https://github.com/actions/runner/issues/3266 - # We're unable to know if the runner is failed to start. - echo 'Monkey-patching run helpers to get genuine exit code of the runner' - for script in run.sh run-helper.sh.template; do - # shellcheck disable=SC2016 - grep -q 'exit 0$' "$script" && \ - sed 's/exit 0/exit $returnCode/' -i "$script" && \ - echo "Script $script is patched" - done - - echo "Run" - sudo -u ubuntu \ - ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \ - ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \ - ./run.sh & - RUNNER_PID=$! 
- - sleep 10 - fi - - sleep 5 -done - -# vim:ts=4:sw=4 diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index d7b4230d872..31316af7b1e 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -104,3 +104,24 @@ def test_keeper_map_without_zk(started_cluster): node.query("DETACH TABLE test_keeper_map_without_zk") client.stop() + + +def test_keeper_map_with_failed_drop(started_cluster): + run_query( + "CREATE TABLE test_keeper_map_with_failed_drop (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);" + ) + + run_query("INSERT INTO test_keeper_map_with_failed_drop VALUES (1, 11)") + run_query("SYSTEM ENABLE FAILPOINT keepermap_fail_drop_data") + node.query("DROP TABLE test_keeper_map_with_failed_drop SYNC") + + zk_client = get_genuine_zk() + assert ( + zk_client.get("/test_keeper_map/test_keeper_map_with_failed_drop/data") + is not None + ) + + run_query("SYSTEM DISABLE FAILPOINT keepermap_fail_drop_data") + run_query( + "CREATE TABLE test_keeper_map_with_failed_drop_another (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);" + ) diff --git a/tests/performance/generate_table_function.xml b/tests/performance/generate_table_function.xml index c219d73b6cf..d56c585188d 100644 --- a/tests/performance/generate_table_function.xml +++ b/tests/performance/generate_table_function.xml @@ -7,7 +7,7 @@ SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM 
generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000); diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference new file mode 100644 index 00000000000..02c41656a36 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference @@ -0,0 +1,14 @@ +CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +Test statistics count_min: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) +Test statistics multi-types: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) +Test LowCardinality and Nullable data type: +tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql new file mode 100644 index 00000000000..c730aa7b4a7 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql @@ -0,0 +1,70 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS tab SYNC; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; 
+SET allow_suspicious_low_cardinality_types=1; +SET mutations_sync = 2; + +CREATE TABLE tab +( + a String, + b UInt64, + c Int64, + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'Test statistics count_min:'; + +ALTER TABLE tab ADD STATISTICS a TYPE count_min; +ALTER TABLE tab ADD STATISTICS b TYPE count_min; +ALTER TABLE tab ADD STATISTICS c TYPE count_min; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab DROP STATISTICS a, b, c; + + +SELECT 'Test statistics multi-types:'; + +ALTER TABLE tab ADD STATISTICS a TYPE count_min; +ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; +ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab DROP STATISTICS a, b, c; + +DROP TABLE IF EXISTS tab SYNC; + + +SELECT 'Test LowCardinality and Nullable data type:'; +DROP TABLE IF EXISTS tab2 SYNC; +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE tab2 +( + a LowCardinality(Int64) STATISTICS(count_min), + 
b Nullable(Int64) STATISTICS(count_min), + c LowCardinality(Nullable(Int64)) STATISTICS(count_min), + pk String, +) Engine = MergeTree() ORDER BY pk; + +select name from system.tables where name = 'tab2' and database = currentDatabase(); + +DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql index d496392668b..0f5f353c045 100644 --- a/tests/queries/0_stateless/02864_statistics_uniq.sql +++ b/tests/queries/0_stateless/02864_statistics_uniq.sql @@ -70,3 +70,4 @@ SETTINGS min_bytes_for_wide_part = 0; INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; DROP TABLE IF EXISTS t3; + diff --git a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh index 89942e25b67..9081035579d 100755 --- a/tests/queries/0_stateless/02932_refreshable_materialized_views.sh +++ b/tests/queries/0_stateless/02932_refreshable_materialized_views.sh @@ -2,8 +2,6 @@ # Tags: atomic-database CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh @@ -134,7 +132,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshe do sleep 0.1 done -sleep 1 + $CLICKHOUSE_CLIENT -nq " select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes; truncate src; @@ -172,13 +170,13 @@ $CLICKHOUSE_CLIENT -nq " drop table b; create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src; drop table src;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Exception' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Exception' ] do sleep 0.1 done # Check exception, create src, expect successful refresh. $CLICKHOUSE_CLIENT -nq " - select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' from refreshes; + select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c'; create table src (x Int64) engine Memory as select 1; system refresh view c;" while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ] @@ -224,22 +222,27 @@ done $CLICKHOUSE_CLIENT -nq " rename table e to f; select '<24: rename during refresh>', * from f; - select '<25: rename during refresh>', view, status from refreshes; + select '<25: rename during refresh>', view, status from refreshes where view = 'f'; alter table f modify refresh after 10 year;" -sleep 2 # make it likely that at least one row was processed + # Cancel. 
$CLICKHOUSE_CLIENT -nq " system cancel view f;" -while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Cancelled' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Cancelled' ] do sleep 0.1 done + +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" = 'Running' ] +do + sleep 0.1 +done + # Check that another refresh doesn't immediately start after the cancelled one. -sleep 1 $CLICKHOUSE_CLIENT -nq " - select '<27: cancelled>', view, status from refreshes; + select '<27: cancelled>', view, status from refreshes where view = 'f'; system refresh view f;" -while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ] +while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ] do sleep 0.1 done diff --git a/tests/queries/0_stateless/02995_index.reference b/tests/queries/0_stateless/02995_index.reference deleted file mode 100644 index 1e8639caa88..00000000000 --- a/tests/queries/0_stateless/02995_index.reference +++ /dev/null @@ -1,126 +0,0 @@ -12 4 21722 2209341 4 1415 2333 4 61 64 3 -21 1134 11363 58484 1106 1458 1592 136 26 62 32 -22 210 4504 5729 196 291 767 124 47 54 8 -26 196 1327684 5221 195 4140 5661 161 28 49 19 -28 5 2034378 7102 5 325 3255 2 53 60 4 -29 53 45041 45189 45 1580 211 31 55 84 18 -38 424 1600675 4653 424 562 5944 244 60 65 6 -45 17 62743 674873 17 6239 6494 17 65 76 8 -72 1862 1210073 6200 1677 2498 528 859 51 61 11 -79 2 2255228 2255293 2 5495 7057 2 65 65 1 -85 459 1051571 1829638 459 6402 7131 334 32 61 25 -86 10 1748130 1754217 10 4374 7003 10 56 59 4 -91 165 5718 5802 75 282 7113 112 41 63 22 -94 20 1231916 2050003 20 4802 4917 19 53 59 7 -99 2 3665 36667 2 497 697 2 70 71 2 -103 1 2446615 2446615 1 2498 2498 1 58 58 1 -106 72 6149 6699 67 527 826 40 61 61 1 -111 43 2273186 5272 43 
492 4923 4 54 72 15 -120 3129 45117 6735 2868 1030 1625 561 59 64 6 -138 2 49243 49374 2 1428 1519 2 47 48 2 -143 100 23321 63639 100 1115 1624 88 51 51 1 -145 1 2447976 2447976 1 6173 6173 1 44 44 1 -153 16 13748 16881 16 1506 1636 16 54 68 9 -159 19952 1525336 7131 12957 1280 6163 2668 24 66 39 -171 5 15042 16698 5 1302 1608 5 65 65 1 -179 6264 1362341 2686 6244 2554 7132 2705 61 67 7 -192 1 1639623 1639623 1 3406 3406 1 32 32 1 -193 1 1429969 1429969 1 7131 7131 1 45 45 1 -207 12 23057 32500 12 1491 1726 12 32 46 7 -221 5081 1366870 6649 3432 4527 5226 687 24 69 39 -228 73 12281 17929 71 1328 2034 63 49 71 18 -229 2 1617478 1723791 2 4590 5578 2 41 42 2 -230 3916 1332729 6949 3668 1330 4703 845 62 65 4 -238 25 2624456 2625673 24 2535 6465 25 58 75 14 -241 154 2554929 2616444 154 2626 7131 148 34 57 17 -248 276 15529 30987 274 1040 1222 136 37 79 27 -254 3018 33966 6635 2837 1057 1622 539 24 60 33 -255 20 1581774 1811334 20 6068 6301 18 33 57 10 -256 5 5145 6841 5 367 376 5 58 58 1 -270 2 2195579 2262119 2 7102 7123 2 33 34 2 -281 32 2379460 616215 32 6042 6086 23 53 64 12 -282 7 1292651 24244 7 1607 2455 6 46 55 5 -286 123 1521935 5269 123 3793 3940 81 40 66 22 -291 21 2419080 3567 21 297 4731 21 54 55 2 -316 4 5221 5616 4 505 558 4 32 35 3 -319 232 56480 63033 230 1599 313 50 33 64 26 -327 15 51647 51894 14 1292 1585 14 47 57 7 -332 24 23484 54948 24 1609 1726 16 32 49 11 -333 1 14189 14189 1 1550 1550 1 63 63 1 -342 49 2579220 2622432 49 4626 6933 48 34 54 14 -344 1 6486 6486 1 509 509 1 24 24 1 -346 1987 53016 6735 1823 1334 174 294 26 62 32 -358 45 59058 60844 44 6746 722 40 57 84 15 -363 1198 1260033 2568811 1196 5710 5790 82 55 80 26 -384 150 2361175 476024 150 7008 7123 81 38 64 22 -387 277 5200 6553 252 243 521 130 65 65 1 -392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13 -396 8181 1380803 6186 7920 545 798 1743 24 67 39 -398 3 5183 5213 2 291 352 3 53 59 3 -399 62 51494 59203 61 7073 754 42 55 78 18 -412 2141 1360120 2189792 2136 2491 5658 1371 71 
75 5 -413 2 2036037 2064917 2 3963 4666 2 43 45 2 -431 33 2302331 2348449 33 4425 6516 32 69 69 1 -447 59 25125 33094 59 1176 1817 56 53 58 6 -456 1 53157 53157 1 1556 1556 1 26 26 1 -462 5 5456 6280 5 348 4337 5 28 40 5 -472 1 1443716 1443716 1 6122 6122 1 42 42 1 -491 34 1066102 1183673 34 6606 6822 32 46 67 15 -498 896 2230163 3054 895 537 7131 714 24 59 28 -504 108 12281 25180 108 1318 1784 94 55 66 12 -515 22 1588883 2640809 22 6554 6571 15 46 59 12 -518 1 37743 37743 1 1558 1558 1 72 72 1 -530 1 3033 3033 1 561 561 1 59 59 1 -532 26 5721 6355 25 549 665 14 44 50 7 -546 156 2577874 48517 156 1105 324 133 44 51 8 -554 12 1665194 2640066 12 1817 2951 12 57 57 1 -564 3865 2028049 2083433 3722 1115 985 2203 44 84 41 -566 4432 50605 57509 3217 1191 267 459 26 72 39 -567 8 5221 5893 7 333 558 8 27 35 4 -582 1172 1320619 2019743 1172 5819 7131 757 26 63 30 -584 43100 2500 5594 22561 134 4573 1660 48 84 37 -589 28 6046 6068 19 345 564 27 55 62 8 -595 139 1585165 1683606 138 2231 3598 132 54 84 28 -615 3 1056081 1116230 3 5794 5796 2 59 62 3 -619 7 1543114 5241 7 2442 3105 7 41 45 3 -634 2722 1221058 4999 2686 2426 7131 1735 54 60 7 -635 237 2119333 4667 237 561 5999 176 49 60 12 -644 5 1774169 2056171 5 5591 6091 4 33 39 3 -647 8 51632 64403 8 1457 1624 8 26 34 5 -651 1325 1620565 6281 1301 528 792 815 62 63 2 -665 13 4598 4789 13 511 558 11 39 46 7 -679 1560 1613200 25940 1552 1569 3118 781 49 84 35 -704 2 14226 15594 2 1086 1116 2 65 71 2 -715 25 1199352 3490 25 5036 5112 23 34 55 13 -716 1253 61989 6735 1050 1203 1625 397 52 65 14 -730 2584 5560 6170 634 2421 627 293 56 69 14 -736 8 1433153 4941 8 339 4594 8 28 36 5 -749 2 1326176 1339862 2 4339 6213 2 49 50 2 -753 1 53157 53157 1 1556 1556 1 26 26 1 -761 63 1443230 6881 63 3154 3204 26 56 73 14 -762 49 1449596 1968154 49 2437 3753 48 54 62 9 -775 35107 5330 769436 2471 447 6607 656 70 81 12 -789 1 1552458 1552458 1 2441 2441 1 62 62 1 -794 158 5585 6585 155 495 929 67 24 50 20 -839 9 29223 46530 9 1336 1465 9 52 
52 1 -844 5 2377545 2377635 5 5129 6321 5 53 69 5 -846 50 2172273 2589295 50 1582 3053 48 64 68 5 -847 2577 56656 63658 1582 1444 838 474 26 63 33 -861 1333 5570 6909 839 457 489 37 33 70 34 -873 2360 1519811 50487 2248 1310 1784 316 60 68 9 -879 228 6704 6785 79 279 507 121 35 66 24 -889 5130 2070007 39692 5040 1151 6791 2606 44 66 23 -896 4 511246 859452 4 6554 6561 4 67 71 4 -912 146 1322641 2238040 146 1366 6354 143 59 59 1 -913 82 5495 6870 78 350 565 67 24 43 15 -921 763 1580790 416881 763 6191 7131 509 63 64 2 -925 318 2500952 5025 309 476 6114 182 32 56 21 -931 12 4277 4809 12 238 256 9 63 83 9 -942 954 1331 2228193 952 1121 5047 788 65 70 6 -948 14 1785593 2600431 14 6550 6598 13 34 49 9 -956 5 5755 6023 5 359 411 5 43 48 4 -963 4 3812 3835 4 444 537 4 47 53 4 -978 5 51632 58212 5 1127 1556 5 24 32 5 -980 53 47201 59744 53 1537 1625 36 41 49 9 -987 6033 2020131 763444 4306 256 792 1832 60 64 5 -993 4 1615159 1718339 4 1570 3093 4 62 63 2 diff --git a/tests/queries/0_stateless/02995_index_1.reference b/tests/queries/0_stateless/02995_index_1.reference new file mode 100644 index 00000000000..6c3b1230db6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_1.reference @@ -0,0 +1,15 @@ +12 4 21722 2209341 4 1415 2333 4 61 64 3 +21 1134 11363 58484 1106 1458 1592 136 26 62 32 +22 210 4504 5729 196 291 767 124 47 54 8 +26 196 1327684 5221 195 4140 5661 161 28 49 19 +28 5 2034378 7102 5 325 3255 2 53 60 4 +29 53 45041 45189 45 1580 211 31 55 84 18 +38 424 1600675 4653 424 562 5944 244 60 65 6 +45 17 62743 674873 17 6239 6494 17 65 76 8 +72 1862 1210073 6200 1677 2498 528 859 51 61 11 +79 2 2255228 2255293 2 5495 7057 2 65 65 1 +85 459 1051571 1829638 459 6402 7131 334 32 61 25 +86 10 1748130 1754217 10 4374 7003 10 56 59 4 +91 165 5718 5802 75 282 7113 112 41 63 22 +94 20 1231916 2050003 20 4802 4917 19 53 59 7 +99 2 3665 36667 2 497 697 2 70 71 2 diff --git a/tests/queries/0_stateless/02995_index.sh b/tests/queries/0_stateless/02995_index_1.sh similarity 
index 98% rename from tests/queries/0_stateless/02995_index.sh rename to tests/queries/0_stateless/02995_index_1.sh index 5125d03904e..a5f1b30c2e8 100755 --- a/tests/queries/0_stateless/02995_index.sh +++ b/tests/queries/0_stateless/02995_index_1.sh @@ -26,7 +26,7 @@ DETACH TABLE test; ATTACH TABLE test; " -for i in {1..1000} +for i in {1..100} do echo " WITH ${i} AS try diff --git a/tests/queries/0_stateless/02995_index_10.reference b/tests/queries/0_stateless/02995_index_10.reference new file mode 100644 index 00000000000..bfa38d03801 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_10.reference @@ -0,0 +1,13 @@ +912 146 1322641 2238040 146 1366 6354 143 59 59 1 +913 82 5495 6870 78 350 565 67 24 43 15 +921 763 1580790 416881 763 6191 7131 509 63 64 2 +925 318 2500952 5025 309 476 6114 182 32 56 21 +931 12 4277 4809 12 238 256 9 63 83 9 +942 954 1331 2228193 952 1121 5047 788 65 70 6 +948 14 1785593 2600431 14 6550 6598 13 34 49 9 +956 5 5755 6023 5 359 411 5 43 48 4 +963 4 3812 3835 4 444 537 4 47 53 4 +978 5 51632 58212 5 1127 1556 5 24 32 5 +980 53 47201 59744 53 1537 1625 36 41 49 9 +987 6033 2020131 763444 4306 256 792 1832 60 64 5 +993 4 1615159 1718339 4 1570 3093 4 62 63 2 diff --git a/tests/queries/0_stateless/02995_index_10.sh b/tests/queries/0_stateless/02995_index_10.sh new file mode 100755 index 00000000000..d72c7c72705 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_10.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {901..1000} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_2.reference b/tests/queries/0_stateless/02995_index_2.reference new file mode 100644 index 00000000000..098292a289f --- /dev/null +++ b/tests/queries/0_stateless/02995_index_2.reference @@ -0,0 +1,13 @@ +103 1 2446615 2446615 1 2498 2498 1 58 58 1 +106 72 6149 6699 67 527 826 40 61 61 1 +111 43 2273186 5272 43 492 4923 4 54 72 15 +120 3129 
45117 6735 2868 1030 1625 561 59 64 6 +138 2 49243 49374 2 1428 1519 2 47 48 2 +143 100 23321 63639 100 1115 1624 88 51 51 1 +145 1 2447976 2447976 1 6173 6173 1 44 44 1 +153 16 13748 16881 16 1506 1636 16 54 68 9 +159 19952 1525336 7131 12957 1280 6163 2668 24 66 39 +171 5 15042 16698 5 1302 1608 5 65 65 1 +179 6264 1362341 2686 6244 2554 7132 2705 61 67 7 +192 1 1639623 1639623 1 3406 3406 1 32 32 1 +193 1 1429969 1429969 1 7131 7131 1 45 45 1 diff --git a/tests/queries/0_stateless/02995_index_2.sh b/tests/queries/0_stateless/02995_index_2.sh new file mode 100755 index 00000000000..e7451c7ee4b --- /dev/null +++ b/tests/queries/0_stateless/02995_index_2.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {101..200} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 
8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_3.reference b/tests/queries/0_stateless/02995_index_3.reference new file mode 100644 index 00000000000..9c2fca9fde6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_3.reference @@ -0,0 +1,16 @@ +207 12 23057 32500 12 1491 1726 12 32 46 7 +221 5081 1366870 6649 3432 4527 5226 687 24 69 39 +228 73 12281 17929 71 1328 2034 63 49 71 18 +229 2 1617478 1723791 2 4590 5578 2 41 42 2 +230 3916 1332729 6949 3668 1330 4703 845 62 65 4 +238 25 2624456 2625673 24 2535 6465 25 58 75 14 +241 154 2554929 2616444 154 2626 7131 148 34 57 17 +248 276 15529 30987 274 1040 1222 136 37 79 27 +254 3018 33966 6635 2837 1057 1622 539 24 60 33 +255 20 1581774 1811334 20 6068 6301 18 33 57 10 +256 5 5145 6841 5 367 376 5 58 58 1 +270 2 2195579 2262119 2 7102 7123 2 33 34 2 +281 32 2379460 616215 32 6042 6086 23 53 64 12 +282 7 1292651 24244 7 1607 2455 6 46 55 5 +286 123 1521935 5269 123 3793 3940 81 40 66 22 +291 21 2419080 3567 21 297 4731 21 54 55 2 diff --git a/tests/queries/0_stateless/02995_index_3.sh b/tests/queries/0_stateless/02995_index_3.sh new file mode 100755 index 00000000000..506429e2696 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_3.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {201..300} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_4.reference b/tests/queries/0_stateless/02995_index_4.reference new file mode 100644 index 00000000000..deff7afaed3 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_4.reference @@ -0,0 +1,16 @@ +316 4 5221 5616 4 505 558 4 32 35 3 +319 232 56480 63033 230 1599 313 50 33 64 26 +327 15 51647 51894 14 1292 1585 14 47 57 7 +332 24 23484 
54948 24 1609 1726 16 32 49 11 +333 1 14189 14189 1 1550 1550 1 63 63 1 +342 49 2579220 2622432 49 4626 6933 48 34 54 14 +344 1 6486 6486 1 509 509 1 24 24 1 +346 1987 53016 6735 1823 1334 174 294 26 62 32 +358 45 59058 60844 44 6746 722 40 57 84 15 +363 1198 1260033 2568811 1196 5710 5790 82 55 80 26 +384 150 2361175 476024 150 7008 7123 81 38 64 22 +387 277 5200 6553 252 243 521 130 65 65 1 +392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13 +396 8181 1380803 6186 7920 545 798 1743 24 67 39 +398 3 5183 5213 2 291 352 3 53 59 3 +399 62 51494 59203 61 7073 754 42 55 78 18 diff --git a/tests/queries/0_stateless/02995_index_4.sh b/tests/queries/0_stateless/02995_index_4.sh new file mode 100755 index 00000000000..1a0458728f9 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_4.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {301..400} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_5.reference b/tests/queries/0_stateless/02995_index_5.reference new file mode 100644 index 00000000000..c5ab4d2417c --- /dev/null +++ b/tests/queries/0_stateless/02995_index_5.reference @@ -0,0 +1,9 @@ +412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5 +413 2 2036037 2064917 2 3963 4666 2 43 45 2 +431 33 2302331 2348449 33 4425 6516 32 69 
69 1 +447 59 25125 33094 59 1176 1817 56 53 58 6 +456 1 53157 53157 1 1556 1556 1 26 26 1 +462 5 5456 6280 5 348 4337 5 28 40 5 +472 1 1443716 1443716 1 6122 6122 1 42 42 1 +491 34 1066102 1183673 34 6606 6822 32 46 67 15 +498 896 2230163 3054 895 537 7131 714 24 59 28 diff --git a/tests/queries/0_stateless/02995_index_5.sh b/tests/queries/0_stateless/02995_index_5.sh new file mode 100755 index 00000000000..60c12a8146d --- /dev/null +++ b/tests/queries/0_stateless/02995_index_5.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {401..500} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 
8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_6.reference b/tests/queries/0_stateless/02995_index_6.reference new file mode 100644 index 00000000000..bac19179bb6 --- /dev/null +++ b/tests/queries/0_stateless/02995_index_6.reference @@ -0,0 +1,14 @@ +504 108 12281 25180 108 1318 1784 94 55 66 12 +515 22 1588883 2640809 22 6554 6571 15 46 59 12 +518 1 37743 37743 1 1558 1558 1 72 72 1 +530 1 3033 3033 1 561 561 1 59 59 1 +532 26 5721 6355 25 549 665 14 44 50 7 +546 156 2577874 48517 156 1105 324 133 44 51 8 +554 12 1665194 2640066 12 1817 2951 12 57 57 1 +564 3865 2028049 2083433 3722 1115 985 2203 44 84 41 +566 4432 50605 57509 3217 1191 267 459 26 72 39 +567 8 5221 5893 7 333 558 8 27 35 4 +582 1172 1320619 2019743 1172 5819 7131 757 26 63 30 +584 43100 2500 5594 22561 134 4573 1660 48 84 37 +589 28 6046 6068 19 345 564 27 55 62 8 +595 139 1585165 1683606 138 2231 3598 132 54 84 28 diff --git a/tests/queries/0_stateless/02995_index_6.sh b/tests/queries/0_stateless/02995_index_6.sh new file mode 100755 index 00000000000..4936f73f36b --- /dev/null +++ b/tests/queries/0_stateless/02995_index_6.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {501..600} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_7.reference b/tests/queries/0_stateless/02995_index_7.reference new file mode 100644 index 00000000000..4f1d28ab37e --- /dev/null +++ b/tests/queries/0_stateless/02995_index_7.reference @@ -0,0 +1,9 @@ +615 3 1056081 1116230 3 5794 5796 2 59 62 3 +619 7 1543114 5241 7 2442 3105 7 41 45 3 +634 2722 1221058 4999 2686 2426 7131 1735 54 60 7 
+635 237 2119333 4667 237 561 5999 176 49 60 12 +644 5 1774169 2056171 5 5591 6091 4 33 39 3 +647 8 51632 64403 8 1457 1624 8 26 34 5 +651 1325 1620565 6281 1301 528 792 815 62 63 2 +665 13 4598 4789 13 511 558 11 39 46 7 +679 1560 1613200 25940 1552 1569 3118 781 49 84 35 diff --git a/tests/queries/0_stateless/02995_index_7.sh b/tests/queries/0_stateless/02995_index_7.sh new file mode 100755 index 00000000000..26be310abce --- /dev/null +++ b/tests/queries/0_stateless/02995_index_7.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {601..700} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 
8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_8.reference b/tests/queries/0_stateless/02995_index_8.reference new file mode 100644 index 00000000000..62fbfb2af9a --- /dev/null +++ b/tests/queries/0_stateless/02995_index_8.reference @@ -0,0 +1,12 @@ +704 2 14226 15594 2 1086 1116 2 65 71 2 +715 25 1199352 3490 25 5036 5112 23 34 55 13 +716 1253 61989 6735 1050 1203 1625 397 52 65 14 +730 2584 5560 6170 634 2421 627 293 56 69 14 +736 8 1433153 4941 8 339 4594 8 28 36 5 +749 2 1326176 1339862 2 4339 6213 2 49 50 2 +753 1 53157 53157 1 1556 1556 1 26 26 1 +761 63 1443230 6881 63 3154 3204 26 56 73 14 +762 49 1449596 1968154 49 2437 3753 48 54 62 9 +775 35107 5330 769436 2471 447 6607 656 70 81 12 +789 1 1552458 1552458 1 2441 2441 1 62 62 1 +794 158 5585 6585 155 495 929 67 24 50 20 diff --git a/tests/queries/0_stateless/02995_index_8.sh b/tests/queries/0_stateless/02995_index_8.sh new file mode 100755 index 00000000000..8c2620b59fd --- /dev/null +++ b/tests/queries/0_stateless/02995_index_8.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {701..800} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/02995_index_9.reference b/tests/queries/0_stateless/02995_index_9.reference new file mode 100644 index 00000000000..86c7be173bd --- /dev/null +++ b/tests/queries/0_stateless/02995_index_9.reference @@ -0,0 +1,9 @@ +839 9 29223 46530 9 1336 1465 9 52 52 1 +844 5 2377545 2377635 5 5129 6321 5 53 69 5 +846 50 2172273 2589295 50 1582 3053 48 64 68 5 +847 
2577 56656 63658 1582 1444 838 474 26 63 33 +861 1333 5570 6909 839 457 489 37 33 70 34 +873 2360 1519811 50487 2248 1310 1784 316 60 68 9 +879 228 6704 6785 79 279 507 121 35 66 24 +889 5130 2070007 39692 5040 1151 6791 2606 44 66 23 +896 4 511246 859452 4 6554 6561 4 67 71 4 diff --git a/tests/queries/0_stateless/02995_index_9.sh b/tests/queries/0_stateless/02995_index_9.sh new file mode 100755 index 00000000000..76160c62aaa --- /dev/null +++ b/tests/queries/0_stateless/02995_index_9.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " + +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +INSERT INTO test +SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10)) +FROM numbers(100000); + +DETACH TABLE test; +ATTACH TABLE test; +" + +for i in {801..900} +do + echo " +WITH ${i} AS try +SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test +WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String + AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String + AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String + AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) 
% 8))))::String + AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String + AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String +HAVING count() > 0; +" +done | ${CLICKHOUSE_CLIENT} --multiquery + +${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test" diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference deleted file mode 100644 index 65034647775..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference +++ /dev/null @@ -1,92 +0,0 @@ -MergeTree compact + horizontal merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree wide + horizontal merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a 
Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree compact + vertical merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None -MergeTree wide + vertical merge -test -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None 
-133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh deleted file mode 100755 index 5d8eac082cf..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" - - -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" - $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - - $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - $CH_CLIENT -q "insert into 
test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference new file mode 
100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh new file mode 100755 index 00000000000..d4b6d1f4b63 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + 
+$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh new file mode 100755 index 00000000000..39671a297cf --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || 
dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh new file mode 100755 index 00000000000..d58545c0b13 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "drop 
table test;" + diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference new file mode 100644 index 00000000000..4be740f6050 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference @@ -0,0 +1,21 @@ +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh new file mode 100755 index 00000000000..39671a297cf --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" + +$CH_CLIENT -q "drop table if exists test;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" + +$CH_CLIENT -q "system stop merges test" +$CH_CLIENT -q "insert into test select number, number from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" +$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" +$CH_CLIENT -nm -q "system start merges test; optimize table test final;" +$CH_CLIENT -q "select count(), dynamicType(d) || ':' || 
dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" + +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql new file mode 100644 index 00000000000..bb3269da597 --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql @@ -0,0 +1,4 @@ +CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x; +INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference new file mode 100644 index 00000000000..a1afeb1ab82 --- /dev/null +++ b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.reference @@ -0,0 +1 @@ +SELECT NOT ((SELECT 1)) diff --git a/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh new file mode 100755 index 00000000000..594d316b621 --- /dev/null +++ b/tests/queries/0_stateless/03208_inconsistent_formatting_of_not_subquery.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck 
source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_FORMAT --oneline --query "SELECT NOT((SELECT 1))" diff --git a/tests/queries/1_stateful/00162_mmap_compression_none.sql b/tests/queries/1_stateful/00162_mmap_compression_none.sql index d2cbcea8aaa..48d6ada821e 100644 --- a/tests/queries/1_stateful/00162_mmap_compression_none.sql +++ b/tests/queries/1_stateful/00162_mmap_compression_none.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS hits_none; CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -INSERT INTO hits_none SELECT Title FROM test.hits; +INSERT INTO hits_none SELECT Title FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16; SET min_bytes_to_use_mmap_io = 1; SELECT sum(length(Title)) FROM hits_none;