Merge branch 'master' into read-cgroup-memory-usage-async-metrics

Antonio Andelic 2024-07-24 09:55:25 +02:00
commit 7e026aec8b
95 changed files with 1717 additions and 1029 deletions

View File

@ -272,7 +272,4 @@ jobs:
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
echo "::group::Workflow results"
python3 -m json.tool "$WORKFLOW_RESULT_FILE"
echo "::endgroup::"
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -138,7 +138,4 @@ jobs:
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
echo "::group::Workflow results"
python3 -m json.tool "$WORKFLOW_RESULT_FILE"
echo "::endgroup::"
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -111,7 +111,4 @@ jobs:
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
echo "::group::Workflow results"
python3 -m json.tool "$WORKFLOW_RESULT_FILE"
echo "::endgroup::"
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -57,7 +57,4 @@ jobs:
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
echo "::group::Workflow results"
python3 -m json.tool "$WORKFLOW_RESULT_FILE"
echo "::endgroup::"
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -171,9 +171,6 @@ jobs:
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
echo "::group::Workflow results"
python3 -m json.tool "$WORKFLOW_RESULT_FILE"
echo "::endgroup::"
python3 ./tests/ci/ci_buddy.py --check-wf-status
################################# Stage Final #################################

View File

@ -492,7 +492,5 @@ jobs:
cat >> "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
echo "::group::Workflow results"
python3 -m json.tool "$WORKFLOW_RESULT_FILE"
echo "::endgroup::"
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -2,11 +2,11 @@
# NOTE: VERSION_REVISION has nothing in common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54488)
SET(VERSION_REVISION 54489)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9)
SET(VERSION_DESCRIBE v24.7.1.1-testing)
SET(VERSION_STRING 24.7.1.1)
SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af)
SET(VERSION_DESCRIBE v24.8.1.1-testing)
SET(VERSION_STRING 24.8.1.1)
# end of autochange

View File

@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta)
add_library(_datasketches INTERFACE)
target_include_directories(_datasketches SYSTEM BEFORE INTERFACE
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include")
add_library(ch_contrib::datasketches ALIAS _datasketches)

contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 8f28e64d15819d2d096badd598c7d85bebddb1f2
Subproject commit fe854449e24bedfa26e38465b84374312dbd587f

View File

@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update --yes \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

View File

@ -191,8 +191,8 @@ else
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
else
@ -200,7 +200,7 @@ else
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
fi
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
fi
clickhouse-client --query "SHOW TABLES FROM test"

View File

@ -209,9 +209,9 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"

View File

@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
[HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation of how many distinct values a column contains.
- `count_min`
[Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be overridden at the column level:

View File

@ -543,7 +543,7 @@ if (TARGET ch_contrib::libpqxx)
endif()
if (TARGET ch_contrib::datasketches)
target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches)
dbms_target_link_libraries(PUBLIC ch_contrib::datasketches)
endif ()
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)

View File

@ -11,6 +11,7 @@
#include <base/cgroupsv2.h>
#include <base/getMemoryAmount.h>
#include <base/sleep.h>
#include <fmt/ranges.h>
#include <cstdint>

View File

@ -57,7 +57,8 @@ static struct InitFiu
PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \
PAUSEABLE(dummy_pausable_failpoint) \
ONCE(execute_query_calling_empty_set_result_func_on_exception) \
ONCE(receive_timeout_on_table_status_response)
ONCE(receive_timeout_on_table_status_response) \
REGULAR(keepermap_fail_drop_data) \
namespace FailPoints

View File

@ -10,6 +10,8 @@
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <fmt/ranges.h>
#include <filesystem>
#include <memory>
#include <optional>
@ -33,41 +35,41 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
struct ICgroupsReader
{
virtual ~ICgroupsReader() = default;
virtual uint64_t readMemoryUsage() = 0;
};
#if defined(OS_LINUX)
namespace
{
#if defined(OS_LINUX)
using Metrics = std::map<std::string, uint64_t>;
/// Format is
/// kernel 5
/// rss 15
/// [...]
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf)
{
Metrics metrics;
while (!buf.eof())
{
std::string current_key;
readStringUntilWhitespace(current_key, buf);
if (current_key != key)
{
std::string dummy;
readStringUntilNewlineInto(dummy, buf);
buf.ignore();
continue;
}
assertChar(' ', buf);
uint64_t value = 0;
readIntText(value, buf);
return value;
}
assertChar('\n', buf);
auto [_, inserted] = metrics.emplace(std::move(current_key), value);
chassert(inserted, "Duplicate keys in stat file");
}
return metrics;
}
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
{
const auto all_metrics = readAllMetricsFromStatFile(buf);
if (const auto it = all_metrics.find(key); it != all_metrics.end())
return it->second;
LOG_ERROR(getLogger("CgroupsReader"), "Cannot find '{}' in '{}'", key, buf.getFileName());
return 0;
}
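For orientation, a self-contained sketch of the same parsing idea using plain C++ streams instead of ClickHouse's ReadBufferFromFile; the function and map below are illustrative stand-ins for readAllMetricsFromStatFile above, not part of this commit.

/// Standalone illustration (not ClickHouse code): parse a cgroup memory.stat-style
/// "key value" text into a map, analogous to readAllMetricsFromStatFile above.
#include <cstdint>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

std::map<std::string, uint64_t> parseStatText(const std::string & text)
{
    std::map<std::string, uint64_t> metrics;
    std::istringstream in(text);
    std::string key;
    uint64_t value;
    while (in >> key >> value)    /// each line is "<key> <value>"
        metrics.emplace(key, value);
    return metrics;
}

int main()
{
    const auto metrics = parseStatText("kernel 5\nrss 15\n");
    std::cout << "rss = " << metrics.at("rss") << '\n';    /// prints: rss = 15
}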
@ -83,6 +85,13 @@ struct CgroupsV1Reader : ICgroupsReader
return readMetricFromStatFile(buf, "rss");
}
std::string dumpAllStats() override
{
std::lock_guard lock(mutex);
buf.rewind();
return fmt::format("{}", readAllMetricsFromStatFile(buf));
}
private:
std::mutex mutex;
ReadBufferFromFile buf TSA_GUARDED_BY(mutex);
@ -99,6 +108,13 @@ struct CgroupsV2Reader : ICgroupsReader
return readMetricFromStatFile(stat_buf, "anon");
}
std::string dumpAllStats() override
{
std::lock_guard lock(mutex);
stat_buf.rewind();
return fmt::format("{}", readAllMetricsFromStatFile(stat_buf));
}
private:
std::mutex mutex;
ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex);
@ -147,34 +163,23 @@ std::optional<std::string> getCgroupsV1Path()
return {default_cgroups_mount / "memory"};
}
enum class CgroupsVersion : uint8_t
{
V1,
V2
};
std::pair<std::string, CgroupsVersion> getCgroupsPath()
std::pair<std::string, ICgroupsReader::CgroupsVersion> getCgroupsPath()
{
auto v2_path = getCgroupsV2Path();
if (v2_path.has_value())
return {*v2_path, CgroupsVersion::V2};
return {*v2_path, ICgroupsReader::CgroupsVersion::V2};
auto v1_path = getCgroupsV1Path();
if (v1_path.has_value())
return {*v1_path, CgroupsVersion::V1};
return {*v1_path, ICgroupsReader::CgroupsVersion::V1};
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file");
}
std::shared_ptr<ICgroupsReader> createCgroupsReader()
{
const auto [cgroup_path, version] = getCgroupsPath();
LOG_INFO(
getLogger("CgroupsReader"),
"Will create cgroup reader from '{}' (cgroups version: {})",
cgroup_path,
(version == CgroupsVersion::V1) ? "v1" : "v2");
}
std::shared_ptr<ICgroupsReader> ICgroupsReader::createCgroupsReader(ICgroupsReader::CgroupsVersion version, const std::filesystem::path & cgroup_path)
{
if (version == CgroupsVersion::V2)
return std::make_shared<CgroupsV2Reader>(cgroup_path);
else
@ -182,10 +187,12 @@ std::shared_ptr<ICgroupsReader> createCgroupsReader()
chassert(version == CgroupsVersion::V1);
return std::make_shared<CgroupsV1Reader>(cgroup_path);
}
}
#endif
namespace
{
std::string_view sourceToString(MemoryWorker::MemoryUsageSource source)
{
switch (source)
@ -212,7 +219,14 @@ MemoryWorker::MemoryWorker(uint64_t period_ms_)
{
static constexpr uint64_t cgroups_memory_usage_tick_ms{50};
cgroups_reader = createCgroupsReader();
const auto [cgroup_path, version] = getCgroupsPath();
LOG_INFO(
getLogger("CgroupsReader"),
"Will create cgroup reader from '{}' (cgroups version: {})",
cgroup_path,
(version == ICgroupsReader::CgroupsVersion::V1) ? "v1" : "v2");
cgroups_reader = ICgroupsReader::createCgroupsReader(version, cgroup_path);
source = MemoryUsageSource::Cgroups;
if (period_ms == 0)
period_ms = cgroups_memory_usage_tick_ms;
@ -284,7 +298,7 @@ uint64_t MemoryWorker::getMemoryUsage()
void MemoryWorker::backgroundThread()
{
std::chrono::milliseconds chrono_period_ms{period_ms};
[[maybe_unused]] bool first_run = true;
bool first_run = true;
std::unique_lock lock(mutex);
while (true)
{

View File

@ -7,14 +7,31 @@
namespace DB
{
struct ICgroupsReader;
struct ICgroupsReader
{
enum class CgroupsVersion : uint8_t
{
V1,
V2
};
/// Correct MemoryTracker based on stats.resident read from jemalloc.
/// This requires jemalloc built with --enable-stats which we use.
/// The worker spawns a background thread which moves the jemalloc epoch (updates internal stats),
/// and fetches the current stats.resident whose value is sent to global MemoryTracker.
/// Additionally, if the current memory usage is higher than global hard limit,
/// jemalloc's dirty pages are forcefully purged.
#if defined(OS_LINUX)
static std::shared_ptr<ICgroupsReader>
createCgroupsReader(ICgroupsReader::CgroupsVersion version, const std::filesystem::path & cgroup_path);
#endif
virtual ~ICgroupsReader() = default;
virtual uint64_t readMemoryUsage() = 0;
virtual std::string dumpAllStats() = 0;
};
/// Correct MemoryTracker based on external information (e.g. Cgroups or stats.resident from jemalloc)
/// The worker spawns a background thread which periodically reads current resident memory from the source,
/// whose value is sent to global MemoryTracker.
/// It can do additional things like purging jemalloc dirty pages if the current memory usage is higher than the global hard limit.
class MemoryWorker
{
public:
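As a rough illustration of the loop the comment above describes (not the actual MemoryWorker implementation), a periodic sampler that reads the current usage from a pluggable source and forwards it to a sink — standing in for the global MemoryTracker correction — might look like the sketch below; every name in it is made up for the example.

/// Illustrative sketch only: a background thread that periodically reads "current
/// memory usage" from a source and hands it to a sink, in the spirit of MemoryWorker.
#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <thread>

class PeriodicMemorySampler
{
public:
    using Source = std::function<uint64_t()>;    /// e.g. a cgroups reader or jemalloc's stats.resident
    using Sink = std::function<void(uint64_t)>;  /// e.g. forwarding the value into a global tracker

    PeriodicMemorySampler(Source source_, Sink sink_, std::chrono::milliseconds period_)
        : source(std::move(source_)), sink(std::move(sink_)), period(period_)
    {
        thread = std::thread([this] { run(); });
    }

    ~PeriodicMemorySampler()
    {
        stopped = true;
        thread.join();
    }

private:
    void run()
    {
        while (!stopped)
        {
            sink(source());                      /// push the current usage to the tracker
            std::this_thread::sleep_for(period);
        }
    }

    Source source;
    Sink sink;
    std::chrono::milliseconds period;
    std::atomic<bool> stopped{false};
    std::thread thread;
};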

View File

@ -0,0 +1,178 @@
#if defined(OS_LINUX)
#include <gtest/gtest.h>
#include <cstdint>
#include <filesystem>
#include <IO/WriteBufferFromFile.h>
#include <Common/MemoryWorker.h>
#include <Common/filesystemHelpers.h>
using namespace DB;
const std::string SAMPLE_FILE[2] = {
R"(cache 4673703936
rss 2232029184
rss_huge 0
shmem 0
mapped_file 344678400
dirty 4730880
writeback 135168
swap 0
pgpgin 2038569918
pgpgout 2036883790
pgfault 2055373287
pgmajfault 0
inactive_anon 2156335104
active_anon 0
inactive_file 2841305088
active_file 1653915648
unevictable 256008192
hierarchical_memory_limit 8589934592
hierarchical_memsw_limit 8589934592
total_cache 4673703936
total_rss 2232029184
total_rss_huge 0
total_shmem 0
total_mapped_file 344678400
total_dirty 4730880
total_writeback 135168
total_swap 0
total_pgpgin 2038569918
total_pgpgout 2036883790
total_pgfault 2055373287
total_pgmajfault 0
total_inactive_anon 2156335104
total_active_anon 0
total_inactive_file 2841305088
total_active_file 1653915648
total_unevictable 256008192
)",
R"(anon 10429399040
file 17410793472
kernel 1537789952
kernel_stack 3833856
pagetables 65441792
sec_pagetables 0
percpu 15232
sock 0
vmalloc 0
shmem 0
zswap 0
zswapped 0
file_mapped 344010752
file_dirty 2060857344
file_writeback 0
swapcached 0
anon_thp 0
file_thp 0
shmem_thp 0
inactive_anon 0
active_anon 10429370368
inactive_file 8693084160
active_file 8717561856
unevictable 0
slab_reclaimable 1460982504
slab_unreclaimable 5152864
slab 1466135368
workingset_refault_anon 0
workingset_refault_file 0
workingset_activate_anon 0
workingset_activate_file 0
workingset_restore_anon 0
workingset_restore_file 0
workingset_nodereclaim 0
pgscan 0
pgsteal 0
pgscan_kswapd 0
pgscan_direct 0
pgscan_khugepaged 0
pgsteal_kswapd 0
pgsteal_direct 0
pgsteal_khugepaged 0
pgfault 43026352
pgmajfault 36762
pgrefill 0
pgactivate 0
pgdeactivate 0
pglazyfree 259
pglazyfreed 0
zswpin 0
zswpout 0
thp_fault_alloc 0
thp_collapse_alloc 0
)"};
const std::string EXPECTED[2]
= {"{\"active_anon\": 0, \"active_file\": 1653915648, \"cache\": 4673703936, \"dirty\": 4730880, \"hierarchical_memory_limit\": "
"8589934592, \"hierarchical_memsw_limit\": 8589934592, \"inactive_anon\": 2156335104, \"inactive_file\": 2841305088, "
"\"mapped_file\": 344678400, \"pgfault\": 2055373287, \"pgmajfault\": 0, \"pgpgin\": 2038569918, \"pgpgout\": 2036883790, \"rss\": "
"2232029184, \"rss_huge\": 0, \"shmem\": 0, \"swap\": 0, \"total_active_anon\": 0, \"total_active_file\": 1653915648, "
"\"total_cache\": 4673703936, \"total_dirty\": 4730880, \"total_inactive_anon\": 2156335104, \"total_inactive_file\": 2841305088, "
"\"total_mapped_file\": 344678400, \"total_pgfault\": 2055373287, \"total_pgmajfault\": 0, \"total_pgpgin\": 2038569918, "
"\"total_pgpgout\": 2036883790, \"total_rss\": 2232029184, \"total_rss_huge\": 0, \"total_shmem\": 0, \"total_swap\": 0, "
"\"total_unevictable\": 256008192, \"total_writeback\": 135168, \"unevictable\": 256008192, \"writeback\": 135168}",
"{\"active_anon\": 10429370368, \"active_file\": 8717561856, \"anon\": 10429399040, \"anon_thp\": 0, \"file\": 17410793472, "
"\"file_dirty\": 2060857344, \"file_mapped\": 344010752, \"file_thp\": 0, \"file_writeback\": 0, \"inactive_anon\": 0, "
"\"inactive_file\": 8693084160, \"kernel\": 1537789952, \"kernel_stack\": 3833856, \"pagetables\": 65441792, \"percpu\": 15232, "
"\"pgactivate\": 0, \"pgdeactivate\": 0, \"pgfault\": 43026352, \"pglazyfree\": 259, \"pglazyfreed\": 0, \"pgmajfault\": 36762, "
"\"pgrefill\": 0, \"pgscan\": 0, \"pgscan_direct\": 0, \"pgscan_khugepaged\": 0, \"pgscan_kswapd\": 0, \"pgsteal\": 0, "
"\"pgsteal_direct\": 0, \"pgsteal_khugepaged\": 0, \"pgsteal_kswapd\": 0, \"sec_pagetables\": 0, \"shmem\": 0, \"shmem_thp\": 0, "
"\"slab\": 1466135368, \"slab_reclaimable\": 1460982504, \"slab_unreclaimable\": 5152864, \"sock\": 0, \"swapcached\": 0, "
"\"thp_collapse_alloc\": 0, \"thp_fault_alloc\": 0, \"unevictable\": 0, \"vmalloc\": 0, \"workingset_activate_anon\": 0, "
"\"workingset_activate_file\": 0, \"workingset_nodereclaim\": 0, \"workingset_refault_anon\": 0, \"workingset_refault_file\": 0, "
"\"workingset_restore_anon\": 0, \"workingset_restore_file\": 0, \"zswap\": 0, \"zswapped\": 0, \"zswpin\": 0, \"zswpout\": 0}"};
class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam<ICgroupsReader::CgroupsVersion>
{
void SetUp() override
{
const uint8_t version = static_cast<uint8_t>(GetParam());
tmp_dir = fmt::format("./test_cgroups_{}", magic_enum::enum_name(GetParam()));
fs::create_directories(tmp_dir);
auto stat_file = WriteBufferFromFile(tmp_dir + "/memory.stat");
stat_file.write(SAMPLE_FILE[version].data(), SAMPLE_FILE[version].size());
stat_file.sync();
if (GetParam() == ICgroupsReader::CgroupsVersion::V2)
{
auto current_file = WriteBufferFromFile(tmp_dir + "/memory.current");
current_file.write("29645422592", 11);
current_file.sync();
}
}
protected:
std::string tmp_dir;
};
TEST_P(CgroupsMemoryUsageObserverFixture, ReadMemoryUsageTest)
{
const auto version = GetParam();
auto reader = ICgroupsReader::createCgroupsReader(version, tmp_dir);
ASSERT_EQ(
reader->readMemoryUsage(),
version == ICgroupsReader::CgroupsVersion::V1 ? /* rss from memory.stat */ 2232029184
: /* anon from memory.stat */ 10429399040);
}
TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest)
{
const auto version = GetParam();
auto reader = ICgroupsReader::createCgroupsReader(version, tmp_dir);
ASSERT_EQ(reader->dumpAllStats(), EXPECTED[static_cast<uint8_t>(version)]);
}
INSTANTIATE_TEST_SUITE_P(
CgroupsMemoryUsageObserverTests,
CgroupsMemoryUsageObserverFixture,
::testing::Values(ICgroupsReader::CgroupsVersion::V1, ICgroupsReader::CgroupsVersion::V2));
#endif

View File

@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE)
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get<String>(), type.getName());
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get<String>(), type.getName());
e.addMessage(fmt::format("while converting '{}' to {}", src.get<String>(), type.getName()));
throw;

View File

@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P(
DecimalField(DateTime64(123 * Day * 1'000'000), 6)
}
})
);
);
INSTANTIATE_TEST_SUITE_P(
DateTimeToDateTime64,
@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P(
},
})
);
INSTANTIATE_TEST_SUITE_P(
StringToNumber,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
{
"String",
Field("1"),
"Int8",
Field(1)
},
{
"String",
Field("256"),
"Int8",
Field()
},
{
"String",
Field("not a number"),
"Int8",
{}
},
{
"String",
Field("1.1"),
"Int8",
{} /// we can not convert '1.1' to Int8
},
{
"String",
Field("1.1"),
"Float64",
Field(1.1)
},
})
);
INSTANTIATE_TEST_SUITE_P(
NumberToString,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
{
"Int8",
Field(1),
"String",
Field("1")
},
{
"Int8",
Field(-1),
"String",
Field("-1")
},
{
"Float64",
Field(1.1),
"String",
Field("1.1")
},
})
);
INSTANTIATE_TEST_SUITE_P(
StringToDate,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
{
"String",
Field("2024-07-12"),
"Date",
Field(static_cast<UInt16>(19916))
},
{
"String",
Field("not a date"),
"Date",
{}
},
})
);

View File

@ -329,19 +329,23 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
const auto * literal = arguments->children[0]->as<ASTLiteral>();
const auto * function = arguments->children[0]->as<ASTFunction>();
const auto * subquery = arguments->children[0]->as<ASTSubquery>();
bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple;
// do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))`
/// Do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))`
bool literal_need_parens = literal && !is_tuple;
// negate always requires parentheses, otherwise -(-1) will be printed as --1
bool inside_parens = name == "negate" && (literal_need_parens || (function && function->name == "negate"));
/// Negate always requires parentheses, otherwise -(-1) will be printed as --1
/// Also extra parentheses are needed for subqueries, because NOT can be parsed as a function:
/// not(SELECT 1) cannot be parsed, while not((SELECT 1)) can.
bool inside_parens = (name == "negate" && (literal_need_parens || (function && function->name == "negate")))
|| (subquery && name == "not");
/// We DO need parentheses around a single literal
/// For example, SELECT (NOT 0) + (NOT 0) cannot be transformed into SELECT NOT 0 + NOT 0, since
/// this is equal to SELECT NOT (0 + NOT 0)
bool outside_parens = frame.need_parens && !inside_parens;
// do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1)))
/// Do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1)))
if (inside_parens)
nested_need_parens.need_parens = false;

View File

@ -9,7 +9,7 @@ namespace DB
{
/** The SELECT subquery is in parenthesis.
/** The SELECT subquery, in parentheses.
*/
class ParserSubquery : public IParserBase
{

View File

@ -11,15 +11,12 @@
namespace DB
{
bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword s_describe(Keyword::DESCRIBE);
ParserKeyword s_desc(Keyword::DESC);
ParserKeyword s_table(Keyword::TABLE);
ParserKeyword s_settings(Keyword::SETTINGS);
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p;
ParserSetQuery parser_settings(true);
ASTPtr database;
@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
return true;
}
}

View File

@ -304,7 +304,7 @@ void RefreshTask::refreshTask()
{
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
auto text = message.text;
message.text = fmt::format("Refresh failed: {}", message.text);
message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text);
LOG_ERROR(log, message);
exception = text;
}

View File

@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
{
auto stats = part->loadStatistics();
/// TODO: We only have one stats file for every part.
result.addRows(part->rows_count);
for (const auto & stat : stats)
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
result.merge(part->info.getPartNameV1(), stat);
}
catch (...)
{
@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
if (!partition_pruner.canBePruned(*part))
{
auto stats = part->loadStatistics();
result.addRows(part->rows_count);
for (const auto & stat : stats)
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
result.merge(part->info.getPartNameV1(), stat);
}
}
catch (...)
@ -1144,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr);
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false);
if (!filter_dag)
return {};

View File

@ -44,10 +44,12 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(
const String & index_name_,
const Block & index_sample_block_,
size_t max_rows_,
MutableColumns && mutable_columns_)
MutableColumns && mutable_columns_,
std::vector<Range> && set_hyperrectangle_)
: index_name(index_name_)
, max_rows(max_rows_)
, block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_)))
, set_hyperrectangle(std::move(set_hyperrectangle_))
{
}
@ -106,6 +108,10 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
settings.position_independent_encoding = false;
set_hyperrectangle.clear();
Field min_val;
Field max_val;
for (size_t i = 0; i < num_columns; ++i)
{
auto & elem = block.getByPosition(i);
@ -116,6 +122,13 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr);
serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr);
if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(elem.column.get()))
column_nullable->getExtremesNullLast(min_val, max_val);
else
elem.column->getExtremes(min_val, max_val);
set_hyperrectangle.emplace_back(min_val, true, max_val, true);
}
}
@ -182,10 +195,29 @@ void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size
if (has_new_data)
{
FieldRef field_min;
FieldRef field_max;
for (size_t i = 0; i < columns.size(); ++i)
{
auto filtered_column = block.getByName(index_columns[i]).column->filter(filter, block.rows());
columns[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size());
if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(filtered_column.get()))
column_nullable->getExtremesNullLast(field_min, field_max);
else
filtered_column->getExtremes(field_min, field_max);
if (set_hyperrectangle.size() <= i)
{
set_hyperrectangle.emplace_back(field_min, true, field_max, true);
}
else
{
set_hyperrectangle[i].left
= applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].left, field_min) ? set_hyperrectangle[i].left : field_min;
set_hyperrectangle[i].right
= applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].right, field_max) ? field_max : set_hyperrectangle[i].right;
}
}
}
@ -221,7 +253,7 @@ bool MergeTreeIndexAggregatorSet::buildFilter(
MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
{
auto granule = std::make_shared<MergeTreeIndexGranuleSet>(index_name, index_sample_block, max_rows, std::move(columns));
auto granule = std::make_shared<MergeTreeIndexGranuleSet>(index_name, index_sample_block, max_rows, std::move(columns), std::move(set_hyperrectangle));
switch (data.type)
{
@ -240,17 +272,22 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
return granule;
}
KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context)
{
return KeyCondition{filter_actions_dag, context, index.column_names, index.expression};
}
MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
const String & index_name_,
const Block & index_sample_block,
size_t max_rows_,
const ActionsDAGPtr & filter_dag,
ContextPtr context)
: index_name(index_name_)
ContextPtr context,
const IndexDescription & index_description)
: index_name(index_description.name)
, max_rows(max_rows_)
, index_data_types(index_description.data_types)
, condition(buildCondition(index_description, filter_dag, context))
{
for (const auto & name : index_sample_block.getNames())
for (const auto & name : index_description.sample_block.getNames())
if (!key_columns.contains(name))
key_columns.insert(name);
@ -293,6 +330,9 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
if (size == 0 || (max_rows != 0 && size > max_rows))
return true;
if (!condition.checkInHyperrectangle(granule.set_hyperrectangle, index_data_types).can_be_true)
return false;
Block result = granule.block;
actions->execute(result);
@ -546,7 +586,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge
MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition(
const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const
{
return std::make_shared<MergeTreeIndexConditionSet>(index.name, index.sample_block, max_rows, filter_actions_dag, context);
return std::make_shared<MergeTreeIndexConditionSet>(max_rows, filter_actions_dag, context, index);
}
MergeTreeIndexPtr setIndexCreator(const IndexDescription & index)

View File

@ -22,7 +22,8 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule
const String & index_name_,
const Block & index_sample_block_,
size_t max_rows_,
MutableColumns && columns_);
MutableColumns && columns_,
std::vector<Range> && set_hyperrectangle_);
void serializeBinary(WriteBuffer & ostr) const override;
void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override;
@ -36,6 +37,7 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule
const size_t max_rows;
Block block;
std::vector<Range> set_hyperrectangle;
};
@ -73,6 +75,7 @@ private:
ClearableSetVariants data;
Sizes key_sizes;
MutableColumns columns;
std::vector<Range> set_hyperrectangle;
};
@ -80,11 +83,10 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition
{
public:
MergeTreeIndexConditionSet(
const String & index_name_,
const Block & index_sample_block,
size_t max_rows_,
const ActionsDAGPtr & filter_dag,
ContextPtr context);
ContextPtr context,
const IndexDescription & index_description);
bool alwaysUnknownOrTrue() const override;
@ -119,6 +121,9 @@ private:
std::unordered_set<String> key_columns;
ExpressionActionsPtr actions;
String actions_output_column_name;
DataTypes index_data_types;
KeyCondition condition;
};

View File

@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par
part_statistics[part_name] = stats;
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const
{
if (part_statistics.empty())
return default_normal_cond_factor * rows;
@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(
return result * rows / part_rows;
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const
{
return rows - estimateLess(val, rows);
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const
{
if (part_statistics.empty())
{
if (val < - threshold || val > threshold)
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val)
return default_unknown_cond_factor * rows;
else if (float_val.value() < - threshold || float_val.value() > threshold)
return default_normal_cond_factor * rows;
else
return default_good_cond_factor * rows;
@ -87,7 +90,7 @@ static std::pair<String, Int32> tryToExtractSingleColumn(const RPNBuilderTreeNod
return result;
}
std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
std::pair<String, Field> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
{
if (!node.isFunction())
return {};
@ -123,48 +126,35 @@ std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const
DataTypePtr output_type;
if (!constant_node->tryGetConstant(output_value, output_type))
return {};
const auto type = output_value.getType();
Float64 value;
if (type == Field::Types::Int64)
value = output_value.get<Int64>();
else if (type == Field::Types::UInt64)
value = output_value.get<UInt64>();
else if (type == Field::Types::Float64)
value = output_value.get<Float64>();
else
return {};
return std::make_pair(function_name, value);
return std::make_pair(function_name, output_value);
}
Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const
{
auto result = tryToExtractSingleColumn(node);
if (result.second != 1)
{
return default_unknown_cond_factor;
}
return default_unknown_cond_factor * total_rows;
String col = result.first;
auto it = column_estimators.find(col);
/// If the estimator of the column is not found or there is no data at all,
/// we use a dummy estimation.
bool dummy = total_rows == 0;
bool dummy = false;
ColumnSelectivityEstimator estimator;
if (it != column_estimators.end())
{
estimator = it->second;
}
else
{
dummy = true;
}
auto [op, val] = extractBinaryOp(node, col);
if (op == "equals")
{
if (dummy)
{
if (val < - threshold || val > threshold)
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val || (float_val < - threshold || float_val > threshold))
return default_normal_cond_factor * total_rows;
else
return default_good_cond_factor * total_rows;
@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode
return default_unknown_cond_factor * total_rows;
}
void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat)
void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat)
{
if (!part_names.contains(part_name))
{
total_rows += part_rows;
part_names.insert(part_name);
}
if (column_stat != nullptr)
column_estimators[column_stat->columnName()].merge(part_name, column_stat);
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Storages/Statistics/Statistics.h>
#include <Core/Field.h>
namespace DB
{
@ -10,6 +11,14 @@ class RPNBuilderTreeNode;
/// It estimates the selectivity of a condition.
class ConditionSelectivityEstimator
{
public:
/// TODO: Support conditions consisting of CNF/DNF like (cond1 and cond2) or (cond3) ...
/// Right now we only support simple conditions like col = val / col < val
Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
void merge(String part_name, ColumnStatisticsPtr column_stat);
void addRows(UInt64 part_rows) { total_rows += part_rows; }
private:
friend class ColumnStatistics;
struct ColumnSelectivityEstimator
@ -20,13 +29,15 @@ private:
void merge(String part_name, ColumnStatisticsPtr stats);
Float64 estimateLess(Float64 val, Float64 rows) const;
Float64 estimateLess(const Field & val, Float64 rows) const;
Float64 estimateGreater(Float64 val, Float64 rows) const;
Float64 estimateGreater(const Field & val, Float64 rows) const;
Float64 estimateEqual(Float64 val, Float64 rows) const;
Float64 estimateEqual(const Field & val, Float64 rows) const;
};
std::pair<String, Field> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
static constexpr auto default_good_cond_factor = 0.1;
static constexpr auto default_normal_cond_factor = 0.5;
static constexpr auto default_unknown_cond_factor = 1.0;
@ -35,16 +46,7 @@ private:
static constexpr auto threshold = 2;
UInt64 total_rows = 0;
std::set<String> part_names;
std::map<String, ColumnSelectivityEstimator> column_estimators;
std::pair<String, Float64> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
public:
/// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
/// Right now we only support simple condition like col = val / col < val
Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat);
};
}

View File

@ -1,15 +1,18 @@
#include <Storages/Statistics/Statistics.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Storages/Statistics/StatisticsUniq.h>
#include <Storages/StatisticsDescription.h>
#include <Storages/ColumnsDescription.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include "config.h" /// USE_DATASKETCHES
namespace DB
{
@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16
V0 = 0,
};
std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & field)
{
switch (field.getType())
{
case Field::Types::Int64:
return field.get<Int64>();
case Field::Types::UInt64:
return field.get<UInt64>();
case Field::Types::Float64:
return field.get<Float64>();
case Field::Types::Int128:
return field.get<Int128>();
case Field::Types::UInt128:
return field.get<UInt128>();
case Field::Types::Int256:
return field.get<Int256>();
case Field::Types::UInt256:
return field.get<UInt256>();
default:
return {};
}
}
std::optional<String> StatisticsUtils::tryConvertToString(const DB::Field & field)
{
if (field.getType() == Field::Types::String)
return field.get<String>();
return {};
}
IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
: stat(stat_)
{
@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateEqual(Float64 /*val*/) const
Float64 IStatistics::estimateEqual(const Field & /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateLess(Float64 /*val*/) const
Float64 IStatistics::estimateLess(const Field & /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
}
@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const
/// For that reason, all estimations are performed in a central place (here), and we don't simply pass the predicate to the first statistics
/// object that supports it natively.
Float64 ColumnStatistics::estimateLess(Float64 val) const
Float64 ColumnStatistics::estimateLess(const Field & val) const
{
if (stats.contains(StatisticsType::TDigest))
return stats.at(StatisticsType::TDigest)->estimateLess(val);
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
}
Float64 ColumnStatistics::estimateGreater(Float64 val) const
Float64 ColumnStatistics::estimateGreater(const Field & val) const
{
return rows - estimateLess(val);
}
Float64 ColumnStatistics::estimateEqual(Float64 val) const
Float64 ColumnStatistics::estimateEqual(const Field & val) const
{
if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
{
/// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
return stats.at(StatisticsType::TDigest)->estimateEqual(val);
}
if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold)
#if USE_DATASKETCHES
if (stats.contains(StatisticsType::CountMinSketch))
return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val);
#endif
if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold))
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
else
return rows * ConditionSelectivityEstimator::default_good_cond_factor;
@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va
MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
{
registerValidator(StatisticsType::TDigest, TDigestValidator);
registerCreator(StatisticsType::TDigest, TDigestCreator);
registerValidator(StatisticsType::TDigest, tdigestValidator);
registerCreator(StatisticsType::TDigest, tdigestCreator);
registerValidator(StatisticsType::Uniq, UniqValidator);
registerCreator(StatisticsType::Uniq, UniqCreator);
registerValidator(StatisticsType::Uniq, uniqValidator);
registerCreator(StatisticsType::Uniq, uniqCreator);
#if USE_DATASKETCHES
registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator);
registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator);
#endif
}
MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri
{
auto it = creators.find(type);
if (it == creators.end())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
auto stat_ptr = (it->second)(desc, stats.data_type);
column_stat->stats[type] = stat_ptr;
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/Block.h>
#include <Core/Field.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Storages/StatisticsDescription.h>
@ -13,6 +14,14 @@ namespace DB
constexpr auto STATS_FILE_PREFIX = "statistics_";
constexpr auto STATS_FILE_SUFFIX = ".stats";
struct StatisticsUtils
{
/// Returns std::nullopt if input Field cannot be converted to a concrete value
static std::optional<Float64> tryConvertToFloat64(const Field & field);
static std::optional<String> tryConvertToString(const Field & field);
};
/// Statistics describe properties of the values in the column,
/// e.g. how many unique values exist,
/// what are the N most frequent values,
@ -34,8 +43,8 @@ public:
/// Per-value estimations.
/// Throws if the statistics object is not able to do a meaningful estimation.
virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column
virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column
virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column
virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column
protected:
SingleStatisticsDescription stat;
@ -58,9 +67,9 @@ public:
void update(const ColumnPtr & column);
Float64 estimateLess(Float64 val) const;
Float64 estimateGreater(Float64 val) const;
Float64 estimateEqual(Float64 val) const;
Float64 estimateLess(const Field & val) const;
Float64 estimateGreater(const Field & val) const;
Float64 estimateEqual(const Field & val) const;
private:
friend class MergeTreeStatisticsFactory;

View File

@ -0,0 +1,102 @@
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/convertFieldToType.h>
#if USE_DATASKETCHES
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_STATISTICS;
}
/// Constants chosen based on rolling dice.
/// The values provide:
/// 1. an error tolerance of 0.1% (ε = 0.001)
/// 2. a confidence level of 99.9% (δ = 0.001).
/// And the sketch size is 152 KB.
static constexpr auto num_hashes = 7uz;
static constexpr auto num_buckets = 2718uz;
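For reference (a back-of-the-envelope check, not something stated in the commit), these constants line up with the usual Count-Min sizing rules:

w ≈ e / ε = 2.71828 / 0.001 ≈ 2718 buckets
d ≈ ln(1/δ) = ln 1000 ≈ 6.9, rounded up to 7 hash functions
size ≈ w · d · sizeof(UInt64) = 2718 · 7 · 8 B ≈ 152 KB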
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
: IStatistics(stat_)
, sketch(num_hashes, num_buckets)
, data_type(data_type_)
{
}
Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const
{
/// Try to convert the field to data_type. Converts strings to the proper data types, such as number, date, datetime, IPv4, Decimal etc.
/// Returns null if val is larger than the range of data_type.
///
/// For example: if data_type is Int32:
/// 1. For 1.0, 1, '1', return Field(1)
/// 2. For 1.1, max_value_int64, return null
Field val_converted = convertFieldToType(val, *data_type);
if (val_converted.isNull())
return 0;
if (data_type->isValueRepresentedByNumber())
return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory());
if (isStringOrFixedString(data_type))
return sketch.get_estimate(val.get<String>());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName());
}
void StatisticsCountMinSketch::update(const ColumnPtr & column)
{
for (size_t row = 0; row < column->size(); ++row)
{
if (column->isNullAt(row))
continue;
auto data = column->getDataAt(row);
sketch.update(data.data, data.size, 1);
}
}
void StatisticsCountMinSketch::serialize(WriteBuffer & buf)
{
Sketch::vector_bytes bytes = sketch.serialize();
writeIntBinary(static_cast<UInt64>(bytes.size()), buf);
buf.write(reinterpret_cast<const char *>(bytes.data()), bytes.size());
}
void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
{
UInt64 size;
readIntBinary(size, buf);
Sketch::vector_bytes bytes;
bytes.resize(size); /// To avoid 'container-overflow' in AddressSanitizer checking
buf.readStrict(reinterpret_cast<char *>(bytes.data()), size);
sketch = Sketch::deserialize(bytes.data(), size);
}
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type))
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
}
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
return std::make_shared<StatisticsCountMinSketch>(stat, data_type);
}
}
#endif
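For context, the vendored datasketches-cpp API used above can be exercised on its own roughly as follows. This is a sketch, assuming contrib/datasketches-cpp/count/include is on the include path, and it uses only the calls visible in this diff (the constructor, update, and get_estimate).

#include <count_min.hpp>   /// from contrib/datasketches-cpp/count/include
#include <iostream>
#include <string>

int main()
{
    /// Same parameters as StatisticsCountMinSketch above: 7 hashes, 2718 buckets.
    datasketches::count_min_sketch<uint64_t> sketch(7, 2718);

    const std::string value = "clickhouse";
    sketch.update(value.data(), value.size(), 1);   /// one occurrence, weight 1
    sketch.update(value.data(), value.size(), 1);   /// another occurrence

    /// Frequency estimate for `value`; ~2, within the sketch's error bounds.
    std::cout << sketch.get_estimate(value) << '\n';
}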

View File

@ -0,0 +1,39 @@
#pragma once
#include <Storages/Statistics/Statistics.h>
#include "config.h"
#if USE_DATASKETCHES
#include <count_min.hpp>
namespace DB
{
class StatisticsCountMinSketch : public IStatistics
{
public:
StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);
Float64 estimateEqual(const Field & val) const override;
void update(const ColumnPtr & column) override;
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
private:
using Sketch = datasketches::count_min_sketch<UInt64>;
Sketch sketch;
DataTypePtr data_type;
};
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr);
}
#endif

View File

@ -1,11 +1,13 @@
#include <Storages/Statistics/StatisticsTDigest.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_STATISTICS;
extern const int ILLEGAL_STATISTICS;
extern const int LOGICAL_ERROR;
}
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
void StatisticsTDigest::update(const ColumnPtr & column)
{
size_t rows = column->size();
for (size_t row = 0; row < rows; ++row)
{
/// TODO: support more types.
Float64 value = column->getFloat64(row);
t_digest.add(value, 1);
Field field;
column->get(row, field);
if (field.isNull())
continue;
if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field))
t_digest.add(*field_as_float, 1);
}
}
@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
t_digest.deserialize(buf);
}
Float64 StatisticsTDigest::estimateLess(Float64 val) const
Float64 StatisticsTDigest::estimateLess(const Field & val) const
{
return t_digest.getCountLessThan(val);
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountLessThan(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}
Float64 StatisticsTDigest::estimateEqual(Float64 val) const
Float64 StatisticsTDigest::estimateEqual(const Field & val) const
{
return t_digest.getCountEqual(val);
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountEqual(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
}
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
{
return std::make_shared<StatisticsTDigest>(stat);
}

View File

@ -16,14 +16,14 @@ public:
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
Float64 estimateLess(Float64 val) const override;
Float64 estimateEqual(Float64 val) const override;
Float64 estimateLess(const Field & val) const override;
Float64 estimateEqual(const Field & val) const override;
private:
QuantileTDigest<Float64> t_digest;
};
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
}

View File

@ -1,6 +1,7 @@
#include <Storages/Statistics/StatisticsUniq.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const
return column->getUInt(0);
}
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
}
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
return std::make_shared<StatisticsUniq>(stat, data_type);
}

View File

@ -27,7 +27,7 @@ private:
};
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
}

View File

@ -1,6 +1,10 @@
#include <gtest/gtest.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Interpreters/convertFieldToType.h>
#include <DataTypes/DataTypeFactory.h>
using namespace DB;
TEST(Statistics, TDigestLessThan)
{
@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan)
std::reverse(data.begin(), data.end());
test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001});
}

View File

@ -1,19 +1,14 @@
#include <Storages/StatisticsDescription.h>
#include <base/defines.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTStatisticsDeclaration.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ParserCreateQuery.h>
#include <Poco/Logger.h>
#include <Storages/extractKeyExpressionList.h>
#include <Storages/ColumnsDescription.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -54,7 +49,9 @@ static StatisticsType stringToStatisticsType(String type)
return StatisticsType::TDigest;
if (type == "uniq")
return StatisticsType::Uniq;
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
if (type == "count_min")
return StatisticsType::CountMinSketch;
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
}
String SingleStatisticsDescription::getTypeName() const
@ -65,8 +62,10 @@ String SingleStatisticsDescription::getTypeName() const
return "TDigest";
case StatisticsType::Uniq:
return "Uniq";
case StatisticsType::CountMinSketch:
return "count_min";
default:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are `tdigest` and `uniq`.", type);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type);
}
}
@ -99,10 +98,9 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
chassert(merging_column_type);
if (column_name.empty())
{
column_name = merging_column_name;
data_type = merging_column_type;
}
data_type = merging_column_type;
for (const auto & [stats_type, stats_desc]: other.types_to_desc)
{
@ -121,6 +119,7 @@ void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & oth
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);
types_to_desc = other.types_to_desc;
data_type = other.data_type;
}
void ColumnStatisticsDescription::clear()
@ -159,6 +158,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
const auto & column = columns.getPhysical(physical_column_name);
stats.column_name = column.name;
stats.data_type = column.type;
stats.types_to_desc = statistics_types;
result.push_back(stats);
}

View File

@ -13,6 +13,7 @@ enum class StatisticsType : UInt8
{
TDigest = 0,
Uniq = 1,
CountMinSketch = 2,
Max = 63,
};
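For intuition about the new CountMinSketch statistics type: a Count-Min sketch keeps a small fixed-size table of counters and answers approximate per-value frequency queries, never underestimating a count. A minimal standalone Python illustration of the idea follows; it is only a sketch of the technique, not the datasketches-based implementation ClickHouse links against, and all names in it are made up.

import hashlib

class CountMinSketch:
    def __init__(self, width=2048, depth=5):
        self.width = width
        self.depth = depth
        self.table = [[0] * width for _ in range(depth)]

    def _buckets(self, value):
        # One bucket per row, derived from a salted hash of the value.
        for row in range(self.depth):
            digest = hashlib.sha256(f"{row}:{value}".encode()).digest()
            yield row, int.from_bytes(digest[:8], "big") % self.width

    def add(self, value):
        for row, bucket in self._buckets(value):
            self.table[row][bucket] += 1

    def estimate(self, value):
        # Collisions can only inflate counters, so the minimum never underestimates.
        return min(self.table[row][bucket] for row, bucket in self._buckets(value))

sketch = CountMinSketch()
for v in ["0", "0", "1", "2"]:
    sketch.add(v)
print(sketch.estimate("0"))  # 2 (possibly more under heavy collisions)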

View File

@ -37,6 +37,7 @@
#include <Common/Base64.h>
#include <Common/Exception.h>
#include <Common/FailPoint.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h>
@ -64,6 +65,11 @@
namespace DB
{
namespace FailPoints
{
extern const char keepermap_fail_drop_data[];
}
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
@ -411,18 +417,16 @@ StorageKeeperMap::StorageKeeperMap(
auto code = client->tryCreate(zk_table_path, "", zkutil::CreateMode::Persistent);
// tables_path was removed with drop
if (code == Coordination::Error::ZNONODE)
{
LOG_INFO(log, "Metadata nodes were removed by another server, will retry");
continue;
}
else if (code != Coordination::Error::ZOK)
{
throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path);
}
/// A table on the same Keeper path already exists; we just appended our table id to subscribe as a new replica.
/// We still don't know if the table matches the expected metadata, so table_is_valid is not changed.
/// It will be checked lazily on the first operation.
if (code == Coordination::Error::ZOK)
return;
return;
if (code != Coordination::Error::ZNONODE)
throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", zk_root_path);
/// ZNONODE means we dropped zk_tables_path but didn't finish drop completely
}
if (client->exists(zk_dropped_path))
@ -473,6 +477,7 @@ StorageKeeperMap::StorageKeeperMap(
table_is_valid = true;
/// we are the first table created for the specified Keeper path, i.e. we are the first replica
return;
}
@ -561,6 +566,10 @@ void StorageKeeperMap::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont
bool StorageKeeperMap::dropTable(zkutil::ZooKeeperPtr zookeeper, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock)
{
fiu_do_on(FailPoints::keepermap_fail_drop_data,
{
throw zkutil::KeeperException(Coordination::Error::ZOPERATIONTIMEOUT, "Manually triggered operation timeout");
});
zookeeper->removeChildrenRecursive(zk_data_path);
bool completely_removed = false;

View File

@ -505,18 +505,18 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context
additional_info = fmt::format(" (TID: {}; TIDH: {})", current_tid, current_tid.getHash());
}
Int64 version;
MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings());
Int64 version = increment.get();
entry.commit(version);
String mutation_id = entry.file_name;
if (txn)
txn->addMutation(shared_from_this(), mutation_id);
bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false);
{
std::lock_guard lock(currently_processing_in_background_mutex);
MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings());
version = increment.get();
entry.commit(version);
String mutation_id = entry.file_name;
if (txn)
txn->addMutation(shared_from_this(), mutation_id);
bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false);
bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second;
if (!inserted)
{
@ -527,9 +527,9 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version);
}
LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info);
}
LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info);
background_operations_assignee.trigger();
return version;
}
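A note on why the block above moves the entry creation under currently_processing_in_background_mutex: taking the version from the increment and registering the entry must happen atomically with respect to concurrent startMutation calls, otherwise two callers could interleave between the two steps. A tiny Python sketch of the same pattern, with purely illustrative names:

import threading

lock = threading.Lock()
next_version = 0
mutations_by_version = {}

def start_mutation(commands):
    """Allocate a version and register the mutation under one lock."""
    global next_version
    with lock:
        next_version += 1              # analogous to increment.get()
        version = next_version
        assert version not in mutations_by_version
        mutations_by_version[version] = commands
    return version                     # background processing is triggered outside the lock

print(start_mutation(["DELETE WHERE x = 1"]))  # 1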

View File

@ -1,5 +1,6 @@
// autogenerated by tests/ci/version_helper.py
const char * auto_contributors[] {
"0x01f",
"0xflotus",
"13DaGGeR",
"1lann",
@ -167,6 +168,7 @@ const char * auto_contributors[] {
"AnneClickHouse",
"Anselmo D. Adams",
"Anthony N. Simon",
"AntiTopQuark",
"Anton Ivashkin",
"Anton Kobzev",
"Anton Kozlov",
@ -299,6 +301,7 @@ const char * auto_contributors[] {
"Dan Wu",
"DanRoscigno",
"Dani Pozo",
"Daniel Anugerah",
"Daniel Bershatsky",
"Daniel Byta",
"Daniel Dao",
@ -370,6 +373,7 @@ const char * auto_contributors[] {
"Elena",
"Elena Baskakova",
"Elena Torró",
"Elena Torró Martínez",
"Elghazal Ahmed",
"Eliot Hautefeuille",
"Elizaveta Mironyuk",
@ -415,6 +419,7 @@ const char * auto_contributors[] {
"FgoDt",
"Filatenkov Artur",
"Filipe Caixeta",
"Filipp Bakanov",
"Filipp Ozinov",
"Filippov Denis",
"Fille",
@ -451,6 +456,7 @@ const char * auto_contributors[] {
"Gleb Novikov",
"Gleb-Tretyakov",
"GoGoWen2021",
"Gosha Letov",
"Gregory",
"Grigorii Sokolik",
"Grigory",
@ -461,6 +467,7 @@ const char * auto_contributors[] {
"Guillaume Tassery",
"Guo Wangyang",
"Guo Wei (William)",
"Guspan Tanadi",
"Haavard Kvaalen",
"Habibullah Oladepo",
"HaiBo Li",
@ -474,6 +481,7 @@ const char * auto_contributors[] {
"HarryLeeIBM",
"Hasitha Kanchana",
"Hasnat",
"Haydn",
"Heena Bansal",
"HeenaBansal2009",
"Hendrik M",
@ -606,6 +614,7 @@ const char * auto_contributors[] {
"Kevin Chiang",
"Kevin Michel",
"Kevin Mingtarja",
"Kevin Song",
"Kevin Zhang",
"KevinyhZou",
"KinderRiven",
@ -661,6 +670,7 @@ const char * auto_contributors[] {
"Lewinma",
"Li Shuai",
"Li Yin",
"Linh Giang",
"Lino Uruñuela",
"Lirikl",
"Liu Cong",
@ -690,6 +700,7 @@ const char * auto_contributors[] {
"Maksim Alekseev",
"Maksim Buren",
"Maksim Fedotov",
"Maksim Galkin",
"Maksim Kita",
"Maksym Sobolyev",
"Mal Curtis",
@ -724,6 +735,7 @@ const char * auto_contributors[] {
"Max Akhmedov",
"Max Bruce",
"Max K",
"Max K.",
"Max Kainov",
"Max Vetrov",
"MaxTheHuman",
@ -811,6 +823,7 @@ const char * auto_contributors[] {
"Nataly Merezhuk",
"Natalya Chizhonkova",
"Natasha Murashkina",
"Nathan Clevenger",
"NeZeD [Mac Pro]",
"Neeke Gao",
"Neng Liu",
@ -946,6 +959,7 @@ const char * auto_contributors[] {
"Robert Coelho",
"Robert Hodges",
"Robert Schulze",
"Rodolphe Dugé de Bernonville",
"RogerYK",
"Rohit Agarwal",
"Romain Neutron",
@ -1107,6 +1121,7 @@ const char * auto_contributors[] {
"Timur Solodovnikov",
"TiunovNN",
"Tobias Adamson",
"Tobias Florek",
"Tobias Lins",
"Tom Bombadil",
"Tom Risse",
@ -1231,11 +1246,13 @@ const char * auto_contributors[] {
"Yingchun Lai",
"Yingfan Chen",
"Yinzheng-Sun",
"Yinzuo Jiang",
"Yiğit Konur",
"Yohann Jardin",
"Yong Wang",
"Yong-Hao Zou",
"Youenn Lebras",
"Your Name",
"Yu, Peng",
"Yuko Takagi",
"Yuntao Wu",
@ -1250,6 +1267,7 @@ const char * auto_contributors[] {
"Yury Stankevich",
"Yusuke Tanaka",
"Zach Naimon",
"Zawa-II",
"Zheng Miao",
"ZhiHong Zhang",
"ZhiYong Wang",
@ -1380,6 +1398,7 @@ const char * auto_contributors[] {
"conicliu",
"copperybean",
"coraxster",
"cw5121",
"cwkyaoyao",
"d.v.semenov",
"dalei2019",
@ -1460,12 +1479,14 @@ const char * auto_contributors[] {
"fuzzERot",
"fyu",
"g-arslan",
"gabrielmcg44",
"ggerogery",
"giordyb",
"glockbender",
"glushkovds",
"grantovsky",
"gulige",
"gun9nir",
"guoleiyi",
"guomaolin",
"guov100",
@ -1527,6 +1548,7 @@ const char * auto_contributors[] {
"jferroal",
"jiahui-97",
"jianmei zhang",
"jiaosenvip",
"jinjunzh",
"jiyoungyoooo",
"jktng",
@ -1541,6 +1563,7 @@ const char * auto_contributors[] {
"jun won",
"jus1096",
"justindeguzman",
"jwoodhead",
"jyz0309",
"karnevil13",
"kashwy",
@ -1633,10 +1656,12 @@ const char * auto_contributors[] {
"mateng0915",
"mateng915",
"mauidude",
"max-vostrikov",
"maxim",
"maxim-babenko",
"maxkuzn",
"maxulan",
"maxvostrikov",
"mayamika",
"mehanizm",
"melin",
@ -1677,6 +1702,7 @@ const char * auto_contributors[] {
"nathanbegbie",
"nauta",
"nautaa",
"nauu",
"ndchikin",
"nellicus",
"nemonlou",
@ -1975,6 +2001,7 @@ const char * auto_contributors[] {
"张健",
"张风啸",
"徐炘",
"忒休斯~Theseus",
"曲正鹏",
"木木夕120",
"未来星___费",

View File

@ -271,7 +271,8 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node)
static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
const ActionsDAG::Node * node,
const Block * allowed_inputs,
ActionsDAG::Nodes & additional_nodes)
ActionsDAG::Nodes & additional_nodes,
bool allow_non_deterministic_functions)
{
if (node->type == ActionsDAG::ActionType::FUNCTION)
{
@ -280,8 +281,14 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto & node_copy = additional_nodes.emplace_back(*node);
node_copy.children.clear();
for (const auto * child : node->children)
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes))
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions))
node_copy.children.push_back(child_copy);
/// An expression like (not_allowed AND allowed) is reduced to just (allowed) only when allow_non_deterministic_functions = true;
/// otherwise the whole conjunction is rejected. This is important for the trivial count optimization,
/// where a partial match could give incorrect results. For example, if the query is
/// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply
/// trivial count.
else if (!allow_non_deterministic_functions)
return nullptr;
if (node_copy.children.empty())
return nullptr;
@ -307,7 +314,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
{
auto & node_copy = additional_nodes.emplace_back(*node);
for (auto & child : node_copy.children)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child)
return nullptr;
return &node_copy;
@ -321,7 +328,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto index_hint_dag = index_hint->getActions()->clone();
ActionsDAG::NodeRawConstPtrs atoms;
for (const auto & output : index_hint_dag->getOutputs())
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions))
atoms.push_back(child_copy);
if (!atoms.empty())
@ -355,13 +362,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
return node;
}
ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs)
ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions)
{
if (!predicate)
return nullptr;
ActionsDAG::Nodes additional_nodes;
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes);
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions);
if (!res)
return nullptr;
@ -370,7 +377,7 @@ ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate,
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context)
{
auto dag = splitFilterDagForAllowedInputs(predicate, &block);
auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false);
if (dag)
filterBlockWithDAG(dag, block, context);
}
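For intuition, a small standalone Python model of the split behaviour described here: with allow_non_deterministic_functions set to true an AND node silently drops children that cannot be evaluated on the allowed inputs, while with false the whole conjunction is rejected, which is what filterBlockWithPredicate (and hence the trivial count path) relies on. The tree encoding and names below are invented for illustration and are not ClickHouse's ActionsDAG API.

def split(node, allowed_columns, allow_non_deterministic):
    kind = node[0]
    if kind == "and":
        kept = []
        for child in node[1:]:
            sub = split(child, allowed_columns, allow_non_deterministic)
            if sub is not None:
                kept.append(sub)
            elif not allow_non_deterministic:
                return None             # one bad child rejects the whole conjunction
        return ("and", *kept) if kept else None
    if kind == "eq":                    # eq(column, constant)
        return node if node[1] in allowed_columns else None
    return None                         # anything else (e.g. non-deterministic) cannot be split

pred = ("and", ("eq", "_partition_id", "0"), ("nondeterministic", "rowNumberInBlock"))
print(split(pred, {"_partition_id"}, True))   # ('and', ('eq', '_partition_id', '0'))
print(split(pred, {"_partition_id"}, False))  # None -> trivial count must not be applied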

View File

@ -32,7 +32,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node);
/// Extract a part of the predicate that can be evaluated using only columns from input_names.
ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs);
/// When allow_non_deterministic_functions is true, then even if the predicate contains non-deterministic
/// functions, we still allow extracting a part of the predicate; otherwise we return nullptr.
/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in
/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is
/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1`,
/// the predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is
/// non-deterministic. If we still extracted the part `_partition_id = '0'` for filtering parts, then the trivial
/// count optimization would be mistakenly applied to the query.
ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true);
/// Extract from the input stream a set of `name` column values
template <typename T>

View File

@ -31,6 +31,7 @@ class CIBuddy:
self.sha = pr_info.sha[:10]
def check_workflow(self):
GHActions.print_workflow_results()
res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig)
if res != GHActions.ActionStatuses.SUCCESS:
self.post_job_error("Workflow Configuration Failed", critical=True)

View File

@ -92,15 +92,33 @@ class GHActions:
PENDING = "pending"
SUCCESS = "success"
@staticmethod
def get_workflow_job_result(wf_job_name: str) -> Optional[str]:
@classmethod
def _get_workflow_results(cls):
if not Path(Envs.WORKFLOW_RESULT_FILE).exists():
print(
f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]"
)
return None
return {}
with open(Envs.WORKFLOW_RESULT_FILE, "r", encoding="utf-8") as json_file:
res = json.load(json_file)
try:
res = json.load(json_file)
except json.JSONDecodeError as e:
print(f"ERROR: json decoder exception {e}")
json_file.seek(0)
print(" File content:")
print(json_file.read())
return {}
return res
@classmethod
def print_workflow_results(cls):
res = cls._get_workflow_results()
results = [f"{job}: {data['result']}" for job, data in res.items()]
cls.print_in_group("Workflow results", results)
@classmethod
def get_workflow_job_result(cls, wf_job_name: str) -> Optional[str]:
res = cls._get_workflow_results()
if wf_job_name in res:
return res[wf_job_name]["result"] # type: ignore
else:

View File

@ -197,6 +197,10 @@ def get_instance_id():
return _query_imds("latest/meta-data/instance-id")
def get_instance_lifecycle():
return _query_imds("latest/meta-data/instance-life-cycle")
def prepare_tests_results_for_clickhouse(
pr_info: PRInfo,
test_results: TestResults,
@ -233,7 +237,7 @@ def prepare_tests_results_for_clickhouse(
"head_ref": head_ref,
"head_repo": head_repo,
"task_url": pr_info.task_url,
"instance_type": get_instance_type(),
"instance_type": ",".join([get_instance_type(), get_instance_lifecycle()]),
"instance_id": get_instance_id(),
}
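For context, get_instance_lifecycle() mirrors get_instance_type() and reads the EC2 instance metadata service; the value is typically `spot` or `on-demand`, so the combined field above looks like `c6a.4xlarge,spot`. A hedged sketch of such a query (IMDSv2 tokens and error handling omitted; the instance type shown is an example):

import urllib.request

def query_imds(path: str) -> str:
    # The metadata service lives at a fixed link-local address on EC2 instances.
    with urllib.request.urlopen(f"http://169.254.169.254/{path}", timeout=2) as response:
        return response.read().decode()

# instance_type = ",".join([query_imds("latest/meta-data/instance-type"),
#                           query_imds("latest/meta-data/instance-life-cycle")])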

View File

@ -21,7 +21,7 @@ from env_helper import (
TEMP_PATH,
)
from git_helper import Git
from pr_info import PRInfo, EventType
from pr_info import PRInfo
from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults
from stopwatch import Stopwatch
from tee_popen import TeePopen
@ -375,25 +375,23 @@ def main():
tags = gen_tags(args.version, args.release_type)
repo_urls = {}
direct_urls: Dict[str, List[str]] = {}
if pr_info.event_type == EventType.PULL_REQUEST:
release_or_pr = str(pr_info.number)
sha = pr_info.sha
elif pr_info.event_type == EventType.PUSH and pr_info.is_master:
release_or_pr = str(0)
sha = pr_info.sha
else:
release_or_pr = f"{args.version.major}.{args.version.minor}"
sha = args.sha
assert sha
for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")):
if not args.bucket_prefix:
if args.bucket_prefix:
assert not args.allow_build_reuse
repo_urls[arch] = f"{args.bucket_prefix}/{build_name}"
elif args.sha:
# CreateRelease workflow only. TODO
version = args.version
repo_urls[arch] = (
f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/"
f"{release_or_pr}/{sha}/{build_name}"
f"{version.major}.{version.minor}/{args.sha}/{build_name}"
)
else:
repo_urls[arch] = f"{args.bucket_prefix}/{build_name}"
# In all other cases urls must be fetched from build reports. TODO: script needs refactoring
repo_urls[arch] = ""
assert args.allow_build_reuse
if args.allow_build_reuse:
# read s3 urls from pre-downloaded build reports
if "clickhouse-server" in image_repo:
@ -431,7 +429,6 @@ def main():
)
if test_results[-1].status != "OK":
status = FAILURE
pr_info = pr_info or PRInfo()
description = f"Processed tags: {', '.join(tags)}"
JobReport(

View File

@ -1 +0,0 @@
generated_*init_runner.sh

View File

@ -1,87 +0,0 @@
#!/usr/bin/env bash
set -e
usage() {
echo "Usage: $0 ENVIRONMENT" >&2
echo "Valid values for ENVIRONMENT: staging, production" >&2
exit 1
}
case "$1" in
staging|production)
ENVIRONMENT="$1" ;;
--help)
usage ;;
*)
echo "Invalid argument" >&2
usage ;;
esac
cd "$(dirname "$0")" || exit 1
SOURCE_SCRIPT='init_runner.sh'
check_response() {
# Are we even in the interactive shell?
[ -t 1 ] || return 1
local request
request="$1"
read -rp "$request (y/N): " response
case "$response" in
[Yy])
return 0
# Your code to continue goes here
;;
*)
return 1
;;
esac
}
check_dirty() {
if [ -n "$(git status --porcelain=v2 "$SOURCE_SCRIPT")" ]; then
echo "The $SOURCE_SCRIPT has uncommited changes, won't deploy it" >&2
exit 1
fi
}
GIT_HASH=$(git log -1 --format=format:%H)
header() {
cat << EOF
#!/usr/bin/env bash
echo 'The $ENVIRONMENT script is generated from $SOURCE_SCRIPT, commit $GIT_HASH'
EOF
}
body() {
local first_line
first_line=$(sed -n '/^# THE SCRIPT START$/{=;q;}' "$SOURCE_SCRIPT")
if [ -z "$first_line" ]; then
echo "The pattern '# THE SCRIPT START' is not found in $SOURCE_SCRIPT" >&2
exit 1
fi
tail "+$first_line" "$SOURCE_SCRIPT"
}
GENERATED_FILE="generated_${ENVIRONMENT}_${SOURCE_SCRIPT}"
{ header && body; } > "$GENERATED_FILE"
echo "The file $GENERATED_FILE is generated"
if check_response "Display the content of $GENERATED_FILE?"; then
if [ -z "$PAGER" ]; then
less "$GENERATED_FILE"
else
$PAGER "$GENERATED_FILE"
fi
fi
check_dirty
S3_OBJECT=${S3_OBJECT:-s3://github-runners-data/cloud-init/${ENVIRONMENT}.sh}
if check_response "Deploy the generated script to $S3_OBJECT?"; then
aws s3 mv "$GENERATED_FILE" "$S3_OBJECT"
fi

View File

@ -1,406 +0,0 @@
#!/usr/bin/env bash
cat > /dev/null << 'EOF'
The following content is embedded into the s3 object via the script
deploy-runner-init.sh {staging,production}
with additional helpful information
In the `user data` you should define the following text
between `### COPY BELOW` and `### COPY ABOVE`
### COPY BELOW
Content-Type: multipart/mixed; boundary="//"
MIME-Version: 1.0
--//
Content-Type: text/cloud-config; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config.txt"
#cloud-config
cloud_final_modules:
- [scripts-user, always]
--//
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="userdata.txt"
#!/bin/bash
INSTANCE_ID=$(ec2metadata --instance-id)
INIT_ENVIRONMENT=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:init-environment'].Value" --output text)
echo "Downloading and using $INIT_ENVIRONMENT cloud-init.sh"
aws s3 cp "s3://github-runners-data/cloud-init/${INIT_ENVIRONMENT:-production}.sh" /tmp/cloud-init.sh
chmod 0700 /tmp/cloud-init.sh
exec bash /tmp/cloud-init.sh
--//
### COPY ABOVE
EOF
# THE SCRIPT START
set -uo pipefail
####################################
# IMPORTANT! #
# EC2 instance should have #
# `github:runner-type` tag #
# set accordingly to a runner role #
####################################
echo "Running init v1.1"
export DEBIAN_FRONTEND=noninteractive
export RUNNER_HOME=/home/ubuntu/actions-runner
export RUNNER_ORG="ClickHouse"
export RUNNER_URL="https://github.com/${RUNNER_ORG}"
# Funny fact, but the metadata service has a fixed IP
INSTANCE_ID=$(ec2metadata --instance-id)
export INSTANCE_ID
bash /usr/local/share/scripts/init-network.sh
# combine labels
RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text)
LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE"
export LABELS
echo "Instance Labels: $LABELS"
LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle)
export LIFE_CYCLE
echo "Instance lifecycle: $LIFE_CYCLE"
INSTANCE_TYPE=$(ec2metadata --instance-type)
echo "Instance type: $INSTANCE_TYPE"
# Refresh CloudWatch agent config
aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json
systemctl restart amazon-cloudwatch-agent.service
# Refresh teams ssh keys
TEAM_KEYS_URL=$(aws ssm get-parameter --region us-east-1 --name team-keys-url --query 'Parameter.Value' --output=text)
curl -s "${TEAM_KEYS_URL}" > /home/ubuntu/.ssh/authorized_keys2
chown ubuntu: /home/ubuntu/.ssh -R
# Create a pre-run script that will provide diagnostics info
mkdir -p /tmp/actions-hooks
cat > /tmp/actions-hooks/common.sh << 'EOF'
#!/bin/bash
EOF
terminate_delayed() {
# The function for the post hook to gracefully finish the job and then tear down
# The very specific sleep time is used later in the main loop to determine if
# the instance is tearing down
# IF `sleep` IS CHANGED, CHANGE THE OTHER VALUE IN `pgrep`
sleep=13.14159265358979323846
echo "Going to terminate the runner's instance in $sleep seconds"
# We execute it with `at` to not have it as an orphan process, but launched independently
# GH Runners kill all remaining processes
echo "sleep '$sleep'; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now || \
aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" # workaround for complete out of space or non-installed `at`
exit 0
}
detect_delayed_termination() {
# The function looks for the very specific sleep with pi
if pgrep 'sleep 13.14159265358979323846'; then
echo 'The instance has delayed termination, sleep the same time to wait if it goes down'
sleep 14
fi
}
declare -f terminate_delayed >> /tmp/actions-hooks/common.sh
terminate_and_exit() {
# Terminate instance and exit from the script instantly
echo "Going to terminate the runner's instance"
aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"
exit 0
}
terminate_decrease_and_exit() {
# Terminate instance and exit from the script instantly
echo "Going to terminate the runner's instance and decrease asg capacity"
aws autoscaling terminate-instance-in-auto-scaling-group --instance-id "$INSTANCE_ID" --should-decrement-desired-capacity
exit 0
}
declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh
check_spot_instance_is_old() {
if [ "$LIFE_CYCLE" == "spot" ]; then
local UPTIME
UPTIME=$(< /proc/uptime)
UPTIME=${UPTIME%%.*}
if (( 3600 < UPTIME )); then
echo "The spot instance has uptime $UPTIME, it's time to shut it down"
return 0
fi
fi
return 1
}
check_proceed_spot_termination() {
# The function checks for and proceeds with spot instance termination if one exists
# The event for spot instance termination
local FORCE
FORCE=${1:-}
if TERMINATION_DATA=$(curl -s --fail http://169.254.169.254/latest/meta-data/spot/instance-action); then
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-instance-termination-notices.html#instance-action-metadata
_action=$(jq '.action' -r <<< "$TERMINATION_DATA")
_time=$(jq '.time | fromdate' <<< "$TERMINATION_DATA")
_until_action=$((_time - $(date +%s)))
echo "Received the '$_action' event that will be effective in $_until_action seconds"
if (( _until_action <= 30 )) || [ "$FORCE" == "force" ]; then
echo "The action $_action will be done in $_until_action, killing the runner and exit"
local runner_pid
runner_pid=$(pgrep Runner.Listener)
if [ -n "$runner_pid" ]; then
# Kill the runner to not let it cancel the job
# shellcheck disable=SC2046
kill -9 "$runner_pid" $(list_children "$runner_pid")
fi
sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)"
terminate_and_exit
fi
fi
}
no_terminating_metadata() {
# The function checks that the instance can continue working
# Returns 1 if any of the termination events are received
# The event for a rebalance recommendation. Not strict, so we have some room to make a decision here
if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then
echo 'Received recommendation to rebalance, checking the uptime'
local UPTIME
UPTIME=$(< /proc/uptime)
UPTIME=${UPTIME%%.*}
# We don't shut down instances younger than 30m
if (( 1800 < UPTIME )); then
# To not shut down everything at once, give each instance a 66% chance to survive
if (( $((RANDOM % 3)) == 0 )); then
echo 'The instance is older than 30m and won the roulette'
return 1
fi
echo 'The instance is older than 30m, but is not chosen for rebalance'
else
echo 'The instance is younger than 30m, do not shut it down'
fi
fi
# Checks if the ASG in a lifecycle hook state
local ASG_STATUS
ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state)
if [ "$ASG_STATUS" == "Terminated" ]; then
echo 'The instance in ASG status Terminating:Wait'
return 1
fi
}
terminate_on_event() {
# If there is a rebalance event, then the instance could die soon
# Let's not wait for it and terminate proactively
if curl -s --fail http://169.254.169.254/latest/meta-data/events/recommendations/rebalance; then
terminate_and_exit
fi
# Here we check if the autoscaling group marked the instance for termination and is waiting for the job to finish
ASG_STATUS=$(curl -s http://169.254.169.254/latest/meta-data/autoscaling/target-lifecycle-state)
if [ "$ASG_STATUS" == "Terminated" ]; then
INSTANCE_ID=$(ec2metadata --instance-id)
ASG_NAME=$(aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='aws:autoscaling:groupName'].Value" --output text)
LIFECYCLE_HOOKS=$(aws autoscaling describe-lifecycle-hooks --auto-scaling-group-name "$ASG_NAME" --query "LifecycleHooks[].LifecycleHookName" --output text)
for LCH in $LIFECYCLE_HOOKS; do
aws autoscaling complete-lifecycle-action --lifecycle-action-result CONTINUE \
--lifecycle-hook-name "$LCH" --auto-scaling-group-name "$ASG_NAME" \
--instance-id "$INSTANCE_ID"
true # autoformat issue
done
echo 'The runner is marked as "Terminated" by the autoscaling group, we are terminating'
terminate_and_exit
fi
}
cat > /tmp/actions-hooks/pre-run.sh << EOF
#!/bin/bash
set -uo pipefail
echo "Runner's public DNS: $(ec2metadata --public-hostname)"
echo "Runner's labels: ${LABELS}"
echo "Runner's instance type: $(ec2metadata --instance-type)"
EOF
# Create a post-run script that will restart docker daemon before the job started
cat > /tmp/actions-hooks/post-run.sh << 'EOF'
#!/bin/bash
set -xuo pipefail
source /tmp/actions-hooks/common.sh
# Free KiB, free percents
ROOT_STAT=($(df / | awk '/\// {print $4 " " int($4/$2 * 100)}'))
if [[ ${ROOT_STAT[0]} -lt 3000000 ]] || [[ ${ROOT_STAT[1]} -lt 5 ]]; then
echo "The runner has ${ROOT_STAT[0]}KiB and ${ROOT_STAT[1]}% of free space on /"
terminate_delayed
fi
# shellcheck disable=SC2046
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
# shellcheck disable=SC2046
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
# If we still have containers left after the previous commands, then we have a hung one
# and should restart the daemon
if [ "$(docker ps --all --quiet)" ]; then
# Systemd service of docker has StartLimitBurst=3 and StartLimitInterval=60s,
# that's why we try restarting it for long
for i in {1..25};
do
sudo systemctl restart docker && break || sleep 5
done
for i in {1..10}
do
docker info && break || sleep 2
done
# Last chance, otherwise we have to terminate the poor instance
docker info 1>/dev/null || { echo Docker unable to start; terminate_delayed ; }
fi
EOF
get_runner_token() {
/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value
}
is_job_assigned() {
local runner_pid
runner_pid=$(pgrep Runner.Listener)
if [ -z "$runner_pid" ]; then
# if runner has finished, it's fine
return 0
fi
local log_file
log_file=$(lsof -p "$runner_pid" 2>/dev/null | grep -o "$RUNNER_HOME/_diag/Runner.*log")
if [ -z "$log_file" ]; then
# assume, the process is over or just started
return 0
fi
# So far it's the only solid way to determine that the job is starting
grep -q 'Terminal] .* Running job:' "$log_file" \
&& return 0 \
|| return 1
}
list_children () {
local children
children=$(ps --ppid "$1" -o pid=)
if [ -z "$children" ]; then
return
fi
for pid in $children; do
list_children "$pid"
done
echo "$children"
}
# There's a possibility that it fails because the runner's version is outdated,
# so after the first failure we'll try to launch it with autoupdate enabled.
#
# We'll fail and terminate after 10 consecutive failures.
ATTEMPT=0
# In `kill` 0 means "all processes in process group", -1 is "all but PID 1"
# We use `-2` to get an error
RUNNER_PID=-2
while true; do
# Does not send signal, but checks that the process $RUNNER_PID is running
if kill -0 -- $RUNNER_PID; then
ATTEMPT=0
echo "Runner is working with pid $RUNNER_PID, checking the metadata in background"
check_proceed_spot_termination
if ! is_job_assigned; then
RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$RUNNER_PID" 2>/dev/null || date +%s) ))
echo "The runner is launched $RUNNER_AGE seconds ago and still hasn't received a job"
if (( 60 < RUNNER_AGE )); then
echo "Attempt to delete the runner for a graceful shutdown"
sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \
|| continue
echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down"
terminate_decrease_and_exit
fi
fi
else
if [ "$RUNNER_PID" != "-2" ]; then
wait $RUNNER_PID \
&& echo "Runner with PID $RUNNER_PID successfully finished" \
|| echo "Attempt $((++ATTEMPT)) to start the runner"
fi
if (( ATTEMPT > 10 )); then
echo "The runner has failed to start after $ATTEMPT attempt. Give up and terminate it"
terminate_and_exit
fi
cd $RUNNER_HOME || terminate_and_exit
detect_delayed_termination
# If runner is not active, check that it needs to terminate itself
echo "Checking if the instance suppose to terminate"
no_terminating_metadata || terminate_on_event
check_spot_instance_is_old && terminate_and_exit
check_proceed_spot_termination force
echo "Going to configure runner"
token_args=(--token "$(get_runner_token)")
config_args=(
"${token_args[@]}" --url "$RUNNER_URL"
--ephemeral --unattended --replace --runnergroup Default
--labels "$LABELS" --work _work --name "$INSTANCE_ID"
)
if (( ATTEMPT > 1 )); then
echo 'The runner failed to start at least once. Removing it and then configuring with autoupdate enabled.'
sudo -u ubuntu ./config.sh remove "${token_args[@]}"
sudo -u ubuntu ./config.sh "${config_args[@]}"
else
echo "Configure runner with disabled autoupdate"
config_args+=("--disableupdate")
sudo -u ubuntu ./config.sh "${config_args[@]}"
fi
echo "Another one check to avoid race between runner and infrastructure"
no_terminating_metadata || terminate_on_event
check_spot_instance_is_old && terminate_and_exit
check_proceed_spot_termination force
# There were some failures to start the Job because of trash in _work
rm -rf _work
# https://github.com/actions/runner/issues/3266
# We're unable to know if the runner failed to start.
echo 'Monkey-patching run helpers to get genuine exit code of the runner'
for script in run.sh run-helper.sh.template; do
# shellcheck disable=SC2016
grep -q 'exit 0$' "$script" && \
sed 's/exit 0/exit $returnCode/' -i "$script" && \
echo "Script $script is patched"
done
echo "Run"
sudo -u ubuntu \
ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \
ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \
./run.sh &
RUNNER_PID=$!
sleep 10
fi
sleep 5
done
# vim:ts=4:sw=4

View File

@ -104,3 +104,24 @@ def test_keeper_map_without_zk(started_cluster):
node.query("DETACH TABLE test_keeper_map_without_zk")
client.stop()
def test_keeper_map_with_failed_drop(started_cluster):
run_query(
"CREATE TABLE test_keeper_map_with_failed_drop (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);"
)
run_query("INSERT INTO test_keeper_map_with_failed_drop VALUES (1, 11)")
run_query("SYSTEM ENABLE FAILPOINT keepermap_fail_drop_data")
node.query("DROP TABLE test_keeper_map_with_failed_drop SYNC")
zk_client = get_genuine_zk()
assert (
zk_client.get("/test_keeper_map/test_keeper_map_with_failed_drop/data")
is not None
)
run_query("SYSTEM DISABLE FAILPOINT keepermap_fail_drop_data")
run_query(
"CREATE TABLE test_keeper_map_with_failed_drop_another (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_with_failed_drop') PRIMARY KEY(key);"
)
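The test relies on the failpoint added in StorageKeeperMap: `SYSTEM ENABLE FAILPOINT keepermap_fail_drop_data` makes the drop path throw before removing data, and disabling it lets a later drop proceed. A toy Python model of such a registry, purely for illustration (the real mechanism is the libfiu-based fiu_do_on shown above):

enabled_failpoints = set()

def enable_failpoint(name):
    enabled_failpoints.add(name)

def disable_failpoint(name):
    enabled_failpoints.discard(name)

def drop_data():
    if "keepermap_fail_drop_data" in enabled_failpoints:
        raise TimeoutError("Manually triggered operation timeout")
    # ... the actual drop would happen here ...

enable_failpoint("keepermap_fail_drop_data")
try:
    drop_data()
except TimeoutError as e:
    print(e)                 # the table keeps its data, as in the test above
disable_failpoint("keepermap_fail_drop_data")
drop_data()                  # succeeds once the failpoint is disabled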

View File

@ -7,7 +7,7 @@
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000);</query>

View File

@ -0,0 +1,14 @@
CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192
Test statistics count_min:
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed)
Test statistics multi-types:
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed)
Test LowCardinality and Nullable data type:
tab2

View File

@ -0,0 +1,70 @@
-- Tags: no-fasttest
DROP TABLE IF EXISTS tab SYNC;
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET allow_suspicious_low_cardinality_types=1;
SET mutations_sync = 2;
CREATE TABLE tab
(
a String,
b UInt64,
c Int64,
pk String,
) Engine = MergeTree() ORDER BY pk
SETTINGS min_bytes_for_wide_part = 0;
SHOW CREATE TABLE tab;
INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000;
SELECT 'Test statistics count_min:';
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
ALTER TABLE tab ADD STATISTICS b TYPE count_min;
ALTER TABLE tab ADD STATISTICS c TYPE count_min;
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
ALTER TABLE tab DROP STATISTICS a, b, c;
SELECT 'Test statistics multi-types:';
ALTER TABLE tab ADD STATISTICS a TYPE count_min;
ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest;
ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest;
ALTER TABLE tab MATERIALIZE STATISTICS a, b, c;
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/)
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '')
FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/)
WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
ALTER TABLE tab DROP STATISTICS a, b, c;
DROP TABLE IF EXISTS tab SYNC;
SELECT 'Test LowCardinality and Nullable data type:';
DROP TABLE IF EXISTS tab2 SYNC;
SET allow_suspicious_low_cardinality_types=1;
CREATE TABLE tab2
(
a LowCardinality(Int64) STATISTICS(count_min),
b Nullable(Int64) STATISTICS(count_min),
c LowCardinality(Nullable(Int64)) STATISTICS(count_min),
pk String,
) Engine = MergeTree() ORDER BY pk;
select name from system.tables where name = 'tab2' and database = currentDatabase();
DROP TABLE IF EXISTS tab2 SYNC;

View File

@ -70,3 +70,4 @@ SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000;
DROP TABLE IF EXISTS t3;

View File

@ -2,8 +2,6 @@
# Tags: atomic-database
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
@ -134,7 +132,7 @@ while [ "`$CLICKHOUSE_CLIENT -nq "select status, next_refresh_time from refreshe
do
sleep 0.1
done
sleep 1
$CLICKHOUSE_CLIENT -nq "
select '<14: waiting for next cycle>', view, status, remaining_dependencies, next_refresh_time from refreshes;
truncate src;
@ -172,13 +170,13 @@ $CLICKHOUSE_CLIENT -nq "
drop table b;
create materialized view c refresh every 1 second (x Int64) engine Memory empty as select * from src;
drop table src;"
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Exception' ]
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'c' -- $LINENO" | xargs`" != 'Exception' ]
do
sleep 0.1
done
# Check exception, create src, expect successful refresh.
$CLICKHOUSE_CLIENT -nq "
select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' from refreshes;
select '<19: exception>', exception ilike '%UNKNOWN_TABLE%' ? '1' : exception from refreshes where view = 'c';
create table src (x Int64) engine Memory as select 1;
system refresh view c;"
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Finished' ]
@ -224,22 +222,27 @@ done
$CLICKHOUSE_CLIENT -nq "
rename table e to f;
select '<24: rename during refresh>', * from f;
select '<25: rename during refresh>', view, status from refreshes;
select '<25: rename during refresh>', view, status from refreshes where view = 'f';
alter table f modify refresh after 10 year;"
sleep 2 # make it likely that at least one row was processed
# Cancel.
$CLICKHOUSE_CLIENT -nq "
system cancel view f;"
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes -- $LINENO" | xargs`" != 'Cancelled' ]
while [ "`$CLICKHOUSE_CLIENT -nq "select last_refresh_result from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Cancelled' ]
do
sleep 0.1
done
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" = 'Running' ]
do
sleep 0.1
done
# Check that another refresh doesn't immediately start after the cancelled one.
sleep 1
$CLICKHOUSE_CLIENT -nq "
select '<27: cancelled>', view, status from refreshes;
select '<27: cancelled>', view, status from refreshes where view = 'f';
system refresh view f;"
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes -- $LINENO" | xargs`" != 'Running' ]
while [ "`$CLICKHOUSE_CLIENT -nq "select status from refreshes where view = 'f' -- $LINENO" | xargs`" != 'Running' ]
do
sleep 0.1
done

View File

@ -1,126 +0,0 @@
12 4 21722 2209341 4 1415 2333 4 61 64 3
21 1134 11363 58484 1106 1458 1592 136 26 62 32
22 210 4504 5729 196 291 767 124 47 54 8
26 196 1327684 5221 195 4140 5661 161 28 49 19
28 5 2034378 7102 5 325 3255 2 53 60 4
29 53 45041 45189 45 1580 211 31 55 84 18
38 424 1600675 4653 424 562 5944 244 60 65 6
45 17 62743 674873 17 6239 6494 17 65 76 8
72 1862 1210073 6200 1677 2498 528 859 51 61 11
79 2 2255228 2255293 2 5495 7057 2 65 65 1
85 459 1051571 1829638 459 6402 7131 334 32 61 25
86 10 1748130 1754217 10 4374 7003 10 56 59 4
91 165 5718 5802 75 282 7113 112 41 63 22
94 20 1231916 2050003 20 4802 4917 19 53 59 7
99 2 3665 36667 2 497 697 2 70 71 2
103 1 2446615 2446615 1 2498 2498 1 58 58 1
106 72 6149 6699 67 527 826 40 61 61 1
111 43 2273186 5272 43 492 4923 4 54 72 15
120 3129 45117 6735 2868 1030 1625 561 59 64 6
138 2 49243 49374 2 1428 1519 2 47 48 2
143 100 23321 63639 100 1115 1624 88 51 51 1
145 1 2447976 2447976 1 6173 6173 1 44 44 1
153 16 13748 16881 16 1506 1636 16 54 68 9
159 19952 1525336 7131 12957 1280 6163 2668 24 66 39
171 5 15042 16698 5 1302 1608 5 65 65 1
179 6264 1362341 2686 6244 2554 7132 2705 61 67 7
192 1 1639623 1639623 1 3406 3406 1 32 32 1
193 1 1429969 1429969 1 7131 7131 1 45 45 1
207 12 23057 32500 12 1491 1726 12 32 46 7
221 5081 1366870 6649 3432 4527 5226 687 24 69 39
228 73 12281 17929 71 1328 2034 63 49 71 18
229 2 1617478 1723791 2 4590 5578 2 41 42 2
230 3916 1332729 6949 3668 1330 4703 845 62 65 4
238 25 2624456 2625673 24 2535 6465 25 58 75 14
241 154 2554929 2616444 154 2626 7131 148 34 57 17
248 276 15529 30987 274 1040 1222 136 37 79 27
254 3018 33966 6635 2837 1057 1622 539 24 60 33
255 20 1581774 1811334 20 6068 6301 18 33 57 10
256 5 5145 6841 5 367 376 5 58 58 1
270 2 2195579 2262119 2 7102 7123 2 33 34 2
281 32 2379460 616215 32 6042 6086 23 53 64 12
282 7 1292651 24244 7 1607 2455 6 46 55 5
286 123 1521935 5269 123 3793 3940 81 40 66 22
291 21 2419080 3567 21 297 4731 21 54 55 2
316 4 5221 5616 4 505 558 4 32 35 3
319 232 56480 63033 230 1599 313 50 33 64 26
327 15 51647 51894 14 1292 1585 14 47 57 7
332 24 23484 54948 24 1609 1726 16 32 49 11
333 1 14189 14189 1 1550 1550 1 63 63 1
342 49 2579220 2622432 49 4626 6933 48 34 54 14
344 1 6486 6486 1 509 509 1 24 24 1
346 1987 53016 6735 1823 1334 174 294 26 62 32
358 45 59058 60844 44 6746 722 40 57 84 15
363 1198 1260033 2568811 1196 5710 5790 82 55 80 26
384 150 2361175 476024 150 7008 7123 81 38 64 22
387 277 5200 6553 252 243 521 130 65 65 1
392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13
396 8181 1380803 6186 7920 545 798 1743 24 67 39
398 3 5183 5213 2 291 352 3 53 59 3
399 62 51494 59203 61 7073 754 42 55 78 18
412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5
413 2 2036037 2064917 2 3963 4666 2 43 45 2
431 33 2302331 2348449 33 4425 6516 32 69 69 1
447 59 25125 33094 59 1176 1817 56 53 58 6
456 1 53157 53157 1 1556 1556 1 26 26 1
462 5 5456 6280 5 348 4337 5 28 40 5
472 1 1443716 1443716 1 6122 6122 1 42 42 1
491 34 1066102 1183673 34 6606 6822 32 46 67 15
498 896 2230163 3054 895 537 7131 714 24 59 28
504 108 12281 25180 108 1318 1784 94 55 66 12
515 22 1588883 2640809 22 6554 6571 15 46 59 12
518 1 37743 37743 1 1558 1558 1 72 72 1
530 1 3033 3033 1 561 561 1 59 59 1
532 26 5721 6355 25 549 665 14 44 50 7
546 156 2577874 48517 156 1105 324 133 44 51 8
554 12 1665194 2640066 12 1817 2951 12 57 57 1
564 3865 2028049 2083433 3722 1115 985 2203 44 84 41
566 4432 50605 57509 3217 1191 267 459 26 72 39
567 8 5221 5893 7 333 558 8 27 35 4
582 1172 1320619 2019743 1172 5819 7131 757 26 63 30
584 43100 2500 5594 22561 134 4573 1660 48 84 37
589 28 6046 6068 19 345 564 27 55 62 8
595 139 1585165 1683606 138 2231 3598 132 54 84 28
615 3 1056081 1116230 3 5794 5796 2 59 62 3
619 7 1543114 5241 7 2442 3105 7 41 45 3
634 2722 1221058 4999 2686 2426 7131 1735 54 60 7
635 237 2119333 4667 237 561 5999 176 49 60 12
644 5 1774169 2056171 5 5591 6091 4 33 39 3
647 8 51632 64403 8 1457 1624 8 26 34 5
651 1325 1620565 6281 1301 528 792 815 62 63 2
665 13 4598 4789 13 511 558 11 39 46 7
679 1560 1613200 25940 1552 1569 3118 781 49 84 35
704 2 14226 15594 2 1086 1116 2 65 71 2
715 25 1199352 3490 25 5036 5112 23 34 55 13
716 1253 61989 6735 1050 1203 1625 397 52 65 14
730 2584 5560 6170 634 2421 627 293 56 69 14
736 8 1433153 4941 8 339 4594 8 28 36 5
749 2 1326176 1339862 2 4339 6213 2 49 50 2
753 1 53157 53157 1 1556 1556 1 26 26 1
761 63 1443230 6881 63 3154 3204 26 56 73 14
762 49 1449596 1968154 49 2437 3753 48 54 62 9
775 35107 5330 769436 2471 447 6607 656 70 81 12
789 1 1552458 1552458 1 2441 2441 1 62 62 1
794 158 5585 6585 155 495 929 67 24 50 20
839 9 29223 46530 9 1336 1465 9 52 52 1
844 5 2377545 2377635 5 5129 6321 5 53 69 5
846 50 2172273 2589295 50 1582 3053 48 64 68 5
847 2577 56656 63658 1582 1444 838 474 26 63 33
861 1333 5570 6909 839 457 489 37 33 70 34
873 2360 1519811 50487 2248 1310 1784 316 60 68 9
879 228 6704 6785 79 279 507 121 35 66 24
889 5130 2070007 39692 5040 1151 6791 2606 44 66 23
896 4 511246 859452 4 6554 6561 4 67 71 4
912 146 1322641 2238040 146 1366 6354 143 59 59 1
913 82 5495 6870 78 350 565 67 24 43 15
921 763 1580790 416881 763 6191 7131 509 63 64 2
925 318 2500952 5025 309 476 6114 182 32 56 21
931 12 4277 4809 12 238 256 9 63 83 9
942 954 1331 2228193 952 1121 5047 788 65 70 6
948 14 1785593 2600431 14 6550 6598 13 34 49 9
956 5 5755 6023 5 359 411 5 43 48 4
963 4 3812 3835 4 444 537 4 47 53 4
978 5 51632 58212 5 1127 1556 5 24 32 5
980 53 47201 59744 53 1537 1625 36 41 49 9
987 6033 2020131 763444 4306 256 792 1832 60 64 5
993 4 1615159 1718339 4 1570 3093 4 62 63 2

View File

@ -0,0 +1,15 @@
12 4 21722 2209341 4 1415 2333 4 61 64 3
21 1134 11363 58484 1106 1458 1592 136 26 62 32
22 210 4504 5729 196 291 767 124 47 54 8
26 196 1327684 5221 195 4140 5661 161 28 49 19
28 5 2034378 7102 5 325 3255 2 53 60 4
29 53 45041 45189 45 1580 211 31 55 84 18
38 424 1600675 4653 424 562 5944 244 60 65 6
45 17 62743 674873 17 6239 6494 17 65 76 8
72 1862 1210073 6200 1677 2498 528 859 51 61 11
79 2 2255228 2255293 2 5495 7057 2 65 65 1
85 459 1051571 1829638 459 6402 7131 334 32 61 25
86 10 1748130 1754217 10 4374 7003 10 56 59 4
91 165 5718 5802 75 282 7113 112 41 63 22
94 20 1231916 2050003 20 4802 4917 19 53 59 7
99 2 3665 36667 2 497 697 2 70 71 2

View File

@ -26,7 +26,7 @@ DETACH TABLE test;
ATTACH TABLE test;
"
for i in {1..1000}
for i in {1..100}
do
echo "
WITH ${i} AS try

View File

@ -0,0 +1,13 @@
912 146 1322641 2238040 146 1366 6354 143 59 59 1
913 82 5495 6870 78 350 565 67 24 43 15
921 763 1580790 416881 763 6191 7131 509 63 64 2
925 318 2500952 5025 309 476 6114 182 32 56 21
931 12 4277 4809 12 238 256 9 63 83 9
942 954 1331 2228193 952 1121 5047 788 65 70 6
948 14 1785593 2600431 14 6550 6598 13 34 49 9
956 5 5755 6023 5 359 411 5 43 48 4
963 4 3812 3835 4 444 537 4 47 53 4
978 5 51632 58212 5 1127 1556 5 24 32 5
980 53 47201 59744 53 1537 1625 36 41 49 9
987 6033 2020131 763444 4306 256 792 1832 60 64 5
993 4 1615159 1718339 4 1570 3093 4 62 63 2

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {901..1000}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,13 @@
103 1 2446615 2446615 1 2498 2498 1 58 58 1
106 72 6149 6699 67 527 826 40 61 61 1
111 43 2273186 5272 43 492 4923 4 54 72 15
120 3129 45117 6735 2868 1030 1625 561 59 64 6
138 2 49243 49374 2 1428 1519 2 47 48 2
143 100 23321 63639 100 1115 1624 88 51 51 1
145 1 2447976 2447976 1 6173 6173 1 44 44 1
153 16 13748 16881 16 1506 1636 16 54 68 9
159 19952 1525336 7131 12957 1280 6163 2668 24 66 39
171 5 15042 16698 5 1302 1608 5 65 65 1
179 6264 1362341 2686 6244 2554 7132 2705 61 67 7
192 1 1639623 1639623 1 3406 3406 1 32 32 1
193 1 1429969 1429969 1 7131 7131 1 45 45 1

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {101..200}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,16 @@
207 12 23057 32500 12 1491 1726 12 32 46 7
221 5081 1366870 6649 3432 4527 5226 687 24 69 39
228 73 12281 17929 71 1328 2034 63 49 71 18
229 2 1617478 1723791 2 4590 5578 2 41 42 2
230 3916 1332729 6949 3668 1330 4703 845 62 65 4
238 25 2624456 2625673 24 2535 6465 25 58 75 14
241 154 2554929 2616444 154 2626 7131 148 34 57 17
248 276 15529 30987 274 1040 1222 136 37 79 27
254 3018 33966 6635 2837 1057 1622 539 24 60 33
255 20 1581774 1811334 20 6068 6301 18 33 57 10
256 5 5145 6841 5 367 376 5 58 58 1
270 2 2195579 2262119 2 7102 7123 2 33 34 2
281 32 2379460 616215 32 6042 6086 23 53 64 12
282 7 1292651 24244 7 1607 2455 6 46 55 5
286 123 1521935 5269 123 3793 3940 81 40 66 22
291 21 2419080 3567 21 297 4731 21 54 55 2

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {201..300}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,16 @@
316 4 5221 5616 4 505 558 4 32 35 3
319 232 56480 63033 230 1599 313 50 33 64 26
327 15 51647 51894 14 1292 1585 14 47 57 7
332 24 23484 54948 24 1609 1726 16 32 49 11
333 1 14189 14189 1 1550 1550 1 63 63 1
342 49 2579220 2622432 49 4626 6933 48 34 54 14
344 1 6486 6486 1 509 509 1 24 24 1
346 1987 53016 6735 1823 1334 174 294 26 62 32
358 45 59058 60844 44 6746 722 40 57 84 15
363 1198 1260033 2568811 1196 5710 5790 82 55 80 26
384 150 2361175 476024 150 7008 7123 81 38 64 22
387 277 5200 6553 252 243 521 130 65 65 1
392 1877 1607428 2030850 1875 1416 7131 1379 54 66 13
396 8181 1380803 6186 7920 545 798 1743 24 67 39
398 3 5183 5213 2 291 352 3 53 59 3
399 62 51494 59203 61 7073 754 42 55 78 18

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {301..400}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,9 @@
412 2141 1360120 2189792 2136 2491 5658 1371 71 75 5
413 2 2036037 2064917 2 3963 4666 2 43 45 2
431 33 2302331 2348449 33 4425 6516 32 69 69 1
447 59 25125 33094 59 1176 1817 56 53 58 6
456 1 53157 53157 1 1556 1556 1 26 26 1
462 5 5456 6280 5 348 4337 5 28 40 5
472 1 1443716 1443716 1 6122 6122 1 42 42 1
491 34 1066102 1183673 34 6606 6822 32 46 67 15
498 896 2230163 3054 895 537 7131 714 24 59 28

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
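# Pseudo-random sort-key range probe, tries 401..500.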
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {401..500}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,14 @@
504 108 12281 25180 108 1318 1784 94 55 66 12
515 22 1588883 2640809 22 6554 6571 15 46 59 12
518 1 37743 37743 1 1558 1558 1 72 72 1
530 1 3033 3033 1 561 561 1 59 59 1
532 26 5721 6355 25 549 665 14 44 50 7
546 156 2577874 48517 156 1105 324 133 44 51 8
554 12 1665194 2640066 12 1817 2951 12 57 57 1
564 3865 2028049 2083433 3722 1115 985 2203 44 84 41
566 4432 50605 57509 3217 1191 267 459 26 72 39
567 8 5221 5893 7 333 558 8 27 35 4
582 1172 1320619 2019743 1172 5819 7131 757 26 63 30
584 43100 2500 5594 22561 134 4573 1660 48 84 37
589 28 6046 6068 19 345 564 27 55 62 8
595 139 1585165 1683606 138 2231 3598 132 54 84 28

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
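# Pseudo-random sort-key range probe, tries 501..600.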
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {501..600}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,9 @@
615 3 1056081 1116230 3 5794 5796 2 59 62 3
619 7 1543114 5241 7 2442 3105 7 41 45 3
634 2722 1221058 4999 2686 2426 7131 1735 54 60 7
635 237 2119333 4667 237 561 5999 176 49 60 12
644 5 1774169 2056171 5 5591 6091 4 33 39 3
647 8 51632 64403 8 1457 1624 8 26 34 5
651 1325 1620565 6281 1301 528 792 815 62 63 2
665 13 4598 4789 13 511 558 11 39 46 7
679 1560 1613200 25940 1552 1569 3118 781 49 84 35

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
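# Pseudo-random sort-key range probe, tries 601..700.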
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {601..700}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,12 @@
704 2 14226 15594 2 1086 1116 2 65 71 2
715 25 1199352 3490 25 5036 5112 23 34 55 13
716 1253 61989 6735 1050 1203 1625 397 52 65 14
730 2584 5560 6170 634 2421 627 293 56 69 14
736 8 1433153 4941 8 339 4594 8 28 36 5
749 2 1326176 1339862 2 4339 6213 2 49 50 2
753 1 53157 53157 1 1556 1556 1 26 26 1
761 63 1443230 6881 63 3154 3204 26 56 73 14
762 49 1449596 1968154 49 2437 3753 48 54 62 9
775 35107 5330 769436 2471 447 6607 656 70 81 12
789 1 1552458 1552458 1 2441 2441 1 62 62 1
794 158 5585 6585 155 495 929 67 24 50 20

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
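# Pseudo-random sort-key range probe, tries 701..800.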
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {701..800}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -0,0 +1,9 @@
839 9 29223 46530 9 1336 1465 9 52 52 1
844 5 2377545 2377635 5 5129 6321 5 53 69 5
846 50 2172273 2589295 50 1582 3053 48 64 68 5
847 2577 56656 63658 1582 1444 838 474 26 63 33
861 1333 5570 6909 839 457 489 37 33 70 34
873 2360 1519811 50487 2248 1310 1784 316 60 68 9
879 228 6704 6785 79 279 507 121 35 66 24
889 5130 2070007 39692 5040 1151 6791 2606 44 66 23
896 4 511246 859452 4 6554 6561 4 67 71 4

View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
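# Pseudo-random sort-key range probe, tries 801..900.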
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiquery "
DROP TABLE IF EXISTS test;
CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11;
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/4)), round(pow(sipHash64(2, number), 1/6)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/3)), round(pow(sipHash64(2, number), 1/5)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
INSERT INTO test
SELECT round(pow(sipHash64(1, number), 1/5)), round(pow(sipHash64(2, number), 1/7)), round(pow(sipHash64(3, number), 1/10))
FROM numbers(100000);
DETACH TABLE test;
ATTACH TABLE test;
"
for i in {801..900}
do
echo "
WITH ${i} AS try
SELECT try, count(), min(a), max(a), uniqExact(a), min(b), max(b), uniqExact(b), min(c), max(c), uniqExact(c) FROM test
WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1)::String
AND a <= (a1 + round(pow(sipHash64(3, try), 1 / (3 + sipHash64(4, try) % 8))))::String
AND b >= (round(pow(sipHash64(5, try), 1 / (3 + sipHash64(6, try) % 8))) AS b1)::String
AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String
AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String
AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String
HAVING count() > 0;
"
done | ${CLICKHOUSE_CLIENT} --multiquery
${CLICKHOUSE_CLIENT} --multiquery "DROP TABLE test"

View File

@ -1,92 +0,0 @@
MergeTree compact + horizontal merge
test
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None
MergeTree wide + horizontal merge
test
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None
MergeTree compact + vertical merge
test
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None
MergeTree wide + vertical merge
test
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None

View File

@ -1,53 +0,0 @@
#!/usr/bin/env bash
# Tags: long
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0"
function test()
{
echo "test"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
}
$CH_CLIENT -q "drop table if exists test;"
echo "MergeTree compact + horizontal merge"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;"
test
$CH_CLIENT -q "drop table test;"
echo "MergeTree wide + horizontal merge"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;"
test
$CH_CLIENT -q "drop table test;"
echo "MergeTree compact + vertical merge"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;"
test
$CH_CLIENT -q "drop table test;"
echo "MergeTree wide + vertical merge"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;"
test
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,21 @@
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None

View File

@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Tags: long
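# Nested Dynamic inside a Tuple: compact parts (high wide-part thresholds), horizontal merge.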
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0"
$CH_CLIENT -q "drop table if exists test;"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,21 @@
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None

View File

@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Tags: long
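# Nested Dynamic inside a Tuple: compact parts, vertical merge algorithm forced on.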
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0"
$CH_CLIENT -q "drop table if exists test;"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,21 @@
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None

View File

@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Tags: long
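# Nested Dynamic inside a Tuple: wide parts (thresholds set to 1), horizontal merge.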
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0"
$CH_CLIENT -q "drop table if exists test;"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,21 @@
16667 Tuple(a Dynamic(max_types=3)):Date
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):String
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 UInt64:None
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 UInt64:None
16667 Tuple(a Dynamic(max_types=3)):DateTime
33333 Tuple(a Dynamic(max_types=3)):Array(UInt8)
50000 Tuple(a Dynamic(max_types=3)):UInt64
66667 Tuple(a Dynamic(max_types=3)):String
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
133333 Tuple(a Dynamic(max_types=3)):None
50000 Tuple(a Dynamic(max_types=3)):UInt64
100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64)
100000 UInt64:None
116667 Tuple(a Dynamic(max_types=3)):String
133333 Tuple(a Dynamic(max_types=3)):None

View File

@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Tags: long
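# Nested Dynamic inside a Tuple: wide parts with the vertical merge algorithm forced on.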
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# reset --log_comment
CLICKHOUSE_LOG_COMMENT=
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0"
$CH_CLIENT -q "drop table if exists test;"
$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;"
$CH_CLIENT -q "system stop merges test"
$CH_CLIENT -q "insert into test select number, number from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)"
$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -nm -q "system start merges test; optimize table test final;"
$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type"
$CH_CLIENT -q "drop table test;"

View File

@ -0,0 +1,4 @@
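-- rowNumberInAllBlocks() next to a partition-key filter should give the same count with allow_experimental_analyzer = 0 and 1.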
CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x;
INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100;
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0;
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1;

View File

@ -0,0 +1 @@
SELECT NOT ((SELECT 1))

View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_FORMAT --oneline --query "SELECT NOT((SELECT 1))"

View File

@ -1,6 +1,6 @@
DROP TABLE IF EXISTS hits_none;
CREATE TABLE hits_none (Title String CODEC(NONE)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO hits_none SELECT Title FROM test.hits;
INSERT INTO hits_none SELECT Title FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16;
SET min_bytes_to_use_mmap_io = 1;
SELECT sum(length(Title)) FROM hits_none;