Merge remote-tracking branch 'blessed/master' into groupArrayIntersect

This commit is contained in:
Raúl Marín 2024-07-24 15:16:02 +02:00
commit 0c0e2d0ffb
236 changed files with 5004 additions and 2875 deletions

View File

@ -241,8 +241,9 @@ jobs:
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs:
- RunConfig
- Builds_Report
- FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug
@ -257,6 +258,7 @@ jobs:
with:
clear-repository: true
- name: Finish label
if: ${{ !failure() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
@ -264,3 +266,10 @@ jobs:
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -121,34 +121,6 @@ jobs:
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Debug
run: |
echo need with different filters
cat << 'EOF'
${{ toJSON(needs) }}
${{ toJSON(needs.*.result) }}
no failures ${{ !contains(needs.*.result, 'failure') }}
no skips ${{ !contains(needs.*.result, 'skipped') }}
no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
EOF
- name: Not ready
# fail the job to be able to restart it
if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
run: exit 1
- name: Check out repository code
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
uses: ClickHouse/checkout@v1
- name: Mark Commit Release Ready
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 mark_release_ready.py
FinishCheck:
if: ${{ !cancelled() }}
needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
@ -160,3 +132,10 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -93,7 +93,7 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
CheckReadyForMerge:
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
if: ${{ !cancelled() }}
# Test_2 or Test_3 must not have jobs required for Mergeable check
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
runs-on: [self-hosted, style-checker-aarch64]
@ -101,6 +101,14 @@ jobs:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check and set merge status
if: ${{ needs.StyleCheck.result == 'success' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -44,3 +44,17 @@ jobs:
with:
data: "${{ needs.RunConfig.outputs.data }}"
set_latest: true
# Final job of the workflow: passes the results of the jobs listed in `needs`
# to ci_buddy.py so it can check the overall workflow status.
CheckWorkflow:
# `!cancelled()` replaces the default `success()` condition, so this job still
# runs when a needed job has failed; it is skipped only on cancellation.
if: ${{ !cancelled() }}
needs: [RunConfig, BuildDockers]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
# Dumps the `needs` context as JSON into WORKFLOW_RESULT_FILE, then runs
# ci_buddy.py with --check-wf-status.
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -151,9 +151,10 @@ jobs:
data: ${{ needs.RunConfig.outputs.data }}
CheckReadyForMerge:
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
# Test_2 or Test_3 must not have jobs required for Mergeable check
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1]
if: ${{ !cancelled() }}
# Test_2 or Test_3 do not have the jobs required for Mergeable check,
# however, set them as "needs" to get all checks results before the automatic merge occurs.
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
@ -161,9 +162,17 @@ jobs:
with:
filter: tree:0
- name: Check and set merge status
if: ${{ needs.StyleCheck.result == 'success' }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status
################################# Stage Final #################################
#

View File

@ -441,8 +441,9 @@ jobs:
runner_type: stress-tester
data: ${{ needs.RunConfig.outputs.data }}
FinishCheck:
if: ${{ !failure() && !cancelled() }}
if: ${{ !cancelled() }}
needs:
- RunConfig
- DockerServerImage
- DockerKeeperImage
- Builds_Report
@ -478,9 +479,18 @@ jobs:
with:
clear-repository: true
- name: Finish label
if: ${{ !failure() }}
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
run: |
export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF
python3 ./tests/ci/ci_buddy.py --check-wf-status

View File

@ -14,3 +14,9 @@ rules:
comments:
min-spaces-from-content: 1
document-start: disable
colons: disable
indentation: disable
line-length: disable
trailing-spaces: disable
truthy: disable
new-line-at-end-of-file: disable

View File

@ -2,11 +2,11 @@
# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54488)
SET(VERSION_REVISION 54489)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 7)
SET(VERSION_MINOR 8)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9)
SET(VERSION_DESCRIBE v24.7.1.1-testing)
SET(VERSION_STRING 24.7.1.1)
SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af)
SET(VERSION_DESCRIBE v24.8.1.1-testing)
SET(VERSION_STRING 24.8.1.1)
# end of autochange

View File

@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta)
add_library(_datasketches INTERFACE)
target_include_directories(_datasketches SYSTEM BEFORE INTERFACE
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include"
"${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include")
add_library(ch_contrib::datasketches ALIAS _datasketches)

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 8f28e64d15819d2d096badd598c7d85bebddb1f2
Subproject commit fe854449e24bedfa26e38465b84374312dbd587f

View File

@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update --yes \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \
&& env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

View File

@ -191,8 +191,8 @@ else
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
else
@ -200,7 +200,7 @@ else
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
fi
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, 
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
fi
clickhouse-client --query "SHOW TABLES FROM test"

View File

@ -209,9 +209,9 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"

View File

@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
[HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains.
- `count_min`
[Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be overridden at the column level:

View File

@ -49,7 +49,7 @@ enum class QueryTreeNodeType : uint8_t
/// Convert query tree node type to string
const char * toString(QueryTreeNodeType type);
/** Query tree is semantical representation of query.
/** Query tree is a semantic representation of query.
* Query tree node represents a node in the query tree.
* IQueryTreeNode is base class for all query tree nodes.
*

View File

@ -105,7 +105,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS
auto new_query = query.clone();
adjustCreateQueryForBackup(new_query, global_context);
ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*new_query);
create.setUUID({});
create.resetUUIDs();
create.if_not_exists = false;
return new_query;
};

View File

@ -1,4 +1,5 @@
#include <Backups/RestoreCoordinationLocal.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Common/logger_useful.h>
@ -67,7 +68,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
auto it = create_query_uuids.find(query_str);
if (it != create_query_uuids.end())
{
create_query.setUUID(it->second);
it->second.copyToQuery(create_query);
return true;
}
return false;
@ -79,7 +80,8 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
return;
}
auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true);
CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
new_uuids.copyToQuery(create_query);
{
std::lock_guard lock{mutex};

View File

@ -1,16 +1,17 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Common/Logger.h>
#include <mutex>
#include <set>
#include <unordered_set>
namespace Poco { class Logger; }
namespace DB
{
class ASTCreateQuery;
/// Implementation of the IRestoreCoordination interface performing coordination in memory.
class RestoreCoordinationLocal : public IRestoreCoordination
@ -55,7 +56,7 @@ private:
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
std::unordered_map<String, ASTCreateQuery::UUIDs> create_query_uuids;
std::unordered_map<String, CreateQueryUUIDs> create_query_uuids;
std::unordered_set<String /* root_zk_path */> acquired_data_in_keeper_map_tables;
mutable std::mutex mutex;

View File

@ -3,6 +3,7 @@
#include <Backups/RestoreCoordinationRemote.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Parsers/formatAST.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Common/ZooKeeper/KeeperException.h>
@ -269,7 +270,8 @@ bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String &
void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query)
{
String query_str = serializeAST(create_query);
String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString();
CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
String new_uuids_str = new_uuids.toString();
auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable");
holder.retries_ctl.retryLoop(
@ -281,11 +283,14 @@ void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_que
Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent);
if (res == Coordination::Error::ZOK)
{
new_uuids.copyToQuery(create_query);
return;
}
if (res == Coordination::Error::ZNODEEXISTS)
{
create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path)));
CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query);
return;
}

View File

@ -543,7 +543,7 @@ if (TARGET ch_contrib::libpqxx)
endif()
if (TARGET ch_contrib::datasketches)
target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches)
dbms_target_link_libraries(PUBLIC ch_contrib::datasketches)
endif ()
target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)

View File

@ -11,6 +11,7 @@
#include <base/cgroupsv2.h>
#include <base/getMemoryAmount.h>
#include <base/sleep.h>
#include <fmt/ranges.h>
#include <cstdint>
#include <filesystem>
@ -45,26 +46,33 @@ namespace
/// kernel 5
/// rss 15
/// [...]
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
using Metrics = std::map<std::string, uint64_t>;
Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf)
{
Metrics metrics;
while (!buf.eof())
{
std::string current_key;
readStringUntilWhitespace(current_key, buf);
if (current_key != key)
{
std::string dummy;
readStringUntilNewlineInto(dummy, buf);
buf.ignore();
continue;
}
assertChar(' ', buf);
uint64_t value = 0;
readIntText(value, buf);
return value;
}
assertChar('\n', buf);
auto [_, inserted] = metrics.emplace(std::move(current_key), value);
chassert(inserted, "Duplicate keys in stat file");
}
return metrics;
}
/// Returns the value stored under `key` in a cgroups stat file.
/// All metrics of `buf` are parsed via readAllMetricsFromStatFile(); if `key` is not
/// among them, an INCORRECT_DATA exception naming the key and the file is thrown.
uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
{
    const auto metrics = readAllMetricsFromStatFile(buf);
    const auto found = metrics.find(key);

    if (found == metrics.end())
        throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName());

    return found->second;
}
@ -79,6 +87,13 @@ struct CgroupsV1Reader : ICgroupsReader
return readMetricFromStatFile(buf, "rss");
}
/// Re-reads the stat file from its beginning and returns every metric in it,
/// formatted as a single string by fmt.
std::string dumpAllStats() override
{
    /// `buf` is guarded by `mutex`; hold it for both the rewind and the parse.
    std::lock_guard lock(mutex);
    buf.rewind();
    const auto metrics = readAllMetricsFromStatFile(buf);
    return fmt::format("{}", metrics);
}
private:
std::mutex mutex;
ReadBufferFromFile buf TSA_GUARDED_BY(mutex);
@ -106,6 +121,13 @@ struct CgroupsV2Reader : ICgroupsReader
return mem_usage;
}
/// Re-reads the stat file from its beginning and returns every metric in it,
/// formatted as a single string by fmt.
std::string dumpAllStats() override
{
    /// Hold `mutex` for both the rewind and the parse of `stat_buf`.
    std::lock_guard lock(mutex);
    stat_buf.rewind();
    const auto metrics = readAllMetricsFromStatFile(stat_buf);
    return fmt::format("{}", metrics);
}
private:
std::mutex mutex;
ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex);
@ -178,10 +200,7 @@ CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait
{
const auto [cgroup_path, version] = getCgroupsPath();
if (version == CgroupsVersion::V2)
cgroup_reader = std::make_unique<CgroupsV2Reader>(cgroup_path);
else
cgroup_reader = std::make_unique<CgroupsV1Reader>(cgroup_path);
cgroup_reader = createCgroupsReader(version, cgroup_path);
LOG_INFO(
log,
@ -234,7 +253,12 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint
# endif
/// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them.
uint64_t memory_usage = cgroup_reader->readMemoryUsage();
LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage));
LOG_TRACE(
log,
"Read current memory usage {} bytes ({}) from cgroups, full available stats: {}",
memory_usage,
ReadableSize(memory_usage),
cgroup_reader->dumpAllStats());
MemoryTracker::setRSS(memory_usage, 0);
LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage));
@ -338,6 +362,13 @@ void CgroupsMemoryUsageObserver::runThread()
}
}
/// Factory for the cgroups memory reader: returns a CgroupsV2Reader for version V2
/// and a CgroupsV1Reader for any other version, both constructed from `cgroup_path`.
std::unique_ptr<ICgroupsReader> createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const fs::path & cgroup_path)
{
    const bool is_v2 = (version == CgroupsMemoryUsageObserver::CgroupsVersion::V2);
    if (!is_v2)
        return std::make_unique<CgroupsV1Reader>(cgroup_path);

    return std::make_unique<CgroupsV2Reader>(cgroup_path);
}
}
#endif

View File

@ -14,6 +14,8 @@ struct ICgroupsReader
virtual ~ICgroupsReader() = default;
virtual uint64_t readMemoryUsage() = 0;
virtual std::string dumpAllStats() = 0;
};
/// Does two things:
@ -81,6 +83,9 @@ private:
bool quit = false;
};
std::unique_ptr<ICgroupsReader>
createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const std::filesystem::path & cgroup_path);
#else
class CgroupsMemoryUsageObserver
{

View File

@ -57,7 +57,8 @@ static struct InitFiu
PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \
PAUSEABLE(dummy_pausable_failpoint) \
ONCE(execute_query_calling_empty_set_result_func_on_exception) \
ONCE(receive_timeout_on_table_status_response)
ONCE(receive_timeout_on_table_status_response) \
REGULAR(keepermap_fail_drop_data) \
namespace FailPoints

View File

@ -0,0 +1,178 @@
#if defined(OS_LINUX)
#include <gtest/gtest.h>
#include <cstdint>
#include <filesystem>
#include <IO/WriteBufferFromFile.h>
#include <Common/CgroupsMemoryUsageObserver.h>
#include <Common/filesystemHelpers.h>
using namespace DB;
/// Sample `memory.stat` contents used as test input. The fixture's SetUp() indexes this
/// array with the integer value of the CgroupsVersion parameter; the first entry carries
/// the `rss` value expected for V1 and the second the `inactive_file` value used for V2.
const std::string SAMPLE_FILE[2] = {
R"(cache 4673703936
rss 2232029184
rss_huge 0
shmem 0
mapped_file 344678400
dirty 4730880
writeback 135168
swap 0
pgpgin 2038569918
pgpgout 2036883790
pgfault 2055373287
pgmajfault 0
inactive_anon 2156335104
active_anon 0
inactive_file 2841305088
active_file 1653915648
unevictable 256008192
hierarchical_memory_limit 8589934592
hierarchical_memsw_limit 8589934592
total_cache 4673703936
total_rss 2232029184
total_rss_huge 0
total_shmem 0
total_mapped_file 344678400
total_dirty 4730880
total_writeback 135168
total_swap 0
total_pgpgin 2038569918
total_pgpgout 2036883790
total_pgfault 2055373287
total_pgmajfault 0
total_inactive_anon 2156335104
total_active_anon 0
total_inactive_file 2841305088
total_active_file 1653915648
total_unevictable 256008192
)",
R"(anon 10429399040
file 17410793472
kernel 1537789952
kernel_stack 3833856
pagetables 65441792
sec_pagetables 0
percpu 15232
sock 0
vmalloc 0
shmem 0
zswap 0
zswapped 0
file_mapped 344010752
file_dirty 2060857344
file_writeback 0
swapcached 0
anon_thp 0
file_thp 0
shmem_thp 0
inactive_anon 0
active_anon 10429370368
inactive_file 8693084160
active_file 8717561856
unevictable 0
slab_reclaimable 1460982504
slab_unreclaimable 5152864
slab 1466135368
workingset_refault_anon 0
workingset_refault_file 0
workingset_activate_anon 0
workingset_activate_file 0
workingset_restore_anon 0
workingset_restore_file 0
workingset_nodereclaim 0
pgscan 0
pgsteal 0
pgscan_kswapd 0
pgscan_direct 0
pgscan_khugepaged 0
pgsteal_kswapd 0
pgsteal_direct 0
pgsteal_khugepaged 0
pgfault 43026352
pgmajfault 36762
pgrefill 0
pgactivate 0
pgdeactivate 0
pglazyfree 259
pglazyfreed 0
zswpin 0
zswpout 0
thp_fault_alloc 0
thp_collapse_alloc 0
)"};
/// Expected dumpAllStats() output for the corresponding SAMPLE_FILE entry:
/// the same metrics rendered as a `{"key": value, ...}` string with keys in sorted order.
const std::string EXPECTED[2]
= {"{\"active_anon\": 0, \"active_file\": 1653915648, \"cache\": 4673703936, \"dirty\": 4730880, \"hierarchical_memory_limit\": "
"8589934592, \"hierarchical_memsw_limit\": 8589934592, \"inactive_anon\": 2156335104, \"inactive_file\": 2841305088, "
"\"mapped_file\": 344678400, \"pgfault\": 2055373287, \"pgmajfault\": 0, \"pgpgin\": 2038569918, \"pgpgout\": 2036883790, \"rss\": "
"2232029184, \"rss_huge\": 0, \"shmem\": 0, \"swap\": 0, \"total_active_anon\": 0, \"total_active_file\": 1653915648, "
"\"total_cache\": 4673703936, \"total_dirty\": 4730880, \"total_inactive_anon\": 2156335104, \"total_inactive_file\": 2841305088, "
"\"total_mapped_file\": 344678400, \"total_pgfault\": 2055373287, \"total_pgmajfault\": 0, \"total_pgpgin\": 2038569918, "
"\"total_pgpgout\": 2036883790, \"total_rss\": 2232029184, \"total_rss_huge\": 0, \"total_shmem\": 0, \"total_swap\": 0, "
"\"total_unevictable\": 256008192, \"total_writeback\": 135168, \"unevictable\": 256008192, \"writeback\": 135168}",
"{\"active_anon\": 10429370368, \"active_file\": 8717561856, \"anon\": 10429399040, \"anon_thp\": 0, \"file\": 17410793472, "
"\"file_dirty\": 2060857344, \"file_mapped\": 344010752, \"file_thp\": 0, \"file_writeback\": 0, \"inactive_anon\": 0, "
"\"inactive_file\": 8693084160, \"kernel\": 1537789952, \"kernel_stack\": 3833856, \"pagetables\": 65441792, \"percpu\": 15232, "
"\"pgactivate\": 0, \"pgdeactivate\": 0, \"pgfault\": 43026352, \"pglazyfree\": 259, \"pglazyfreed\": 0, \"pgmajfault\": 36762, "
"\"pgrefill\": 0, \"pgscan\": 0, \"pgscan_direct\": 0, \"pgscan_khugepaged\": 0, \"pgscan_kswapd\": 0, \"pgsteal\": 0, "
"\"pgsteal_direct\": 0, \"pgsteal_khugepaged\": 0, \"pgsteal_kswapd\": 0, \"sec_pagetables\": 0, \"shmem\": 0, \"shmem_thp\": 0, "
"\"slab\": 1466135368, \"slab_reclaimable\": 1460982504, \"slab_unreclaimable\": 5152864, \"sock\": 0, \"swapcached\": 0, "
"\"thp_collapse_alloc\": 0, \"thp_fault_alloc\": 0, \"unevictable\": 0, \"vmalloc\": 0, \"workingset_activate_anon\": 0, "
"\"workingset_activate_file\": 0, \"workingset_nodereclaim\": 0, \"workingset_refault_anon\": 0, \"workingset_refault_file\": 0, "
"\"workingset_restore_anon\": 0, \"workingset_restore_file\": 0, \"zswap\": 0, \"zswapped\": 0, \"zswpin\": 0, \"zswpout\": 0}"};
/// Fixture parameterized by cgroups version. Before each test it creates a scratch
/// directory `./test_cgroups_<version>` containing the files a cgroups reader consumes.
class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam<CgroupsMemoryUsageObserver::CgroupsVersion>
{
    void SetUp() override
    {
        const auto cgroups_version = GetParam();

        tmp_dir = fmt::format("./test_cgroups_{}", magic_enum::enum_name(cgroups_version));
        fs::create_directories(tmp_dir);

        /// memory.stat is written for both versions; the sample is picked by the enum's integer value.
        const std::string & sample = SAMPLE_FILE[static_cast<uint8_t>(cgroups_version)];
        WriteBufferFromFile stat_file(tmp_dir + "/memory.stat");
        stat_file.write(sample.data(), sample.size());
        stat_file.sync();

        /// Only the V2 case additionally gets a memory.current file.
        if (cgroups_version == CgroupsMemoryUsageObserver::CgroupsVersion::V2)
        {
            WriteBufferFromFile current_file(tmp_dir + "/memory.current");
            current_file.write("29645422592", 11);
            current_file.sync();
        }
    }

protected:
    /// Directory holding the generated sample files; handed to createCgroupsReader() by the tests.
    std::string tmp_dir;
};
/// Checks readMemoryUsage() against the sample files written by the fixture:
/// for V1 the result must equal `rss` from memory.stat, for V2 it must equal
/// memory.current (29645422592) minus `inactive_file` (8693084160) from memory.stat.
TEST_P(CgroupsMemoryUsageObserverFixture, ReadMemoryUsageTest)
{
const auto version = GetParam();
auto reader = createCgroupsReader(version, tmp_dir);
ASSERT_EQ(
reader->readMemoryUsage(),
version == CgroupsMemoryUsageObserver::CgroupsVersion::V1 ? /* rss from memory.stat */ 2232029184
: /* value from memory.current - inactive_file */ 20952338432);
}
/// Checks that dumpAllStats() renders the sample memory.stat exactly as the
/// EXPECTED string for the tested cgroups version.
TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest)
{
const auto version = GetParam();
auto reader = createCgroupsReader(version, tmp_dir);
/// EXPECTED is indexed by the enum's integer value, like SAMPLE_FILE in the fixture.
ASSERT_EQ(reader->dumpAllStats(), EXPECTED[static_cast<uint8_t>(version)]);
}
/// Instantiates every TEST_P of the fixture once for each cgroups version (V1 and V2).
INSTANTIATE_TEST_SUITE_P(
CgroupsMemoryUsageObserverTests,
CgroupsMemoryUsageObserverFixture,
::testing::Values(CgroupsMemoryUsageObserver::CgroupsVersion::V1, CgroupsMemoryUsageObserver::CgroupsVersion::V2));
#endif

View File

@ -33,7 +33,7 @@ size_t toMilliseconds(auto duration)
return std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
}
const auto epsilon = 500us;
const auto epsilon = 1ms;
class ResolvePoolMock : public DB::HostResolver
{
@ -358,53 +358,59 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses
TEST_F(ResolvePoolTest, BannedForConsiquenceFail)
{
auto history = 5ms;
auto history = 10ms;
auto resolver = make_resolver(toMilliseconds(history));
auto failed_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*failed_addr));
auto start_at = now();
failed_addr.setFail();
auto start_at = now();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
sleep_until(start_at + history + epsilon);
start_at = now();
resolver->update();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
failed_addr.setFail();
start_at = now();
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
sleep_until(start_at + history + epsilon);
start_at = now();
resolver->update();
// too much time has passed
if (now() > start_at + 2*history - epsilon)
return;
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
// The IP is still banned after history_ms + update, because this was its second consecutive failure
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + 2*history - epsilon);
}
TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
{
auto history = 5ms;
auto history = 10ms;
auto resolver = make_resolver(toMilliseconds(history));
auto failed_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*failed_addr));
auto start_at = now();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
failed_addr.setFail();
auto start_at = now();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
@ -413,6 +419,7 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
sleep_until(start_at + history + epsilon);
resolver->update();
// ip is cleared after just 1 history_ms interval.
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));

View File

@ -383,7 +383,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
LockMemoryExceptionInThread::removeUniqueLock();
};
asio_opts.thread_pool_size_ = getNumberOfPhysicalCPUCores();
/// Use at least 16 threads for network communication in asio.
/// asio is an async framework, so even 1 thread should be enough, but
/// as a safeguard it's better to have some redundant capacity here.
asio_opts.thread_pool_size_ = std::max(16U, getNumberOfPhysicalCPUCores());
if (state_manager->isSecure())
{

View File

@ -125,23 +125,6 @@ DataTypePtr DataTypeFactory::getImpl(const String & family_name_param, const AST
{
String family_name = getAliasToOrName(family_name_param);
if (endsWith(family_name, "WithDictionary"))
{
ASTPtr low_cardinality_params = std::make_shared<ASTExpressionList>();
String param_name = family_name.substr(0, family_name.size() - strlen("WithDictionary"));
if (parameters)
{
auto func = std::make_shared<ASTFunction>();
func->name = param_name;
func->arguments = parameters;
low_cardinality_params->children.push_back(func);
}
else
low_cardinality_params->children.push_back(std::make_shared<ASTIdentifier>(param_name));
return getImpl<nullptr_on_error>("LowCardinality", low_cardinality_params);
}
const auto * creator = findCreatorByName<nullptr_on_error>(family_name);
if constexpr (nullptr_on_error)
{

View File

@ -80,13 +80,20 @@ namespace
/// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
void visitCreateQuery(const ASTCreateQuery & create)
{
QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name};
if (!to_table.table.empty())
if (create.targets)
{
/// TO target_table (for materialized views)
if (to_table.database.empty())
to_table.database = current_database;
dependencies.emplace(to_table);
for (const auto & target : create.targets->targets)
{
const auto & table_id = target.table_id;
if (!table_id.table_name.empty())
{
/// TO target_table (for materialized views)
QualifiedTableName target_name{table_id.database_name, table_id.table_name};
if (target_name.database.empty())
target_name.database = current_database;
dependencies.emplace(target_name);
}
}
}
QualifiedTableName as_table{create.as_database, create.as_table};

View File

@ -86,12 +86,19 @@ namespace
create.as_table = as_table_new.table;
}
QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name};
if (!to_table.table.empty() && !to_table.database.empty())
if (create.targets)
{
auto to_table_new = data.renaming_map.getNewTableName(to_table);
if (to_table_new != to_table)
create.to_table_id = StorageID{to_table_new.database, to_table_new.table};
for (auto & target : create.targets->targets)
{
auto & table_id = target.table_id;
if (!table_id.database_name.empty() && !table_id.table_name.empty())
{
QualifiedTableName target_name{table_id.database_name, table_id.table_name};
auto new_target_name = data.renaming_map.getNewTableName(target_name);
if (new_target_name != target_name)
table_id = StorageID{new_target_name.database, new_target_name.table};
}
}
}
}

View File

@ -729,81 +729,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_
if (auto * create = query->as<ASTCreateQuery>())
{
bool replicated_table = create->storage && create->storage->engine &&
(startsWith(create->storage->engine->name, "Replicated") || startsWith(create->storage->engine->name, "Shared"));
if (!replicated_table || !create->storage->engine->arguments)
return;
if (create->storage)
checkTableEngine(*create, *create->storage, query_context);
ASTs & args_ref = create->storage->engine->arguments->children;
ASTs args = args_ref;
if (args.size() < 2)
return;
/// It can be a constant expression. Try to evaluate it, ignore exception if we cannot.
bool has_expression_argument = args_ref[0]->as<ASTFunction>() || args_ref[1]->as<ASTFunction>();
if (has_expression_argument)
if (create->targets)
{
try
{
args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context);
args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context);
}
catch (...) // NOLINT(bugprone-empty-catch)
{
}
for (const auto & inner_table_engine : create->targets->getInnerEngines())
checkTableEngine(*create, *inner_table_engine, query_context);
}
ASTLiteral * arg1 = args[0]->as<ASTLiteral>();
ASTLiteral * arg2 = args[1]->as<ASTLiteral>();
if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String)
return;
String maybe_path = arg1->value.get<String>();
String maybe_replica = arg2->value.get<String>();
/// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments.
/// Let's ensure that some macros are used.
/// NOTE: we cannot check here that substituted values will be actually different on shards and replicas.
Macros::MacroExpansionInfo info;
info.table_id = {getDatabaseName(), create->getTable(), create->uuid};
info.shard = getShardName();
info.replica = getReplicaName();
query_context->getMacros()->expand(maybe_path, info);
bool maybe_shard_macros = info.expanded_other;
info.expanded_other = false;
query_context->getMacros()->expand(maybe_replica, info);
bool maybe_replica_macros = info.expanded_other;
bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros");
if (!enable_functional_tests_helper)
{
if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments)
LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments");
else
throw Exception(ErrorCodes::INCORRECT_QUERY,
"It's not allowed to specify explicit zookeeper_path and replica_name "
"for ReplicatedMergeTree arguments in Replicated database. If you really want to "
"specify them explicitly, enable setting "
"database_replicated_allow_replicated_engine_arguments.");
}
if (maybe_shard_macros && maybe_replica_macros)
return;
if (enable_functional_tests_helper && !has_expression_argument)
{
if (maybe_path.empty() || maybe_path.back() != '/')
maybe_path += '/';
args_ref[0]->as<ASTLiteral>()->value = maybe_path + "auto_{shard}";
args_ref[1]->as<ASTLiteral>()->value = maybe_replica + "auto_{replica}";
return;
}
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. "
"If you really want to specify it explicitly, then you should use some macros "
"to distinguish different shards and replicas");
}
}
@ -827,6 +760,85 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_
}
}
/// Validates the explicit arguments of one table engine definition (`storage`) that belongs to the CREATE query `query`.
///
/// Only engines whose name starts with "Replicated" or "Shared" and that have an argument list are inspected;
/// everything else returns immediately. The first two arguments are treated as a candidate zookeeper_path and
/// replica_name. If both are (or evaluate to) string literals, the function:
///  - throws INCORRECT_QUERY unless the setting database_replicated_allow_replicated_engine_arguments is enabled
///    (this check is skipped when the functional-tests helper config key is present);
///  - returns if both strings triggered `info.expanded_other` during macro expansion;
///  - otherwise either rewrites the two literals in place (functional-tests helper mode, literal arguments only)
///    or throws INCORRECT_QUERY.
///
/// `storage` is taken by non-const reference because the helper mode mutates its engine arguments.
void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const
{
bool replicated_table = storage.engine &&
(startsWith(storage.engine->name, "Replicated") || startsWith(storage.engine->name, "Shared"));
if (!replicated_table || !storage.engine->arguments)
return;
/// `args_ref` aliases the real AST children; `args` is a local copy of that list.
/// Evaluated constants are stored into `args` only, so the query AST is not modified by the evaluation below.
ASTs & args_ref = storage.engine->arguments->children;
ASTs args = args_ref;
/// Fewer than two arguments: nothing that could be a path/replica pair.
if (args.size() < 2)
return;
/// It can be a constant expression. Try to evaluate it, ignore exception if we cannot.
bool has_expression_argument = args_ref[0]->as<ASTFunction>() || args_ref[1]->as<ASTFunction>();
if (has_expression_argument)
{
try
{
args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context);
args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context);
}
catch (...) // NOLINT(bugprone-empty-catch)
{
/// Deliberately best-effort: on failure the unevaluated nodes stay in `args`
/// and the literal check below returns early for them.
}
}
ASTLiteral * arg1 = args[0]->as<ASTLiteral>();
ASTLiteral * arg2 = args[1]->as<ASTLiteral>();
/// Only a pair of string literals is considered an explicit path/replica specification.
if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String)
return;
String maybe_path = arg1->value.get<String>();
String maybe_replica = arg2->value.get<String>();
/// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments.
/// Let's ensure that some macros are used.
/// NOTE: we cannot check here that substituted values will be actually different on shards and replicas.
Macros::MacroExpansionInfo info;
info.table_id = {getDatabaseName(), query.getTable(), query.uuid};
info.shard = getShardName();
info.replica = getReplicaName();
/// The return value of expand() is discarded; only the flags it sets in `info` are used here.
/// NOTE(review): `expanded_other` presumably means "some macro other than the built-in ones was substituted" - confirm in Macros.
query_context->getMacros()->expand(maybe_path, info);
bool maybe_shard_macros = info.expanded_other;
/// Reset the flag so the second expansion reports only on the replica string.
info.expanded_other = false;
query_context->getMacros()->expand(maybe_replica, info);
bool maybe_replica_macros = info.expanded_other;
/// Presence of this config key switches on the helper mode used below and disables the setting check.
bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros");
if (!enable_functional_tests_helper)
{
if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments)
LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments");
else
throw Exception(ErrorCodes::INCORRECT_QUERY,
"It's not allowed to specify explicit zookeeper_path and replica_name "
"for ReplicatedMergeTree arguments in Replicated database. If you really want to "
"specify them explicitly, enable setting "
"database_replicated_allow_replicated_engine_arguments.");
}
/// Both arguments set the flag during expansion: accept the explicit arguments as they are.
if (maybe_shard_macros && maybe_replica_macros)
return;
/// Helper mode: append "auto_{shard}" / "auto_{replica}" to the literals.
/// This writes through `args_ref`, i.e. it changes the actual query AST, and is only done when
/// neither argument was an expression (so both `args_ref` entries are the literals read above).
if (enable_functional_tests_helper && !has_expression_argument)
{
if (maybe_path.empty() || maybe_path.back() != '/')
maybe_path += '/';
args_ref[0]->as<ASTLiteral>()->value = maybe_path + "auto_{shard}";
args_ref[1]->as<ASTLiteral>()->value = maybe_replica + "auto_{replica}";
return;
}
throw Exception(ErrorCodes::INCORRECT_QUERY,
"Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. "
"If you really want to specify it explicitly, then you should use some macros "
"to distinguish different shards and replicas");
}
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags)
{
waitDatabaseStarted();
@ -1312,11 +1324,9 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node
if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query);
bool is_materialized_view_with_inner_table = create.is_materialized_view && create.to_table_id.empty();
create.setDatabase(getDatabaseName());
create.setTable(unescapeForFileName(node_name));
create.attach = is_materialized_view_with_inner_table;
create.attach = create.is_materialized_view_with_inner_table();
return ast;
}

View File

@ -107,6 +107,7 @@ private:
void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);
void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;
void checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const;
void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr);

View File

@ -739,7 +739,8 @@ public:
{
NumberType value;
tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error);
if (!tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error))
return false;
auto & col_vec = assert_cast<ColumnVector<NumberType> &>(dest);
col_vec.insertValue(value);
return true;

View File

@ -5,11 +5,12 @@ namespace DB
{
namespace
{
struct AcoshName
{
static constexpr auto name = "acosh";
};
using FunctionAcosh = FunctionMathUnary<UnaryFunctionVectorized<AcoshName, acosh>>;
struct AcoshName
{
static constexpr auto name = "acosh";
};
using FunctionAcosh = FunctionMathUnary<UnaryFunctionVectorized<AcoshName, acosh>>;
}

View File

@ -6,6 +6,7 @@ namespace DB
{
using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval<AddMicrosecondsImpl>;
REGISTER_FUNCTION(AddMicroseconds)
{
factory.registerFunction<FunctionAddMicroseconds>();

View File

@ -6,6 +6,7 @@ namespace DB
{
using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval<AddMillisecondsImpl>;
REGISTER_FUNCTION(AddMilliseconds)
{
factory.registerFunction<FunctionAddMilliseconds>();

View File

@ -6,6 +6,7 @@ namespace DB
{
using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval<AddNanosecondsImpl>;
REGISTER_FUNCTION(AddNanoseconds)
{
factory.registerFunction<FunctionAddNanoseconds>();

View File

@ -7,7 +7,6 @@
namespace DB
{
namespace
{

View File

@ -57,7 +57,7 @@ private:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & column = arguments[0].column;
const auto & column_char = arguments[1].column;
@ -80,14 +80,13 @@ private:
auto & dst_data = col_res->getChars();
auto & dst_offsets = col_res->getOffsets();
const auto size = src_offsets.size();
dst_data.resize(src_data.size() + size);
dst_offsets.resize(size);
dst_data.resize(src_data.size() + input_rows_count);
dst_offsets.resize(input_rows_count);
ColumnString::Offset src_offset{};
ColumnString::Offset dst_offset{};
for (const auto i : collections::range(0, size))
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto src_length = src_offsets[i] - src_offset;
memcpySmallAllowReadWriteOverflow15(&dst_data[dst_offset], &src_data[src_offset], src_length);

View File

@ -45,9 +45,7 @@ struct AsciiImpl
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
{
res[i] = doAscii(data, i * n, n);
}
}
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)

View File

@ -5,11 +5,12 @@ namespace DB
{
namespace
{
struct AsinhName
{
static constexpr auto name = "asinh";
};
using FunctionAsinh = FunctionMathUnary<UnaryFunctionVectorized<AsinhName, asinh>>;
struct AsinhName
{
static constexpr auto name = "asinh";
};
using FunctionAsinh = FunctionMathUnary<UnaryFunctionVectorized<AsinhName, asinh>>;
}

View File

@ -5,11 +5,12 @@ namespace DB
{
namespace
{
struct Atan2Name
{
static constexpr auto name = "atan2";
};
using FunctionAtan2 = FunctionMathBinaryFloat64<BinaryFunctionVectorized<Atan2Name, atan2>>;
struct Atan2Name
{
static constexpr auto name = "atan2";
};
using FunctionAtan2 = FunctionMathBinaryFloat64<BinaryFunctionVectorized<Atan2Name, atan2>>;
}

View File

@ -5,11 +5,12 @@ namespace DB
{
namespace
{
struct AtanhName
{
static constexpr auto name = "atanh";
};
using FunctionAtanh = FunctionMathUnary<UnaryFunctionVectorized<AtanhName, atanh>>;
struct AtanhName
{
static constexpr auto name = "atanh";
};
using FunctionAtanh = FunctionMathUnary<UnaryFunctionVectorized<AtanhName, atanh>>;
}

View File

@ -3,8 +3,10 @@
namespace DB
{
REGISTER_FUNCTION(Base58Encode)
{
factory.registerFunction<FunctionBase58Conversion<Base58Encode>>();
}
}

View File

@ -5,6 +5,7 @@
namespace DB
{
REGISTER_FUNCTION(Base64Decode)
{
FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: FROM_BASE64.)";
@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Decode)
/// MySQL compatibility alias.
factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::Case::Insensitive);
}
}
#endif

View File

@ -5,6 +5,7 @@
namespace DB
{
REGISTER_FUNCTION(Base64Encode)
{
FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)";
@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Encode)
/// MySQL compatibility alias.
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::Case::Insensitive);
}
}
#endif

View File

@ -5,6 +5,7 @@
namespace DB
{
REGISTER_FUNCTION(Base64URLDecode)
{
FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLDecode)
factory.registerFunction<FunctionBase64Conversion<Base64Decode<Base64Variant::URL>>>({description, syntax, arguments, returned_value, examples, categories});
}
}
#endif

View File

@ -5,6 +5,7 @@
namespace DB
{
REGISTER_FUNCTION(Base64URLEncode)
{
FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLEncode)
factory.registerFunction<FunctionBase64Conversion<Base64Encode<Base64Variant::URL>>>({description, syntax, arguments, returned_value, examples, categories});
}
}
#endif

View File

@ -67,11 +67,11 @@ public:
const IColumn * column = arguments[arg_num].column.get();
if (arg_num == 0)
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
vec_res[row_num] = column->byteSizeAt(row_num);
for (size_t row = 0; row < input_rows_count; ++row)
vec_res[row] = column->byteSizeAt(row);
else
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
vec_res[row_num] += column->byteSizeAt(row_num);
for (size_t row = 0; row < input_rows_count; ++row)
vec_res[row] += column->byteSizeAt(row);
}
return result_col;

View File

@ -10,6 +10,7 @@ extern const int NOT_IMPLEMENTED;
namespace
{
template <typename T>
requires std::is_integral_v<T>
T byteSwap(T x)

View File

@ -98,8 +98,7 @@ public:
/// Execute transform.
ColumnsWithTypeAndName transform_args{args.front(), src_array_col, dst_array_col, args.back()};
return FunctionFactory::instance().get("transform", context)->build(transform_args)
->execute(transform_args, result_type, input_rows_count);
return FunctionFactory::instance().get("transform", context)->build(transform_args)->execute(transform_args, result_type, input_rows_count);
}
private:

View File

@ -88,7 +88,8 @@ private:
static void convert(const String & from_charset, const String & to_charset,
const ColumnString::Chars & from_chars, const ColumnString::Offsets & from_offsets,
ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets)
ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets,
size_t input_rows_count)
{
auto converter_from = getConverter(from_charset);
auto converter_to = getConverter(to_charset);
@ -96,12 +97,11 @@ private:
ColumnString::Offset current_from_offset = 0;
ColumnString::Offset current_to_offset = 0;
size_t size = from_offsets.size();
to_offsets.resize(size);
to_offsets.resize(input_rows_count);
PODArray<UChar> uchars;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
size_t from_string_size = from_offsets[i] - current_from_offset - 1;
@ -184,7 +184,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const ColumnWithTypeAndName & arg_from = arguments[0];
const ColumnWithTypeAndName & arg_charset_from = arguments[1];
@ -204,7 +204,7 @@ public:
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arg_from.column.get()))
{
auto col_to = ColumnString::create();
convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets());
convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets(), input_rows_count);
return col_to;
}
else

View File

@ -5,11 +5,12 @@ namespace DB
{
namespace
{
struct CoshName
{
static constexpr auto name = "cosh";
};
using FunctionCosh = FunctionMathUnary<UnaryFunctionVectorized<CoshName, cosh>>;
struct CoshName
{
static constexpr auto name = "cosh";
};
using FunctionCosh = FunctionMathUnary<UnaryFunctionVectorized<CoshName, cosh>>;
}

View File

@ -13,8 +13,7 @@ struct NameCountSubstringsCaseInsensitiveUTF8
static constexpr auto name = "countSubstringsCaseInsensitiveUTF8";
};
using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch<
CountSubstringsImpl<NameCountSubstringsCaseInsensitiveUTF8, PositionCaseInsensitiveUTF8>>;
using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch<CountSubstringsImpl<NameCountSubstringsCaseInsensitiveUTF8, PositionCaseInsensitiveUTF8>>;
}

View File

@ -109,14 +109,14 @@ public:
ColumnPtr executeImpl(
const ColumnsWithTypeAndName & arguments,
const DataTypePtr & result_type,
[[maybe_unused]] size_t input_rows_count) const override
size_t input_rows_count) const override
{
ColumnPtr res;
if (!((res = executeType<DataTypeDate>(arguments, result_type))
|| (res = executeType<DataTypeDate32>(arguments, result_type))
|| (res = executeType<DataTypeDateTime>(arguments, result_type))
|| (res = executeType<DataTypeDateTime64>(arguments, result_type))))
if (!((res = executeType<DataTypeDate>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDate32>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDateTime>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDateTime64>(arguments, result_type, input_rows_count))))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of function {}, must be Date or DateTime.",
@ -127,7 +127,7 @@ public:
}
template <typename DataType>
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
{
auto * times = checkAndGetColumn<typename DataType::ColumnType>(arguments[1].column.get());
if (!times)
@ -144,7 +144,7 @@ public:
String date_part = date_part_column->getValue<String>();
const DateLUTImpl * time_zone_tmp;
if (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
if constexpr (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 1);
else
time_zone_tmp = &DateLUT::instance();
@ -175,7 +175,7 @@ public:
using TimeType = DateTypeToTimeType<DataType>;
callOnDatePartWriter<TimeType>(date_part, [&](const auto & writer)
{
for (size_t i = 0; i < times_data.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if constexpr (std::is_same_v<DataType, DataTypeDateTime64>)
{

View File

@ -7,18 +7,20 @@ namespace DB
{
namespace
{
struct DegreesName
{
static constexpr auto name = "degrees";
};
Float64 degrees(Float64 r)
{
Float64 degrees = r * (180 / M_PI);
return degrees;
}
struct DegreesName
{
static constexpr auto name = "degrees";
};
Float64 degrees(Float64 r)
{
Float64 degrees = r * (180 / M_PI);
return degrees;
}
using FunctionDegrees = FunctionMathUnary<UnaryFunctionVectorized<DegreesName, degrees>>;
using FunctionDegrees = FunctionMathUnary<UnaryFunctionVectorized<DegreesName, degrees>>;
}
REGISTER_FUNCTION(Degrees)

View File

@ -91,7 +91,7 @@ public:
auto col_res = ColumnVector<UInt64>::create(col_str->size());
auto & data = col_res->getData();
for (size_t i = 0; i < col_str->size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
auto disk_name = col_str->getDataAt(i).toString();
if (auto it = disk_map.find(disk_name); it != disk_map.end())

View File

@ -848,7 +848,7 @@ public:
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
ColumnPtr res;
if constexpr (support_integer == SupportInteger::Yes)
@ -862,17 +862,17 @@ public:
if (!castType(arguments[0].type.get(), [&](const auto & type)
{
using FromDataType = std::decay_t<decltype(type)>;
if (!(res = executeType<FromDataType>(arguments, result_type)))
if (!(res = executeType<FromDataType>(arguments, result_type, input_rows_count)))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64.",
arguments[0].column->getName(), getName());
return true;
}))
{
if (!((res = executeType<DataTypeDate>(arguments, result_type))
|| (res = executeType<DataTypeDate32>(arguments, result_type))
|| (res = executeType<DataTypeDateTime>(arguments, result_type))
|| (res = executeType<DataTypeDateTime64>(arguments, result_type))))
if (!((res = executeType<DataTypeDate>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDate32>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDateTime>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDateTime64>(arguments, result_type, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of function {}, must be Integer or DateTime.",
arguments[0].column->getName(), getName());
@ -881,10 +881,10 @@ public:
}
else
{
if (!((res = executeType<DataTypeDate>(arguments, result_type))
|| (res = executeType<DataTypeDate32>(arguments, result_type))
|| (res = executeType<DataTypeDateTime>(arguments, result_type))
|| (res = executeType<DataTypeDateTime64>(arguments, result_type))))
if (!((res = executeType<DataTypeDate>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDate32>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDateTime>(arguments, result_type, input_rows_count))
|| (res = executeType<DataTypeDateTime64>(arguments, result_type, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of function {}, must be Date or DateTime.",
arguments[0].column->getName(), getName());
@ -894,7 +894,7 @@ public:
}
template <typename DataType>
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
{
auto non_const_datetime = arguments[0].column->convertToFullColumnIfConst();
auto * times = checkAndGetColumn<typename DataType::ColumnType>(non_const_datetime.get());
@ -955,13 +955,11 @@ public:
else
time_zone = &DateLUT::instance();
const auto & vec = times->getData();
auto col_res = ColumnString::create();
auto & res_data = col_res->getChars();
auto & res_offsets = col_res->getOffsets();
res_data.resize(vec.size() * (out_template_size + 1));
res_offsets.resize(vec.size());
res_data.resize(input_rows_count * (out_template_size + 1));
res_offsets.resize(input_rows_count);
if constexpr (format_syntax == FormatSyntax::MySQL)
{
@ -990,9 +988,11 @@ public:
}
}
const auto & vec = times->getData();
auto * begin = reinterpret_cast<char *>(res_data.data());
auto * pos = begin;
for (size_t i = 0; i < vec.size(); ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
if (!const_time_zone_column && arguments.size() > 2)
{

View File

@ -75,7 +75,7 @@ public:
if (const ColumnString * col_query_string = checkAndGetColumn<ColumnString>(col_query.get()))
{
auto col_res = ColumnString::create();
formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map);
formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map, input_rows_count);
if (error_handling == ErrorHandling::Null)
return ColumnNullable::create(std::move(col_res), std::move(col_null_map));
@ -92,16 +92,16 @@ private:
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets,
ColumnUInt8::MutablePtr & res_null_map) const
ColumnUInt8::MutablePtr & res_null_map,
size_t input_rows_count) const
{
const size_t size = offsets.size();
res_offsets.resize(size);
res_offsets.resize(input_rows_count);
res_data.resize(data.size());
size_t prev_offset = 0;
size_t res_data_size = 0;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const char * begin = reinterpret_cast<const char *>(&data[prev_offset]);
const char * end = begin + offsets[i] - prev_offset - 1;

View File

@ -55,19 +55,19 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
ColumnPtr res;
if (!((res = executeType<UInt8>(arguments))
|| (res = executeType<UInt16>(arguments))
|| (res = executeType<UInt32>(arguments))
|| (res = executeType<UInt64>(arguments))
|| (res = executeType<Int8>(arguments))
|| (res = executeType<Int16>(arguments))
|| (res = executeType<Int32>(arguments))
|| (res = executeType<Int64>(arguments))
|| (res = executeType<Float32>(arguments))
|| (res = executeType<Float64>(arguments))))
if (!((res = executeType<UInt8>(arguments, input_rows_count))
|| (res = executeType<UInt16>(arguments, input_rows_count))
|| (res = executeType<UInt32>(arguments, input_rows_count))
|| (res = executeType<UInt64>(arguments, input_rows_count))
|| (res = executeType<Int8>(arguments, input_rows_count))
|| (res = executeType<Int16>(arguments, input_rows_count))
|| (res = executeType<Int32>(arguments, input_rows_count))
|| (res = executeType<Int64>(arguments, input_rows_count))
|| (res = executeType<Float32>(arguments, input_rows_count))
|| (res = executeType<Float64>(arguments, input_rows_count))))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
arguments[0].column->getName(), getName());
@ -76,7 +76,7 @@ public:
private:
template <typename T>
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
{
if (const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get()))
{
@ -85,13 +85,12 @@ private:
const typename ColumnVector<T>::Container & vec_from = col_from->getData();
ColumnString::Chars & data_to = col_to->getChars();
ColumnString::Offsets & offsets_to = col_to->getOffsets();
size_t size = vec_from.size();
data_to.resize(size * 2);
offsets_to.resize(size);
data_to.resize(input_rows_count * 2);
offsets_to.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(data_to);
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
Impl::format(static_cast<double>(vec_from[i]), buf_to);
writeChar(0, buf_to);

View File

@ -51,21 +51,19 @@ public:
}
template <typename ColumnTypeEncoded>
bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) const
bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column, size_t input_rows_count) const
{
const auto * encoded = checkAndGetColumn<ColumnTypeEncoded>(encoded_column);
if (!encoded)
return false;
const size_t count = encoded->size();
auto latitude = ColumnFloat64::create(count);
auto longitude = ColumnFloat64::create(count);
auto latitude = ColumnFloat64::create(input_rows_count);
auto longitude = ColumnFloat64::create(input_rows_count);
ColumnFloat64::Container & lon_data = longitude->getData();
ColumnFloat64::Container & lat_data = latitude->getData();
for (size_t i = 0; i < count; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
std::string_view encoded_string = encoded->getDataAt(i).toView();
geohashDecode(encoded_string.data(), encoded_string.size(), &lon_data[i], &lat_data[i]);
@ -79,13 +77,13 @@ public:
return true;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const IColumn * encoded = arguments[0].column.get();
ColumnPtr res_column;
if (tryExecute<ColumnString>(encoded, res_column) ||
tryExecute<ColumnFixedString>(encoded, res_column))
if (tryExecute<ColumnString>(encoded, res_column, input_rows_count) ||
tryExecute<ColumnFixedString>(encoded, res_column, input_rows_count))
return res_column;
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument type:{} of argument of function {}",

View File

@ -53,7 +53,7 @@ public:
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const IColumn * longitude = arguments[0].column.get();
const IColumn * latitude = arguments[1].column.get();
@ -65,26 +65,24 @@ public:
precision = arguments[2].column;
ColumnPtr res_column;
vector(longitude, latitude, precision.get(), res_column);
vector(longitude, latitude, precision.get(), res_column, input_rows_count);
return res_column;
}
private:
void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const
void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result, size_t input_rows_count) const
{
auto col_str = ColumnString::create();
ColumnString::Chars & out_vec = col_str->getChars();
ColumnString::Offsets & out_offsets = col_str->getOffsets();
const size_t size = lat_column->size();
out_offsets.resize(size);
out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1));
out_offsets.resize(input_rows_count);
out_vec.resize(input_rows_count * (GEOHASH_MAX_TEXT_LENGTH + 1));
char * begin = reinterpret_cast<char *>(out_vec.data());
char * pos = begin;
for (size_t i = 0; i < size; ++i)
for (size_t i = 0; i < input_rows_count; ++i)
{
const Float64 longitude_value = lon_column->getFloat64(i);
const Float64 latitude_value = lat_column->getFloat64(i);

View File

@ -138,8 +138,7 @@ namespace
}
}
ColumnPtr executeImpl(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
std::call_once(once, [&] { initialize(arguments, result_type); });

View File

@ -17,13 +17,19 @@
namespace DB
{
IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const DB::ASTPtr& query_ptr_,
const DB::ContextMutablePtr& context_, const DB::SelectQueryOptions& options_)
: query_ptr(query_ptr_)
, context(context_)
, options(options_)
, max_streams(context->getSettingsRef().max_threads)
IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_,
const ContextMutablePtr & context_, const SelectQueryOptions & options_)
: query_ptr(query_ptr_)
, context(context_)
, options(options_)
, max_streams(context->getSettingsRef().max_threads)
{
/// FIXME All code here will work with the old analyzer, however for views over Distributed tables
/// it's possible that new analyzer will be enabled in ::getQueryProcessingStage method
/// of the underlying storage when all other parts of infrastructure are not ready for it
/// (built with old analyzer).
context->setSetting("allow_experimental_analyzer", false);
if (options.shard_num)
context->addSpecialScalar(
"_shard_num",

View File

@ -949,7 +949,7 @@ namespace
throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines");
}
void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine)
void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine)
{
if (engine == DefaultTableEngine::None)
throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query");
@ -969,9 +969,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view)
return;
if (create.is_materialized_view && create.to_table_id)
return;
if (create.temporary)
{
/// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting.
@ -986,22 +983,44 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
}
if (!create.storage->engine)
{
setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value);
}
checkTemporaryTableEngineName(create.storage->engine->name);
return;
}
if (create.is_materialized_view)
{
/// A materialized view with an external target doesn't need a table engine.
if (create.is_materialized_view_with_external_target())
return;
if (auto to_engine = create.getTargetInnerEngine(ViewTarget::To))
{
/// This materialized view already has a storage definition.
if (!to_engine->engine)
{
/// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one.
setDefaultTableEngine(*to_engine, getContext()->getSettingsRef().default_table_engine.value);
}
return;
}
}
if (create.storage)
{
/// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one.
/// This table already has a storage definition.
if (!create.storage->engine)
{
/// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one.
setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value);
}
return;
}
/// We'll try to extract a storage definition from clause `AS`:
/// CREATE TABLE table_name AS other_table_name
std::shared_ptr<ASTStorage> storage_def;
if (!create.as_table.empty())
{
/// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table.
@ -1017,12 +1036,14 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
if (as_create.is_ordinary_view)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name);
if (as_create.is_materialized_view && as_create.to_table_id)
if (as_create.is_materialized_view_with_external_target())
{
throw Exception(
ErrorCodes::INCORRECT_QUERY,
"Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS `{}`\" instead",
"Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS {}\" instead",
qualified_name,
as_create.to_table_id.getQualifiedName());
as_create.getTargetTableID(ViewTarget::To).getFullTableName());
}
if (as_create.is_live_view)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name);
@ -1033,18 +1054,37 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
if (as_create.is_dictionary)
throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name);
if (as_create.storage)
create.set(create.storage, as_create.storage->ptr());
if (as_create.is_materialized_view)
{
storage_def = as_create.getTargetInnerEngine(ViewTarget::To);
}
else if (as_create.as_table_function)
{
create.set(create.as_table_function, as_create.as_table_function->ptr());
return;
}
else if (as_create.storage)
{
storage_def = typeid_cast<std::shared_ptr<ASTStorage>>(as_create.storage->ptr());
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug.");
return;
}
}
create.set(create.storage, std::make_shared<ASTStorage>());
setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value);
if (!storage_def)
{
/// Set ENGINE by default.
storage_def = std::make_shared<ASTStorage>();
setDefaultTableEngine(*storage_def, getContext()->getSettingsRef().default_table_engine.value);
}
/// Use the found table engine to modify the create query.
if (create.is_materialized_view)
create.setTargetInnerEngine(ViewTarget::To, storage_def);
else
create.set(create.storage, storage_def);
}
void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const
@ -1086,11 +1126,11 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
kind_upper, create.table);
}
create.generateRandomUUID();
create.generateRandomUUIDs();
}
else
{
bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil;
bool has_uuid = (create.uuid != UUIDHelpers::Nil) || create.hasInnerUUIDs();
if (has_uuid && !is_on_cluster && !internal)
{
/// We don't show the following error message either
@ -1105,8 +1145,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
/// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either
/// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or
/// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts.
create.uuid = UUIDHelpers::Nil;
create.to_inner_uuid = UUIDHelpers::Nil;
create.resetUUIDs();
}
}
@ -1130,6 +1169,14 @@ void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create,
DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies);
}
/// Tells whether a storage definition names a table engine whose name begins with
/// "Replicated" or "Shared". A definition without an ENGINE clause is never replicated.
bool isReplicated(const ASTStorage & storage)
{
    if (storage.engine)
    {
        const auto & engine_name = storage.engine->name;
        const bool is_replicated_family = engine_name.starts_with("Replicated");
        return is_replicated_family || engine_name.starts_with("Shared");
    }
    return false;
}
}
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
@ -1246,8 +1293,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (!create.temporary && !create.database)
create.setDatabase(current_database);
if (create.to_table_id && create.to_table_id.database_name.empty())
create.to_table_id.database_name = current_database;
if (create.targets)
create.targets->setCurrentDatabase(current_database);
if (create.select && create.isView())
{
@ -1281,12 +1329,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode);
/// Check type compatible for materialized dest table and select columns
if (create.select && create.is_materialized_view && create.to_table_id && mode <= LoadingStrictnessLevel::CREATE)
if (create.is_materialized_view_with_external_target() && create.select && mode <= LoadingStrictnessLevel::CREATE)
{
if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(
{create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid},
getContext()
))
if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(create.getTargetTableID(ViewTarget::To), getContext()))
{
Block input_block;
@ -1332,11 +1377,17 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
{
bool is_storage_replicated = false;
if (create.storage && create.storage->engine)
if (create.storage && isReplicated(*create.storage))
is_storage_replicated = true;
if (create.targets)
{
const auto & storage_name = create.storage->engine->name;
if (storage_name.starts_with("Replicated") || storage_name.starts_with("Shared"))
is_storage_replicated = true;
for (const auto & inner_table_engine : create.targets->getInnerEngines())
{
if (isReplicated(*inner_table_engine))
is_storage_replicated = true;
}
}
const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated;
@ -1795,7 +1846,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont
/// For CREATE query generate UUID on initiator, so it will be the same on all hosts.
/// It will be ignored if database does not support UUIDs.
create.generateRandomUUID();
create.generateRandomUUIDs();
/// For cross-replication cluster we cannot use UUID in replica path.
String cluster_name_expanded = local_context->getMacros()->expand(cluster_name);
@ -1917,8 +1968,15 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const
}
}
if (create.to_table_id)
required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, create.to_table_id.database_name, create.to_table_id.table_name);
if (create.targets)
{
for (const auto & target : create.targets->targets)
{
const auto & target_id = target.table_id;
if (target_id)
required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, target_id.database_name, target_id.table_name);
}
}
if (create.storage && create.storage->engine)
required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name);

View File

@ -75,7 +75,6 @@
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageDummy.h>
#include <Storages/StorageMerge.h>
#include <Storages/StorageValues.h>
#include <Storages/StorageView.h>
@ -214,11 +213,11 @@ InterpreterSelectQuery::InterpreterSelectQuery(
{}
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const ContextPtr & context_,
Pipe input_pipe_,
const SelectQueryOptions & options_)
: InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery())
const ASTPtr & query_ptr_,
const ContextPtr & context_,
Pipe input_pipe_,
const SelectQueryOptions & options_)
: InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery())
{}
InterpreterSelectQuery::InterpreterSelectQuery(
@ -227,18 +226,15 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const StoragePtr & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const SelectQueryOptions & options_)
: InterpreterSelectQuery(
query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
{
}
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
{}
InterpreterSelectQuery::InterpreterSelectQuery(
const ASTPtr & query_ptr_,
const ContextPtr & context_,
const SelectQueryOptions & options_,
PreparedSetsPtr prepared_sets_)
: InterpreterSelectQuery(
query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
{}
InterpreterSelectQuery::~InterpreterSelectQuery() = default;

View File

@ -26,7 +26,6 @@ class Logger;
namespace DB
{
class SubqueryForSet;
class InterpreterSelectWithUnionQuery;
class Context;
class QueryPlan;

View File

@ -94,7 +94,8 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl()
{
auto & create = create_query->as<ASTCreateQuery &>();
create.uuid = UUIDHelpers::Nil;
create.to_inner_uuid = UUIDHelpers::Nil;
if (create.targets)
create.targets->resetInnerUUIDs();
}
MutableColumnPtr column = ColumnString::create();

View File

@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE)
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get<String>(), type.getName());
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get<String>(), type.getName());
e.addMessage(fmt::format("while converting '{}' to {}", src.get<String>(), type.getName()));
throw;

View File

@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P(
DecimalField(DateTime64(123 * Day * 1'000'000), 6)
}
})
);
);
INSTANTIATE_TEST_SUITE_P(
DateTimeToDateTime64,
@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P(
},
})
);
/// Parameter set for converting String fields to numeric types.
/// Each entry lists: source type name, source value, target type name, expected result.
/// NOTE(review): an empty expected value (`Field()` / `{}`) presumably means the conversion
/// is expected to fail - confirm against the ConvertFieldToTypeTest fixture.
INSTANTIATE_TEST_SUITE_P(
StringToNumber,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
/// A plain integer string that fits into Int8.
{
"String",
Field("1"),
"Int8",
Field(1)
},
/// "256" with an empty expected result.
{
"String",
Field("256"),
"Int8",
Field()
},
/// Text that is not numeric at all.
{
"String",
Field("not a number"),
"Int8",
{}
},
/// A fractional string requested as an integer type.
{
"String",
Field("1.1"),
"Int8",
{} /// we can not convert '1.1' to Int8
},
/// The same fractional string requested as a floating-point type.
{
"String",
Field("1.1"),
"Float64",
Field(1.1)
},
})
);
/// Parameter set for converting numeric fields to String.
/// Each entry lists: source type name, source value, target type name, expected result.
INSTANTIATE_TEST_SUITE_P(
NumberToString,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
/// Positive integer.
{
"Int8",
Field(1),
"String",
Field("1")
},
/// Negative integer: the sign is expected in the resulting text.
{
"Int8",
Field(-1),
"String",
Field("-1")
},
/// Floating-point value.
{
"Float64",
Field(1.1),
"String",
Field("1.1")
},
})
);
/// Parameter set for converting String fields to Date.
/// Each entry lists: source type name, source value, target type name, expected result.
INSTANTIATE_TEST_SUITE_P(
StringToDate,
ConvertFieldToTypeTest,
::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
/// A well-formed date; the expected value is given as a UInt16 day number.
{
"String",
Field("2024-07-12"),
"Date",
Field(static_cast<UInt16>(19916))
},
/// Text that is not a date, paired with an empty expected result.
{
"String",
Field("not a date"),
"Date",
{}
},
})
);

View File

@ -2,6 +2,8 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Common/quoteString.h>
#include <Interpreters/StorageID.h>
#include <IO/Operators.h>
@ -240,12 +242,12 @@ ASTPtr ASTCreateQuery::clone() const
res->set(res->columns_list, columns_list->clone());
if (storage)
res->set(res->storage, storage->clone());
if (inner_storage)
res->set(res->inner_storage, inner_storage->clone());
if (select)
res->set(res->select, select->clone());
if (table_overrides)
res->set(res->table_overrides, table_overrides->clone());
if (targets)
res->set(res->targets, targets->clone());
if (dictionary)
{
@ -398,20 +400,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
refresh_strategy->formatImpl(settings, state, frame);
}
if (to_table_id)
if (auto to_table_id = getTargetTableID(ViewTarget::To))
{
assert((is_materialized_view || is_window_view) && to_inner_uuid == UUIDHelpers::Nil);
settings.ostr
<< (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "")
<< (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "")
<< backQuoteIfNeed(to_table_id.table_name);
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO)
<< (settings.hilite ? hilite_none : "") << " "
<< (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "")
<< backQuoteIfNeed(to_table_id.table_name);
}
if (to_inner_uuid != UUIDHelpers::Nil)
if (auto to_inner_uuid = getTargetInnerUUID(ViewTarget::To); to_inner_uuid != UUIDHelpers::Nil)
{
assert(is_materialized_view && !to_table_id);
settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO INNER UUID " << (settings.hilite ? hilite_none : "")
<< quoteString(toString(to_inner_uuid));
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO_INNER_UUID)
<< (settings.hilite ? hilite_none : "") << " " << quoteString(toString(to_inner_uuid));
}
bool should_add_empty = is_create_empty;
@ -471,14 +471,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
frame.expression_list_always_start_on_new_line = false;
if (inner_storage)
if (storage)
storage->formatImpl(settings, state, frame);
if (auto inner_storage = getTargetInnerEngine(ViewTarget::Inner))
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : "");
settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::INNER) << (settings.hilite ? hilite_none : "");
inner_storage->formatImpl(settings, state, frame);
}
if (storage)
storage->formatImpl(settings, state, frame);
if (auto to_storage = getTargetInnerEngine(ViewTarget::To))
to_storage->formatImpl(settings, state, frame);
if (dictionary)
dictionary->formatImpl(settings, state, frame);
@ -538,48 +541,57 @@ bool ASTCreateQuery::isParameterizedView() const
}
ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query)
: uuid(query.uuid)
, to_inner_uuid(query.to_inner_uuid)
void ASTCreateQuery::generateRandomUUIDs()
{
CreateQueryUUIDs{*this, /* generate_random= */ true}.copyToQuery(*this);
}
String ASTCreateQuery::UUIDs::toString() const
void ASTCreateQuery::resetUUIDs()
{
WriteBufferFromOwnString out;
out << "{" << uuid << "," << to_inner_uuid << "}";
return out.str();
CreateQueryUUIDs{}.copyToQuery(*this);
}
ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str)
StorageID ASTCreateQuery::getTargetTableID(ViewTarget::Kind target_kind) const
{
ReadBufferFromString in{str};
ASTCreateQuery::UUIDs res;
in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}";
return res;
if (targets)
return targets->getTableID(target_kind);
return StorageID::createEmpty();
}
ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID(bool always_generate_new_uuid)
bool ASTCreateQuery::hasTargetTableID(ViewTarget::Kind target_kind) const
{
if (always_generate_new_uuid)
setUUID({});
if (uuid == UUIDHelpers::Nil)
uuid = UUIDHelpers::generateV4();
/// If destination table (to_table_id) is not specified for materialized view,
/// then MV will create inner table. We should generate UUID of inner table here.
bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id;
if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil))
to_inner_uuid = UUIDHelpers::generateV4();
return UUIDs{*this};
if (targets)
return targets->hasTableID(target_kind);
return false;
}
void ASTCreateQuery::setUUID(const UUIDs & uuids)
UUID ASTCreateQuery::getTargetInnerUUID(ViewTarget::Kind target_kind) const
{
uuid = uuids.uuid;
to_inner_uuid = uuids.to_inner_uuid;
if (targets)
return targets->getInnerUUID(target_kind);
return UUIDHelpers::Nil;
}
/// Reports whether any target of this create query carries an inner UUID.
/// A query without a targets node has none.
bool ASTCreateQuery::hasInnerUUIDs() const
{
    return targets && targets->hasInnerUUIDs();
}
/// Returns the storage definition of the inner table for the given target kind,
/// or nullptr when this query has no targets node (or the node has no such engine).
std::shared_ptr<ASTStorage> ASTCreateQuery::getTargetInnerEngine(ViewTarget::Kind target_kind) const
{
    if (!targets)
        return nullptr;
    return targets->getInnerEngine(target_kind);
}
/// Assigns the storage definition of the inner table for the given target kind.
/// The targets node is created on demand the first time an engine is assigned.
void ASTCreateQuery::setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def)
{
    const bool targets_missing = (targets == nullptr);
    if (targets_missing)
    {
        set(targets, std::make_shared<ASTViewTargets>());
    }

    targets->setInnerEngine(target_kind, storage_def);
}
}

View File

@ -5,6 +5,7 @@
#include <Parsers/ASTDictionary.h>
#include <Parsers/ASTDictionaryAttributeDeclaration.h>
#include <Parsers/ASTTableOverrides.h>
#include <Parsers/ASTViewTargets.h>
#include <Parsers/ASTSQLSecurity.h>
#include <Parsers/ASTRefreshStrategy.h>
#include <Interpreters/StorageID.h>
@ -15,6 +16,7 @@ namespace DB
class ASTFunction;
class ASTSetQuery;
class ASTSelectWithUnionQuery;
struct CreateQueryUUIDs;
class ASTStorage : public IAST
@ -101,17 +103,15 @@ public:
bool has_uuid{false}; // CREATE TABLE x UUID '...'
ASTColumns * columns_list = nullptr;
StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table.
UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table
ASTStorage * inner_storage = nullptr; /// For window view with inner table
ASTStorage * storage = nullptr;
ASTPtr watermark_function;
ASTPtr lateness_function;
String as_database;
String as_table;
IAST * as_table_function = nullptr;
ASTSelectWithUnionQuery * select = nullptr;
ASTViewTargets * targets = nullptr;
IAST * comment = nullptr;
ASTPtr sql_security = nullptr;
@ -153,17 +153,26 @@ public:
QueryKind getQueryKind() const override { return QueryKind::Create; }
struct UUIDs
{
UUID uuid = UUIDHelpers::Nil;
UUID to_inner_uuid = UUIDHelpers::Nil;
UUIDs() = default;
explicit UUIDs(const ASTCreateQuery & query);
String toString() const;
static UUIDs fromString(const String & str);
};
UUIDs generateRandomUUID(bool always_generate_new_uuid = false);
void setUUID(const UUIDs & uuids);
/// Generates a random UUID for this create query if it's not specified already.
/// The function also generates random UUIDs for inner target tables if this create query implies that
/// (for example, if it's a `CREATE MATERIALIZED VIEW` query with an inner storage).
void generateRandomUUIDs();
/// Removes UUID from this create query.
/// The function also removes UUIDs for inner target tables from this create query (see also generateRandomUUIDs()).
void resetUUIDs();
/// Returns information about a target table.
/// If that information isn't specified in this create query (or even not allowed) then the function returns an empty value.
StorageID getTargetTableID(ViewTarget::Kind target_kind) const;
bool hasTargetTableID(ViewTarget::Kind target_kind) const;
UUID getTargetInnerUUID(ViewTarget::Kind target_kind) const;
bool hasInnerUUIDs() const;
std::shared_ptr<ASTStorage> getTargetInnerEngine(ViewTarget::Kind target_kind) const;
void setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def);
bool is_materialized_view_with_external_target() const { return is_materialized_view && hasTargetTableID(ViewTarget::To); }
bool is_materialized_view_with_inner_table() const { return is_materialized_view && !hasTargetTableID(ViewTarget::To); }
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
@ -171,8 +180,8 @@ protected:
void forEachPointerToChild(std::function<void(void**)> f) override
{
f(reinterpret_cast<void **>(&columns_list));
f(reinterpret_cast<void **>(&inner_storage));
f(reinterpret_cast<void **>(&storage));
f(reinterpret_cast<void **>(&targets));
f(reinterpret_cast<void **>(&as_table_function));
f(reinterpret_cast<void **>(&select));
f(reinterpret_cast<void **>(&comment));

View File

@ -329,19 +329,23 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
const auto * literal = arguments->children[0]->as<ASTLiteral>();
const auto * function = arguments->children[0]->as<ASTFunction>();
const auto * subquery = arguments->children[0]->as<ASTSubquery>();
bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple;
// do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))`
/// Do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))`
bool literal_need_parens = literal && !is_tuple;
// negate always requires parentheses, otherwise -(-1) will be printed as --1
bool inside_parens = name == "negate" && (literal_need_parens || (function && function->name == "negate"));
/// Negate always requires parentheses, otherwise -(-1) will be printed as --1
/// Also extra parentheses are needed for subqueries, because NOT can be parsed as a function:
/// not(SELECT 1) cannot be parsed, while not((SELECT 1)) can.
bool inside_parens = (name == "negate" && (literal_need_parens || (function && function->name == "negate")))
|| (subquery && name == "not");
/// We DO need parentheses around a single literal
/// For example, SELECT (NOT 0) + (NOT 0) cannot be transformed into SELECT NOT 0 + NOT 0, since
/// this is equal to SELECT NOT (0 + NOT 0)
bool outside_parens = frame.need_parens && !inside_parens;
// do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1)))
/// Do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1)))
if (inside_parens)
nested_need_parens.need_parens = false;

View File

@ -0,0 +1,300 @@
#include <Parsers/ASTViewTargets.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CommonParsers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
/// Returns the lowercase textual name of a view target kind ("to" or "inner").
/// Throws LOGICAL_ERROR if `kind` matches none of the cases handled by the switch.
std::string_view toString(ViewTarget::Kind kind)
{
switch (kind)
{
case ViewTarget::To: return "to";
case ViewTarget::Inner: return "inner";
}
/// Reached only when `kind` holds a value not listed above.
throw Exception(ErrorCodes::LOGICAL_ERROR, "{} doesn't support kind {}", __FUNCTION__, kind);
}
/// Parses the textual name of a view target kind (as produced by toString()) into `out`.
/// Throws BAD_ARGUMENTS when `str` is not the name of any kind; `out` is left untouched then.
void parseFromString(ViewTarget::Kind & out, std::string_view str)
{
    for (const auto candidate : magic_enum::enum_values<ViewTarget::Kind>())
    {
        if (toString(candidate) != str)
            continue;

        out = candidate;
        return;
    }

    throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: Unexpected string {}", __FUNCTION__, str);
}
std::vector<ViewTarget::Kind> ASTViewTargets::getKinds() const
{
std::vector<ViewTarget::Kind> kinds;
kinds.reserve(targets.size());
for (const auto & target : targets)
kinds.push_back(target.kind);
return kinds;
}
/// Sets the table ID of the target of the given kind.
/// An existing target is always overwritten (even with an empty ID);
/// a new target is only added when the ID is not empty.
void ASTViewTargets::setTableID(ViewTarget::Kind kind, const StorageID & table_id_)
{
    /// Locate the first target of the requested kind, if there is one.
    ViewTarget * existing = nullptr;
    for (auto & candidate : targets)
    {
        if (candidate.kind == kind)
        {
            existing = &candidate;
            break;
        }
    }

    if (existing)
    {
        existing->table_id = table_id_;
        return;
    }

    /// Nothing to update: create a new target, but never for an empty ID.
    if (table_id_)
        targets.emplace_back(kind).table_id = table_id_;
}
/// Returns the table ID stored for the given kind, or an empty StorageID if there is no such target.
StorageID ASTViewTargets::getTableID(ViewTarget::Kind kind) const
{
    const ViewTarget * found = tryGetTarget(kind);
    if (!found)
        return StorageID::createEmpty();
    return found->table_id;
}
/// Reports whether a target of the given kind exists and has a non-empty table ID.
bool ASTViewTargets::hasTableID(ViewTarget::Kind kind) const
{
    const ViewTarget * found = tryGetTarget(kind);
    return found && !found->table_id.empty();
}
/// Fills in `current_database` for every target that names a table but no database.
void ASTViewTargets::setCurrentDatabase(const String & current_database)
{
    for (auto & target : targets)
    {
        auto & id = target.table_id;

        /// Targets without a table name, or with an explicit database, are left alone.
        if (id.table_name.empty() || !id.database_name.empty())
            continue;

        id.database_name = current_database;
    }
}
/// Sets the inner UUID of the target of the given kind.
/// An existing target is always overwritten (even with Nil);
/// a new target is only added when the UUID is not Nil.
void ASTViewTargets::setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_)
{
    /// Locate the first target of the requested kind, if there is one.
    ViewTarget * existing = nullptr;
    for (auto & candidate : targets)
    {
        if (candidate.kind == kind)
        {
            existing = &candidate;
            break;
        }
    }

    if (existing)
    {
        existing->inner_uuid = inner_uuid_;
        return;
    }

    /// Nothing to update: create a new target, but never for a Nil UUID.
    if (inner_uuid_ != UUIDHelpers::Nil)
        targets.emplace_back(kind).inner_uuid = inner_uuid_;
}
/// Returns the inner UUID stored for the given kind, or Nil if there is no such target.
UUID ASTViewTargets::getInnerUUID(ViewTarget::Kind kind) const
{
    const ViewTarget * found = tryGetTarget(kind);
    if (!found)
        return UUIDHelpers::Nil;
    return found->inner_uuid;
}
/// Reports whether the target of the given kind has a non-Nil inner UUID.
bool ASTViewTargets::hasInnerUUID(ViewTarget::Kind kind) const
{
    const UUID stored_uuid = getInnerUUID(kind);
    const bool is_set = (stored_uuid != UUIDHelpers::Nil);
    return is_set;
}
void ASTViewTargets::resetInnerUUIDs()
{
for (auto & target : targets)
target.inner_uuid = UUIDHelpers::Nil;
}
bool ASTViewTargets::hasInnerUUIDs() const
{
for (const auto & target : targets)
{
if (target.inner_uuid != UUIDHelpers::Nil)
return true;
}
return false;
}
/// Sets the storage definition (inner table engine) for the target of the given kind,
/// keeping the `children` list in sync with the engines referenced by `targets`.
/// `storage_def` may be null, which clears the engine of an existing target.
/// Throws LOGICAL_ERROR if `storage_def` is non-null but is not an ASTStorage.
void ASTViewTargets::setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def)
{
auto new_inner_engine = typeid_cast<std::shared_ptr<ASTStorage>>(storage_def);
/// A null cast result is only acceptable when the input itself was null.
if (!new_inner_engine && storage_def)
throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Bad cast from type {} to ASTStorage", storage_def->getID());
for (auto & target : targets)
{
if (target.kind == kind)
{
/// Same node already assigned: nothing to do.
if (target.inner_engine == new_inner_engine)
return;
/// Register the new engine as a child and unregister the one being replaced.
if (new_inner_engine)
children.push_back(new_inner_engine);
if (target.inner_engine)
std::erase(children, target.inner_engine);
target.inner_engine = new_inner_engine;
return;
}
}
/// No target of this kind yet: add one, but only if there is an engine to store.
if (new_inner_engine)
{
targets.emplace_back(kind).inner_engine = new_inner_engine;
children.push_back(new_inner_engine);
}
}
/// Returns the storage definition stored for the given kind, or nullptr if there is no such target.
std::shared_ptr<ASTStorage> ASTViewTargets::getInnerEngine(ViewTarget::Kind kind) const
{
    const ViewTarget * found = tryGetTarget(kind);
    if (!found)
        return nullptr;
    return found->inner_engine;
}
std::vector<std::shared_ptr<ASTStorage>> ASTViewTargets::getInnerEngines() const
{
std::vector<std::shared_ptr<ASTStorage>> res;
res.reserve(targets.size());
for (const auto & target : targets)
{
if (target.inner_engine)
res.push_back(target.inner_engine);
}
return res;
}
/// Finds the first target of the given kind; returns nullptr when there is none.
const ViewTarget * ASTViewTargets::tryGetTarget(ViewTarget::Kind kind) const
{
    for (auto it = targets.begin(); it != targets.end(); ++it)
    {
        if (it->kind == kind)
            return &*it;
    }

    return nullptr;
}
/// Makes a copy of this node in which every inner engine is cloned as well,
/// so that the copy's `children` and `targets` refer to its own engine nodes.
ASTPtr ASTViewTargets::clone() const
{
    auto copy = std::make_shared<ASTViewTargets>(*this);

    /// The copied child list still points at this node's engines; rebuild it below.
    copy->children.clear();

    for (auto & target : copy->targets)
    {
        if (!target.inner_engine)
            continue;

        auto cloned_engine = typeid_cast<std::shared_ptr<ASTStorage>>(target.inner_engine->clone());
        target.inner_engine = cloned_engine;
        copy->children.push_back(cloned_engine);
    }

    return copy;
}
void ASTViewTargets::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
for (const auto & target : targets)
formatTarget(target, s, state, frame);
}
/// Formats only those targets whose kind equals `kind` (every match, not just the first).
void ASTViewTargets::formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
    for (const auto & candidate : targets)
    {
        if (candidate.kind != kind)
            continue;

        formatTarget(candidate, s, state, frame);
    }
}
/// Writes a single target to `s.ostr`. Up to three parts are emitted, in this order:
/// the target table name, the inner UUID, and the inner table engine - each one only
/// if it is set, and each one preceded by the keyword for the target's kind.
/// Keywords are wrapped in highlighting sequences when `s.hilite` is set.
/// Throws LOGICAL_ERROR if a part is set but the target's kind has no keyword for it.
void ASTViewTargets::formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame)
{
/// Part 1: "<keyword> [db.]table", with identifiers back-quoted when needed.
if (target.table_id)
{
auto keyword = getKeywordForTableID(target.kind);
if (!keyword)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No keyword for table name of kind {}", toString(target.kind));
s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword)
<< (s.hilite ? hilite_none : "") << " "
<< (!target.table_id.database_name.empty() ? backQuoteIfNeed(target.table_id.database_name) + "." : "")
<< backQuoteIfNeed(target.table_id.table_name);
}
/// Part 2: "<keyword> '<uuid>'".
if (target.inner_uuid != UUIDHelpers::Nil)
{
auto keyword = getKeywordForInnerUUID(target.kind);
if (!keyword)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for inner UUID of kind {}", toString(target.kind));
s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword)
<< (s.hilite ? hilite_none : "") << " " << quoteString(toString(target.inner_uuid));
}
/// Part 3: "<keyword>" followed by the storage definition, which formats itself.
if (target.inner_engine)
{
auto keyword = getKeywordForInnerStorage(target.kind);
if (!keyword)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for table engine of kind {}", toString(target.kind));
s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) << (s.hilite ? hilite_none : "");
target.inner_engine->formatImpl(s, state, frame);
}
}
/// Returns the keyword which precedes the name of an external target table of the specified kind,
/// or std::nullopt if there is no such keyword for that kind.
std::optional<Keyword> ASTViewTargets::getKeywordForTableID(ViewTarget::Kind kind)
{
    switch (kind)
    {
        case ViewTarget::To:
        {
            /// TO mydb.mydata
            return Keyword::TO;
        }
        case ViewTarget::Inner:
        {
            return std::nullopt;
        }
    }
    UNREACHABLE();
}
/// Returns the keyword which precedes the table engine definition of a target of the specified kind,
/// or std::nullopt if the engine definition of that kind is written without a prefix keyword.
std::optional<Keyword> ASTViewTargets::getKeywordForInnerStorage(ViewTarget::Kind kind)
{
    switch (kind)
    {
        case ViewTarget::To:
        {
            /// ENGINE = MergeTree()
            return std::nullopt;
        }
        case ViewTarget::Inner:
        {
            /// INNER ENGINE = MergeTree()
            return Keyword::INNER;
        }
    }
    UNREACHABLE();
}
/// Returns the keyword which precedes the UUID of an inner target table of the specified kind,
/// or std::nullopt if there is no such keyword for that kind.
std::optional<Keyword> ASTViewTargets::getKeywordForInnerUUID(ViewTarget::Kind kind)
{
    switch (kind)
    {
        case ViewTarget::To:
        {
            /// TO INNER UUID 'XXX'
            return Keyword::TO_INNER_UUID;
        }
        case ViewTarget::Inner:
        {
            return std::nullopt;
        }
    }
    UNREACHABLE();
}
void ASTViewTargets::forEachPointerToChild(std::function<void(void**)> f)
{
for (auto & target : targets)
{
if (target.inner_engine)
{
ASTStorage * new_inner_engine = target.inner_engine.get();
f(reinterpret_cast<void **>(&new_inner_engine));
if (new_inner_engine != target.inner_engine.get())
{
if (new_inner_engine)
target.inner_engine = typeid_cast<std::shared_ptr<ASTStorage>>(new_inner_engine->ptr());
else
target.inner_engine.reset();
}
}
}
}
}

View File

@ -0,0 +1,115 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/StorageID.h>
namespace DB
{
class ASTStorage;
enum class Keyword : size_t;
/// Information about one target table (external or inner) of a materialized view or a window view.
/// A view can have several targets of different kinds; they are stored together in ASTViewTargets.
/// See ASTViewTargets for more details.
struct ViewTarget
{
/// The role a target table plays for its view.
enum Kind
{
/// If `kind == ViewTarget::To` then `ViewTarget` contains information about the "TO" table of a materialized view or a window view:
/// CREATE MATERIALIZED VIEW db.mv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ...
/// or
/// CREATE WINDOW VIEW db.wv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ...
To,
/// If `kind == ViewTarget::Inner` then `ViewTarget` contains information about the "INNER" table of a window view:
/// CREATE WINDOW VIEW db.wv_name {INNER ENGINE inner_engine} AS SELECT ...
Inner,
};
/// Which target this structure describes. A default-constructed ViewTarget describes the "TO" table.
Kind kind = To;
/// StorageID of the target table, if it's not inner.
/// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ...
/// It's empty by default.
StorageID table_id = StorageID::createEmpty();
/// UUID of the target table, if it's inner.
/// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like
/// CREATE MATERIALIZED VIEW ... TO INNER UUID ...
/// It's UUIDHelpers::Nil by default, which means "not set".
UUID inner_uuid = UUIDHelpers::Nil;
/// Table engine of the target table, if it's inner.
/// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ...
/// It's null by default, which means "not set".
std::shared_ptr<ASTStorage> inner_engine;
};
/// Converts ViewTarget::Kind to a string.
std::string_view toString(ViewTarget::Kind kind);
/// Parses ViewTarget::Kind from a string and stores the result in `out`.
/// CreateQueryUUIDs::fromString() passes it the names written via toString(), so the two are expected to round-trip.
void parseFromString(ViewTarget::Kind & out, std::string_view str);
/// Information about all target tables (external or inner) of a view.
///
/// For example, for a materialized view:
/// CREATE MATERIALIZED VIEW db.mv_name [TO [db.]to_target | ENGINE to_engine] AS SELECT ...
/// this class contains information about the "TO" table: its name and database (if it's external), its UUID and engine (if it's inner).
///
/// For a window view:
/// CREATE WINDOW VIEW db.wv_name [TO [db.]to_target | ENGINE to_engine] [INNER ENGINE inner_engine] AS SELECT ...
/// this class contains information about both the "TO" table and the "INNER" table.
class ASTViewTargets : public IAST
{
public:
/// The targets themselves. The accessors below look a target up by its kind.
std::vector<ViewTarget> targets;
/// Sets the StorageID of the target table, if it's not inner.
/// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ...
void setTableID(ViewTarget::Kind kind, const StorageID & table_id_);
StorageID getTableID(ViewTarget::Kind kind) const;
bool hasTableID(ViewTarget::Kind kind) const;
/// Replaces an empty database in the StorageID of the target table with a specified database.
void setCurrentDatabase(const String & current_database);
/// Sets the UUID of the target table, if it's inner.
/// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like
/// CREATE MATERIALIZED VIEW ... TO INNER UUID ...
void setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_);
UUID getInnerUUID(ViewTarget::Kind kind) const;
bool hasInnerUUID(ViewTarget::Kind kind) const;
void resetInnerUUIDs();
bool hasInnerUUIDs() const;
/// Sets the table engine of the target table, if it's inner.
/// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ...
void setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def);
/// Returns the table engine of the target of a specified kind, or nullptr if there is no such target or it has no engine.
std::shared_ptr<ASTStorage> getInnerEngine(ViewTarget::Kind kind) const;
/// Returns the table engines of all targets which have one.
std::vector<std::shared_ptr<ASTStorage>> getInnerEngines() const;
/// Returns a list of the kinds of all target tables in this ASTViewTargets.
std::vector<ViewTarget::Kind> getKinds() const;
/// Returns information about a target table.
/// The function returns null if such target doesn't exist.
const ViewTarget * tryGetTarget(ViewTarget::Kind kind) const;
String getID(char) const override { return "ViewTargets"; }
/// Makes a deep copy: the table engines of the targets are cloned too.
ASTPtr clone() const override;
/// Formats information about all target tables.
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
/// Formats information only about a specific target table.
void formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const;
static void formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame);
/// Helper functions for class ParserViewTargets. Returns a prefix keyword matching a specified target kind.
/// std::nullopt is returned if there is no prefix keyword for that kind.
static std::optional<Keyword> getKeywordForTableID(ViewTarget::Kind kind);
static std::optional<Keyword> getKeywordForInnerUUID(ViewTarget::Kind kind);
static std::optional<Keyword> getKeywordForInnerStorage(ViewTarget::Kind kind);
protected:
/// Passes pointers to the table engines of the targets to `f`, and applies the changes `f` makes to them.
void forEachPointerToChild(std::function<void(void**)> f) override;
};
}

View File

@ -0,0 +1,168 @@
#include <Parsers/CreateQueryUUIDs.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
namespace DB
{
/// Collects UUIDs from a CREATE query and optionally generates the missing ones.
///   `generate_random` - generate random UUIDs for those which are not set;
///   `force_random`    - together with `generate_random`: ignore the UUIDs specified in the query and generate all of them anew.
CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random, bool force_random)
{
    /// The UUIDs specified in the query are taken unless all of them must be regenerated.
    if (!generate_random || !force_random)
    {
        uuid = query.uuid;
        if (query.targets)
        {
            for (const auto & target : query.targets->targets)
                setTargetInnerUUID(target.kind, target.inner_uuid);
        }
    }

    if (generate_random)
    {
        if (uuid == UUIDHelpers::Nil)
            uuid = UUIDHelpers::generateV4();

        /// For an ATTACH query we should never generate UUIDs for its inner target tables
        /// because for an ATTACH query those inner target tables probably already exist and can be accessible by names.
        /// If we generate random UUIDs for already existing tables then those UUIDs will not be correct,
        /// making those inner target tables inaccessible.
        /// Thus it's not safe for example to replace
        /// "ATTACH MATERIALIZED VIEW mv AS SELECT a FROM b" with
        /// "ATTACH MATERIALIZED VIEW mv TO INNER UUID "XXXX" AS SELECT a FROM b"
        /// This replacement is safe only for CREATE queries when inner target tables don't exist yet.
        if (!query.attach)
        {
            auto generate_target_uuid = [&](ViewTarget::Kind target_kind)
            {
                /// We check the UUID collected above, not the one in the query: with `force_random` the UUIDs
                /// from the query were not copied, so a UUID specified in the query must be regenerated here
                /// instead of being silently dropped. Without `force_random` both UUIDs are the same.
                if ((getTargetInnerUUID(target_kind) == UUIDHelpers::Nil) && query.getTargetTableID(target_kind).empty())
                    setTargetInnerUUID(target_kind, UUIDHelpers::generateV4());
            };

            /// If destination table (to_table_id) is not specified for materialized view,
            /// then MV will create inner table. We should generate UUID of inner table here.
            if (query.is_materialized_view)
                generate_target_uuid(ViewTarget::To);
        }
    }
}
/// Returns true if neither the UUID of the table nor any of the inner UUIDs of its targets is set.
bool CreateQueryUUIDs::empty() const
{
    bool nothing_set = (uuid == UUIDHelpers::Nil);

    /// Stop at the first UUID which is set.
    for (size_t i = 0; nothing_set && i < targets_inner_uuids.size(); ++i)
        nothing_set = (targets_inner_uuids[i].second == UUIDHelpers::Nil);

    return nothing_set;
}
/// Serializes the UUIDs which are set as {"name": "uuid", ...}.
/// The UUID of the table goes first under the name "uuid", then the inner UUIDs of the targets under the names of their kinds.
String CreateQueryUUIDs::toString() const
{
    WriteBufferFromOwnString buf;
    bool is_first_entry = true;

    auto append_entry = [&buf, &is_first_entry](std::string_view key, const UUID & value)
    {
        if (!is_first_entry)
            buf << ", ";
        is_first_entry = false;
        buf << "\"" << key << "\": \"" << value << "\"";
    };

    buf << "{";

    if (uuid != UUIDHelpers::Nil)
        append_entry("uuid", uuid);

    for (const auto & entry : targets_inner_uuids)
    {
        /// UUIDs which are not set are not written.
        if (entry.second == UUIDHelpers::Nil)
            continue;
        append_entry(::DB::toString(entry.first), entry.second);
    }

    buf << "}";
    return buf.str();
}
/// Parses a string like {"name": "uuid", ...}, i.e. the format produced by toString().
/// The name "uuid" means the UUID of the table, any other name is parsed as a kind of a target.
CreateQueryUUIDs CreateQueryUUIDs::fromString(const String & str)
{
    CreateQueryUUIDs parsed;
    ReadBufferFromString buf{str};

    skipWhitespaceIfAny(buf);
    buf >> "{";
    skipWhitespaceIfAny(buf);

    for (char next = 0; buf.peek(next) && next != '}';)
    {
        /// "name" : "value"
        String key;
        readDoubleQuotedString(key, buf);
        skipWhitespaceIfAny(buf);
        buf >> ":";
        skipWhitespaceIfAny(buf);

        String uuid_text;
        readDoubleQuotedString(uuid_text, buf);
        skipWhitespaceIfAny(buf);

        if (key != "uuid")
        {
            ViewTarget::Kind kind;
            parseFromString(kind, key);
            parsed.setTargetInnerUUID(kind, parse<UUID>(uuid_text));
        }
        else
        {
            parsed.uuid = parse<UUID>(uuid_text);
        }

        /// A comma after an entry is skipped if it's there.
        if (buf.peek(next) && next == ',')
        {
            buf.ignore(1);
            skipWhitespaceIfAny(buf);
        }
    }

    buf >> "}";
    return parsed;
}
/// Sets the inner UUID for a target of the specified kind.
/// An existing entry for that kind is overwritten (even with Nil); a new entry is added only for a non-Nil UUID.
void CreateQueryUUIDs::setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid)
{
    for (auto & [existing_kind, existing_uuid] : targets_inner_uuids)
    {
        if (existing_kind != kind)
            continue;
        existing_uuid = new_inner_uuid;
        return;
    }

    if (new_inner_uuid == UUIDHelpers::Nil)
        return;

    targets_inner_uuids.emplace_back(kind, new_inner_uuid);
}
/// Returns the inner UUID stored for a target of the specified kind, or Nil if there is no entry for that kind.
UUID CreateQueryUUIDs::getTargetInnerUUID(ViewTarget::Kind kind) const
{
    for (const auto & [stored_kind, stored_uuid] : targets_inner_uuids)
    {
        if (stored_kind == kind)
            return stored_uuid;
    }
    return UUIDHelpers::Nil;
}
/// Writes the stored UUIDs to a CREATE query: the UUID of the table is overwritten,
/// the inner UUIDs of the targets in the query are reset and then replaced with the stored non-Nil ones.
void CreateQueryUUIDs::copyToQuery(ASTCreateQuery & query) const
{
    query.uuid = uuid;

    if (query.targets)
        query.targets->resetInnerUUIDs();

    if (targets_inner_uuids.empty())
        return;

    /// The query may have no targets yet, so we create them to have a place to keep the inner UUIDs.
    if (!query.targets)
        query.set(query.targets, std::make_shared<ASTViewTargets>());

    for (const auto & entry : targets_inner_uuids)
    {
        if (entry.second == UUIDHelpers::Nil)
            continue;
        query.targets->setInnerUUID(entry.first, entry.second);
    }
}
}

View File

@ -0,0 +1,40 @@
#pragma once
#include <Parsers/ASTViewTargets.h>
namespace DB
{
class ASTCreateQuery;
/// The UUID of a table or a database defined with a CREATE query along with the UUIDs of its inner targets.
struct CreateQueryUUIDs
{
/// Creates an empty set of UUIDs: nothing is set.
CreateQueryUUIDs() = default;
/// Collect UUIDs from ASTCreateQuery.
/// Parameters:
/// `generate_random` - if it's true then unspecified in the query UUIDs will be generated randomly;
/// `force_random` - if it's true then all UUIDs (even specified in the query) will be (re)generated randomly.
/// Note that `force_random` has an effect only together with `generate_random`.
explicit CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random = false, bool force_random = false);
/// Returns true if neither `uuid` nor any of the inner UUIDs of the targets is set.
bool empty() const;
explicit operator bool() const { return !empty(); }
/// Converts the UUIDs which are set to a string like {"uuid": "...", "<kind>": "..."} and back.
String toString() const;
static CreateQueryUUIDs fromString(const String & str);
/// Sets the inner UUID of a target of a specified kind.
void setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid);
/// Returns the inner UUID of a target of a specified kind, or UUIDHelpers::Nil if it's not set.
UUID getTargetInnerUUID(ViewTarget::Kind kind) const;
/// Copies UUIDs to ASTCreateQuery.
void copyToQuery(ASTCreateQuery & query) const;
/// UUID of the table.
UUID uuid = UUIDHelpers::Nil;
/// UUIDs of its target table (or tables).
/// setTargetInnerUUID() keeps at most one entry per kind.
std::vector<std::pair<ViewTarget::Kind, UUID>> targets_inner_uuids;
};
}

View File

@ -9,7 +9,7 @@ namespace DB
{
/** The SELECT subquery is in parenthesis.
/** The SELECT subquery, in parentheses.
*/
class ParserSubquery : public IParserBase
{

View File

@ -22,6 +22,7 @@
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ParserRefreshStrategy.h>
#include <Parsers/ParserViewTargets.h>
#include <Common/typeid_cast.h>
#include <Parsers/ASTColumnDeclaration.h>
@ -693,7 +694,8 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
ASTPtr table;
ASTPtr columns_list;
ASTPtr storage;
std::shared_ptr<ASTStorage> storage;
ASTPtr targets;
ASTPtr as_database;
ASTPtr as_table;
ASTPtr as_table_function;
@ -773,6 +775,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
return true;
}
auto parse_storage = [&]
{
chassert(!storage);
ASTPtr ast;
if (!storage_p.parse(pos, ast, expected))
return false;
storage = typeid_cast<std::shared_ptr<ASTStorage>>(ast);
return true;
};
auto need_parse_as_select = [&is_create_empty, &pos, &expected]()
{
if (ParserKeyword{Keyword::EMPTY_AS}.ignore(pos, expected))
@ -798,7 +811,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
if (!s_rparen.ignore(pos, expected))
return false;
auto storage_parse_result = storage_p.parse(pos, storage, expected);
auto storage_parse_result = parse_storage();
if ((storage_parse_result || is_temporary) && need_parse_as_select())
{
@ -820,7 +833,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
*/
else
{
storage_p.parse(pos, storage, expected);
parse_storage();
/// CREATE|ATTACH TABLE ... AS ...
if (need_parse_as_select())
@ -843,7 +856,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
/// Optional - ENGINE can be specified.
if (!storage)
storage_p.parse(pos, storage, expected);
parse_storage();
}
}
}
@ -904,6 +917,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
tryGetIdentifierNameInto(as_database, query->as_database);
tryGetIdentifierNameInto(as_table, query->as_table);
query->set(query->select, select);
query->set(query->targets, targets);
query->is_create_empty = is_create_empty;
if (from_path)
@ -977,6 +991,13 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
return false;
}
std::shared_ptr<ASTViewTargets> targets;
if (to_table)
{
targets = std::make_shared<ASTViewTargets>();
targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
}
/// Optional - a list of columns can be specified. It must fully comply with SELECT.
if (s_lparen.ignore(pos, expected))
{
@ -1017,14 +1038,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
if (query->table)
query->children.push_back(query->table);
if (to_table)
query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();
query->set(query->columns_list, columns_list);
tryGetIdentifierNameInto(as_database, query->as_database);
tryGetIdentifierNameInto(as_table, query->as_table);
query->set(query->select, select);
query->set(query->targets, targets);
if (comment)
query->set(query->comment, comment);
@ -1139,6 +1158,18 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
storage_p.parse(pos, storage, expected);
}
std::shared_ptr<ASTViewTargets> targets;
if (to_table || storage || inner_storage)
{
targets = std::make_shared<ASTViewTargets>();
if (to_table)
targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
if (storage)
targets->setInnerEngine(ViewTarget::To, storage);
if (inner_storage)
targets->setInnerEngine(ViewTarget::Inner, inner_storage);
}
// WATERMARK
if (s_watermark.ignore(pos, expected))
{
@ -1195,12 +1226,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
if (query->table)
query->children.push_back(query->table);
if (to_table)
query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();
query->set(query->columns_list, columns_list);
query->set(query->storage, storage);
query->set(query->inner_storage, inner_storage);
query->is_watermark_strictly_ascending = is_watermark_strictly_ascending;
query->is_watermark_ascending = is_watermark_ascending;
query->is_watermark_bounded = is_watermark_bounded;
@ -1213,6 +1240,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
tryGetIdentifierNameInto(as_database, query->as_database);
tryGetIdentifierNameInto(as_table, query->as_table);
query->set(query->select, select);
query->set(query->targets, targets);
return true;
}
@ -1436,6 +1464,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
return true;
}
bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword s_create(Keyword::CREATE);
@ -1622,13 +1651,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (query->table)
query->children.push_back(query->table);
if (to_table)
query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();
if (to_inner_uuid)
query->to_inner_uuid = parseFromString<UUID>(to_inner_uuid->as<ASTLiteral>()->value.get<String>());
query->set(query->columns_list, columns_list);
query->set(query->storage, storage);
if (refresh_strategy)
query->set(query->refresh_strategy, refresh_strategy);
if (comment)
@ -1639,29 +1663,41 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (query->columns_list && query->columns_list->primary_key)
{
/// If engine is not set will use default one
if (!query->storage)
query->set(query->storage, std::make_shared<ASTStorage>());
else if (query->storage->primary_key)
if (!storage)
storage = std::make_shared<ASTStorage>();
auto & storage_ref = typeid_cast<ASTStorage &>(*storage);
if (storage_ref.primary_key)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");
query->storage->primary_key = query->columns_list->primary_key;
storage_ref.primary_key = query->columns_list->primary_key;
}
if (query->columns_list && (query->columns_list->primary_key_from_columns))
{
/// If engine is not set will use default one
if (!query->storage)
query->set(query->storage, std::make_shared<ASTStorage>());
else if (query->storage->primary_key)
if (!storage)
storage = std::make_shared<ASTStorage>();
auto & storage_ref = typeid_cast<ASTStorage &>(*storage);
if (storage_ref.primary_key)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");
storage_ref.primary_key = query->columns_list->primary_key_from_columns;
}
query->storage->primary_key = query->columns_list->primary_key_from_columns;
std::shared_ptr<ASTViewTargets> targets;
if (to_table || to_inner_uuid || storage)
{
targets = std::make_shared<ASTViewTargets>();
if (to_table)
targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
if (to_inner_uuid)
targets->setInnerUUID(ViewTarget::To, parseFromString<UUID>(to_inner_uuid->as<ASTLiteral>()->value.safeGet<String>()));
if (storage)
targets->setInnerEngine(ViewTarget::To, storage);
}
tryGetIdentifierNameInto(as_database, query->as_database);
tryGetIdentifierNameInto(as_table, query->as_table);
query->set(query->select, select);
query->set(query->targets, targets);
return true;
}

View File

@ -11,15 +11,12 @@
namespace DB
{
bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword s_describe(Keyword::DESCRIBE);
ParserKeyword s_desc(Keyword::DESC);
ParserKeyword s_table(Keyword::TABLE);
ParserKeyword s_settings(Keyword::SETTINGS);
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p;
ParserSetQuery parser_settings(true);
ASTPtr database;
@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
return true;
}
}

View File

@ -0,0 +1,88 @@
#include <Parsers/ParserViewTargets.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTViewTargets.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ParserCreateQuery.h>
#include <IO/ReadHelpers.h>
namespace DB
{
/// By default the parser accepts targets of all kinds.
ParserViewTargets::ParserViewTargets()
{
    const auto & all_kinds = magic_enum::enum_values<ViewTarget::Kind>();
    accept_kinds.insert(accept_kinds.end(), all_kinds.begin(), all_kinds.end());
}
/// Parses as many parts describing target tables as possible, in any order:
///     <keyword> 'uuid'           - the UUID of an inner target table (e.g. TO INNER UUID 'XXX');
///     [<keyword>] <engine def>   - the table engine of an inner target table (e.g. ENGINE ... or INNER ENGINE ...);
///     <keyword> [db.]table       - an external target table (e.g. TO db.table).
/// Only the kinds listed in `accept_kinds` are tried.
/// Returns false if nothing was parsed, otherwise sets `node` to the resulting ASTViewTargets.
bool ParserViewTargets::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserStringLiteral literal_p;
    ParserStorage storage_p{ParserStorage::TABLE_ENGINE};
    ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true);

    std::shared_ptr<ASTViewTargets> res;

    /// The result is created lazily, i.e. only after some part has been parsed successfully.
    auto result = [&] -> ASTViewTargets &
    {
        if (!res)
            res = std::make_shared<ASTViewTargets>();
        return *res;
    };

    for (;;)
    {
        auto start = pos;
        for (auto kind : accept_kinds)
        {
            /// Each attempt below starts from `current`; a failed attempt is rolled back to it.
            auto current = pos;

            auto keyword = ASTViewTargets::getKeywordForInnerUUID(kind);
            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
            {
                ASTPtr ast;
                if (literal_p.parse(pos, ast, expected))
                {
                    result().setInnerUUID(kind, parseFromString<UUID>(ast->as<ASTLiteral>()->value.safeGet<String>()));
                    break;
                }
            }
            pos = current;

            /// For a table engine no keyword means there's no prefix to skip (plain "ENGINE ..." for ViewTarget::To),
            /// so in that case we try to parse the engine definition right away.
            keyword = ASTViewTargets::getKeywordForInnerStorage(kind);
            if (!keyword || ParserKeyword{*keyword}.ignore(pos, expected))
            {
                ASTPtr ast;
                if (storage_p.parse(pos, ast, expected))
                {
                    result().setInnerEngine(kind, ast);
                    break;
                }
            }
            pos = current;

            keyword = ASTViewTargets::getKeywordForTableID(kind);
            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
            {
                ASTPtr ast;
                if (table_name_p.parse(pos, ast, expected))
                {
                    result().setTableID(kind, ast->as<ASTTableIdentifier>()->getTableId());
                    break;
                }
            }
            pos = current;
        }

        /// Nothing was parsed on this pass, so there's nothing more for us here.
        if (pos == start)
            break;
    }

    if (!res || res->targets.empty())
        return false;

    node = res;
    return true;
}
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/ASTViewTargets.h>
namespace DB
{
/// Parses information about target tables (external or inner) of a materialized view or a window view.
/// The function parses one or multiple parts of a CREATE query looking like this:
/// TO db.table_name
/// TO INNER UUID 'XXX'
/// {ENGINE / INNER ENGINE} TableEngine(arguments) [ORDER BY ...] [SETTINGS ...]
/// Returns ASTViewTargets if succeeded.
class ParserViewTargets : public IParserBase
{
public:
/// Accepts targets of all kinds.
ParserViewTargets();
/// Accepts only targets of the specified kinds.
explicit ParserViewTargets(const std::vector<ViewTarget::Kind> & accept_kinds_) : accept_kinds(accept_kinds_) { }
protected:
const char * getName() const override { return "ViewTargets"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
/// The kinds of targets this parser tries to parse, in this order.
std::vector<ViewTarget::Kind> accept_kinds;
};
}

View File

@ -304,7 +304,7 @@ void RefreshTask::refreshTask()
{
PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
auto text = message.text;
message.text = fmt::format("Refresh failed: {}", message.text);
message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text);
LOG_ERROR(log, message);
exception = text;
}

View File

@ -16,6 +16,7 @@
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/CurrentMetrics.h>
#include <Common/NetException.h>
#include <Common/randomDelay.h>
@ -224,14 +225,18 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM)
if (e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM
&& !isRetryableException(std::current_exception()))
{
report_broken_part();
}
throw;
}
catch (...)
{
report_broken_part();
if (!isRetryableException(std::current_exception()))
report_broken_part();
throw;
}
}

View File

@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
{
auto stats = part->loadStatistics();
/// TODO: We only have one stats file for every part.
result.addRows(part->rows_count);
for (const auto & stat : stats)
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
result.merge(part->info.getPartNameV1(), stat);
}
catch (...)
{
@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
if (!partition_pruner.canBePruned(*part))
{
auto stats = part->loadStatistics();
result.addRows(part->rows_count);
for (const auto & stat : stats)
result.merge(part->info.getPartNameV1(), part->rows_count, stat);
result.merge(part->info.getPartNameV1(), stat);
}
}
catch (...)
@ -1144,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr);
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false);
if (!filter_dag)
return {};

View File

@ -44,10 +44,12 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(
const String & index_name_,
const Block & index_sample_block_,
size_t max_rows_,
MutableColumns && mutable_columns_)
MutableColumns && mutable_columns_,
std::vector<Range> && set_hyperrectangle_)
: index_name(index_name_)
, max_rows(max_rows_)
, block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_)))
, set_hyperrectangle(std::move(set_hyperrectangle_))
{
}
@ -106,6 +108,10 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
settings.position_independent_encoding = false;
set_hyperrectangle.clear();
Field min_val;
Field max_val;
for (size_t i = 0; i < num_columns; ++i)
{
auto & elem = block.getByPosition(i);
@ -116,6 +122,13 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr);
serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr);
if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(elem.column.get()))
column_nullable->getExtremesNullLast(min_val, max_val);
else
elem.column->getExtremes(min_val, max_val);
set_hyperrectangle.emplace_back(min_val, true, max_val, true);
}
}
@ -182,10 +195,29 @@ void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size
if (has_new_data)
{
FieldRef field_min;
FieldRef field_max;
for (size_t i = 0; i < columns.size(); ++i)
{
auto filtered_column = block.getByName(index_columns[i]).column->filter(filter, block.rows());
columns[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size());
if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(filtered_column.get()))
column_nullable->getExtremesNullLast(field_min, field_max);
else
filtered_column->getExtremes(field_min, field_max);
if (set_hyperrectangle.size() <= i)
{
set_hyperrectangle.emplace_back(field_min, true, field_max, true);
}
else
{
set_hyperrectangle[i].left
= applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].left, field_min) ? set_hyperrectangle[i].left : field_min;
set_hyperrectangle[i].right
= applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].right, field_max) ? field_max : set_hyperrectangle[i].right;
}
}
}
@ -221,7 +253,7 @@ bool MergeTreeIndexAggregatorSet::buildFilter(
MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
{
auto granule = std::make_shared<MergeTreeIndexGranuleSet>(index_name, index_sample_block, max_rows, std::move(columns));
auto granule = std::make_shared<MergeTreeIndexGranuleSet>(index_name, index_sample_block, max_rows, std::move(columns), std::move(set_hyperrectangle));
switch (data.type)
{
@ -240,17 +272,22 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
return granule;
}
/// Builds a KeyCondition from the filter DAG, using the column names and the expression of the index as the key.
KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context)
{
return KeyCondition{filter_actions_dag, context, index.column_names, index.expression};
}
MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
const String & index_name_,
const Block & index_sample_block,
size_t max_rows_,
const ActionsDAGPtr & filter_dag,
ContextPtr context)
: index_name(index_name_)
ContextPtr context,
const IndexDescription & index_description)
: index_name(index_description.name)
, max_rows(max_rows_)
, index_data_types(index_description.data_types)
, condition(buildCondition(index_description, filter_dag, context))
{
for (const auto & name : index_sample_block.getNames())
for (const auto & name : index_description.sample_block.getNames())
if (!key_columns.contains(name))
key_columns.insert(name);
@ -293,6 +330,9 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
if (size == 0 || (max_rows != 0 && size > max_rows))
return true;
if (!condition.checkInHyperrectangle(granule.set_hyperrectangle, index_data_types).can_be_true)
return false;
Block result = granule.block;
actions->execute(result);
@ -546,7 +586,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge
MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition(
const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const
{
return std::make_shared<MergeTreeIndexConditionSet>(index.name, index.sample_block, max_rows, filter_actions_dag, context);
return std::make_shared<MergeTreeIndexConditionSet>(max_rows, filter_actions_dag, context, index);
}
MergeTreeIndexPtr setIndexCreator(const IndexDescription & index)

View File

@ -22,7 +22,8 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule
const String & index_name_,
const Block & index_sample_block_,
size_t max_rows_,
MutableColumns && columns_);
MutableColumns && columns_,
std::vector<Range> && set_hyperrectangle_);
void serializeBinary(WriteBuffer & ostr) const override;
void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override;
@ -36,6 +37,7 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule
const size_t max_rows;
Block block;
std::vector<Range> set_hyperrectangle;
};
@ -73,6 +75,7 @@ private:
ClearableSetVariants data;
Sizes key_sizes;
MutableColumns columns;
std::vector<Range> set_hyperrectangle;
};
@ -80,11 +83,10 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition
{
public:
MergeTreeIndexConditionSet(
const String & index_name_,
const Block & index_sample_block,
size_t max_rows_,
const ActionsDAGPtr & filter_dag,
ContextPtr context);
ContextPtr context,
const IndexDescription & index_description);
bool alwaysUnknownOrTrue() const override;
@ -119,6 +121,9 @@ private:
std::unordered_set<String> key_columns;
ExpressionActionsPtr actions;
String actions_output_column_name;
DataTypes index_data_types;
KeyCondition condition;
};

View File

@ -15,16 +15,11 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Common/logger_useful.h>
#include <Processors/Merges/Algorithms/MergeTreePartLevelInfo.h>
#include <Storages/MergeTree/checkDataPart.h>
namespace DB
{
namespace ErrorCodes
{
extern const int MEMORY_LIMIT_EXCEEDED;
}
/// Lightweight (in terms of logic) stream for reading single part from
/// MergeTree, used for merges and mutations.
///
@ -281,7 +276,7 @@ try
catch (...)
{
/// Suspicion of the broken part. A part is added to the queue for verification.
if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
if (!isRetryableException(std::current_exception()))
storage.reportBrokenPart(data_part);
throw;
}

View File

@ -36,11 +36,13 @@ namespace ErrorCodes
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int CANNOT_SCHEDULE_TASK;
extern const int UNEXPECTED_FILE_IN_DATA_PART;
extern const int NO_FILE_IN_DATA_PART;
extern const int NETWORK_ERROR;
extern const int SOCKET_TIMEOUT;
extern const int BROKEN_PROJECTION;
extern const int ABORTED;
}
@ -85,7 +87,9 @@ bool isRetryableException(std::exception_ptr exception_ptr)
{
return isNotEnoughMemoryErrorCode(e.code())
|| e.code() == ErrorCodes::NETWORK_ERROR
|| e.code() == ErrorCodes::SOCKET_TIMEOUT;
|| e.code() == ErrorCodes::SOCKET_TIMEOUT
|| e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
|| e.code() == ErrorCodes::ABORTED;
}
catch (const Poco::Net::NetException &)
{
@ -329,16 +333,21 @@ static IMergeTreeDataPart::Checksums checkDataPart(
projections_on_disk.erase(projection_file);
}
if (throw_on_broken_projection && !broken_projections_message.empty())
if (throw_on_broken_projection)
{
throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
}
if (!broken_projections_message.empty())
{
throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
}
if (require_checksums && !projections_on_disk.empty())
{
throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
"Found unexpected projection directories: {}",
fmt::join(projections_on_disk, ","));
/// This one is actually not broken, just redundant files on disk which
/// MergeTree will never use.
if (require_checksums && !projections_on_disk.empty())
{
throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
"Found unexpected projection directories: {}",
fmt::join(projections_on_disk, ","));
}
}
if (is_cancelled())

View File

@ -163,7 +163,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
for (const auto & object_info : read_keys)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName()))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName());
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;
@ -221,7 +223,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName());
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;

View File

@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par
part_statistics[part_name] = stats;
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const
{
if (part_statistics.empty())
return default_normal_cond_factor * rows;
@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(
return result * rows / part_rows;
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const
{
return rows - estimateLess(val, rows);
}
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const
{
if (part_statistics.empty())
{
if (val < - threshold || val > threshold)
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val)
return default_unknown_cond_factor * rows;
else if (float_val.value() < - threshold || float_val.value() > threshold)
return default_normal_cond_factor * rows;
else
return default_good_cond_factor * rows;
@ -87,7 +90,7 @@ static std::pair<String, Int32> tryToExtractSingleColumn(const RPNBuilderTreeNod
return result;
}
std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
std::pair<String, Field> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
{
if (!node.isFunction())
return {};
@ -123,48 +126,35 @@ std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const
DataTypePtr output_type;
if (!constant_node->tryGetConstant(output_value, output_type))
return {};
const auto type = output_value.getType();
Float64 value;
if (type == Field::Types::Int64)
value = output_value.get<Int64>();
else if (type == Field::Types::UInt64)
value = output_value.get<UInt64>();
else if (type == Field::Types::Float64)
value = output_value.get<Float64>();
else
return {};
return std::make_pair(function_name, value);
return std::make_pair(function_name, output_value);
}
Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const
{
auto result = tryToExtractSingleColumn(node);
if (result.second != 1)
{
return default_unknown_cond_factor;
}
return default_unknown_cond_factor * total_rows;
String col = result.first;
auto it = column_estimators.find(col);
/// If the estimator of the column is not found or there is no data at all,
/// we use a dummy estimation.
bool dummy = total_rows == 0;
bool dummy = false;
ColumnSelectivityEstimator estimator;
if (it != column_estimators.end())
{
estimator = it->second;
}
else
{
dummy = true;
}
auto [op, val] = extractBinaryOp(node, col);
if (op == "equals")
{
if (dummy)
{
if (val < - threshold || val > threshold)
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val || (float_val < - threshold || float_val > threshold))
return default_normal_cond_factor * total_rows;
else
return default_good_cond_factor * total_rows;
@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode
return default_unknown_cond_factor * total_rows;
}
void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat)
void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat)
{
if (!part_names.contains(part_name))
{
total_rows += part_rows;
part_names.insert(part_name);
}
if (column_stat != nullptr)
column_estimators[column_stat->columnName()].merge(part_name, column_stat);
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Storages/Statistics/Statistics.h>
#include <Core/Field.h>
namespace DB
{
@ -10,6 +11,14 @@ class RPNBuilderTreeNode;
/// It estimates the selectivity of a condition.
class ConditionSelectivityEstimator
{
public:
/// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
/// Right now we only support simple condition like col = val / col < val
Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
void merge(String part_name, ColumnStatisticsPtr column_stat);
void addRows(UInt64 part_rows) { total_rows += part_rows; }
private:
friend class ColumnStatistics;
struct ColumnSelectivityEstimator
@ -20,13 +29,15 @@ private:
void merge(String part_name, ColumnStatisticsPtr stats);
Float64 estimateLess(Float64 val, Float64 rows) const;
Float64 estimateLess(const Field & val, Float64 rows) const;
Float64 estimateGreater(Float64 val, Float64 rows) const;
Float64 estimateGreater(const Field & val, Float64 rows) const;
Float64 estimateEqual(Float64 val, Float64 rows) const;
Float64 estimateEqual(const Field & val, Float64 rows) const;
};
std::pair<String, Field> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
static constexpr auto default_good_cond_factor = 0.1;
static constexpr auto default_normal_cond_factor = 0.5;
static constexpr auto default_unknown_cond_factor = 1.0;
@ -35,16 +46,7 @@ private:
static constexpr auto threshold = 2;
UInt64 total_rows = 0;
std::set<String> part_names;
std::map<String, ColumnSelectivityEstimator> column_estimators;
std::pair<String, Float64> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
public:
/// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
/// Right now we only support simple condition like col = val / col < val
Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;
void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat);
};
}

View File

@ -1,15 +1,18 @@
#include <Storages/Statistics/Statistics.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Storages/Statistics/StatisticsUniq.h>
#include <Storages/StatisticsDescription.h>
#include <Storages/ColumnsDescription.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include "config.h" /// USE_DATASKETCHES
namespace DB
{
@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16
V0 = 0,
};
/// Converts a numeric Field to Float64.
/// Handles Float64 and the signed/unsigned 64-, 128- and 256-bit integer field types;
/// any other field type yields an empty optional.
/// NOTE(review): converting wide integers to Float64 is presumably lossy for large magnitudes —
/// confirm that callers only need an approximate value.
std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & field)
{
switch (field.getType())
{
case Field::Types::Int64:
return field.get<Int64>();
case Field::Types::UInt64:
return field.get<UInt64>();
case Field::Types::Float64:
return field.get<Float64>();
case Field::Types::Int128:
return field.get<Int128>();
case Field::Types::UInt128:
return field.get<UInt128>();
case Field::Types::Int256:
return field.get<Int256>();
case Field::Types::UInt256:
return field.get<UInt256>();
default:
/// Not one of the numeric field types handled above.
return {};
}
}
/// Returns the string payload of `field` when it holds a String; otherwise returns an empty optional.
std::optional<String> StatisticsUtils::tryConvertToString(const DB::Field & field)
{
    const bool holds_string = field.getType() == Field::Types::String;
    if (!holds_string)
        return {};

    return field.get<String>();
}
IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
: stat(stat_)
{
@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateEqual(Float64 /*val*/) const
Float64 IStatistics::estimateEqual(const Field & /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics");
}
Float64 IStatistics::estimateLess(Float64 /*val*/) const
Float64 IStatistics::estimateLess(const Field & /*val*/) const
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
}
@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const
/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics
/// object that supports it natively.
Float64 ColumnStatistics::estimateLess(Float64 val) const
Float64 ColumnStatistics::estimateLess(const Field & val) const
{
if (stats.contains(StatisticsType::TDigest))
return stats.at(StatisticsType::TDigest)->estimateLess(val);
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
}
Float64 ColumnStatistics::estimateGreater(Float64 val) const
Float64 ColumnStatistics::estimateGreater(const Field & val) const
{
return rows - estimateLess(val);
}
Float64 ColumnStatistics::estimateEqual(Float64 val) const
Float64 ColumnStatistics::estimateEqual(const Field & val) const
{
if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
{
/// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
return stats.at(StatisticsType::TDigest)->estimateEqual(val);
}
if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold)
#if USE_DATASKETCHES
if (stats.contains(StatisticsType::CountMinSketch))
return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val);
#endif
if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold))
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
else
return rows * ConditionSelectivityEstimator::default_good_cond_factor;
@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va
MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
{
registerValidator(StatisticsType::TDigest, TDigestValidator);
registerCreator(StatisticsType::TDigest, TDigestCreator);
registerValidator(StatisticsType::TDigest, tdigestValidator);
registerCreator(StatisticsType::TDigest, tdigestCreator);
registerValidator(StatisticsType::Uniq, UniqValidator);
registerCreator(StatisticsType::Uniq, UniqCreator);
registerValidator(StatisticsType::Uniq, uniqValidator);
registerCreator(StatisticsType::Uniq, uniqCreator);
#if USE_DATASKETCHES
registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator);
registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator);
#endif
}
MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri
{
auto it = creators.find(type);
if (it == creators.end())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
auto stat_ptr = (it->second)(desc, stats.data_type);
column_stat->stats[type] = stat_ptr;
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/Block.h>
#include <Core/Field.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <Storages/StatisticsDescription.h>
@ -13,6 +14,14 @@ namespace DB
constexpr auto STATS_FILE_PREFIX = "statistics_";
constexpr auto STATS_FILE_SUFFIX = ".stats";
struct StatisticsUtils
{
/// Returns std::nullopt if input Field cannot be converted to a concrete value
static std::optional<Float64> tryConvertToFloat64(const Field & field);
static std::optional<String> tryConvertToString(const Field & field);
};
/// Statistics describe properties of the values in the column,
/// e.g. how many unique values exist,
/// what are the N most frequent values,
@ -34,8 +43,8 @@ public:
/// Per-value estimations.
/// Throws if the statistics object is not able to do a meaningful estimation.
virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column
virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column
virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column
virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column
protected:
SingleStatisticsDescription stat;
@ -58,9 +67,9 @@ public:
void update(const ColumnPtr & column);
Float64 estimateLess(Float64 val) const;
Float64 estimateGreater(Float64 val) const;
Float64 estimateEqual(Float64 val) const;
Float64 estimateLess(const Field & val) const;
Float64 estimateGreater(const Field & val) const;
Float64 estimateEqual(const Field & val) const;
private:
friend class MergeTreeStatisticsFactory;

View File

@ -0,0 +1,102 @@
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/convertFieldToType.h>
#if USE_DATASKETCHES
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_STATISTICS;
}
/// Constants chosen by rolling dice.
/// The values provide:
/// 1. an error tolerance of 0.1% (ε = 0.001)
/// 2. a confidence level of 99.9% (δ = 0.001).
/// The resulting sketch size is about 152 KB.
static constexpr auto num_hashes = 7uz;
static constexpr auto num_buckets = 2718uz;
/// Creates an empty sketch with the fixed num_hashes x num_buckets dimensions and remembers the
/// column's data type, which estimateEqual() later uses to convert the looked-up value.
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
: IStatistics(stat_)
, sketch(num_hashes, num_buckets)
, data_type(data_type_)
{
}
/// Estimates how many rows of the column are equal to `val`.
///
/// Returns 0 when `val` cannot be represented in the column's data type (conversion yields NULL).
/// Throws LOGICAL_ERROR when the column type is neither numeric-like nor String/FixedString.
Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const
{
    /// Try to convert field to data_type. Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc.
    /// Return null if val larger than the range of data_type
    ///
    /// For example: if data_type is Int32:
    ///     1. For 1.0, 1, '1', return Field(1)
    ///     2. For 1.1, max_value_int64, return null
    Field val_converted = convertFieldToType(val, *data_type);
    if (val_converted.isNull())
        return 0;

    if (data_type->isValueRepresentedByNumber())
        return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory());

    /// Query with the converted value (not the raw input) so that the hashed bytes reflect any
    /// normalisation done by convertFieldToType, consistently with the numeric branch above.
    /// NOTE(review): believed to matter for FixedString, where short values get padded - confirm.
    if (isStringOrFixedString(data_type))
        return sketch.get_estimate(val_converted.get<String>());

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName());
}
/// Adds every non-NULL value of `column` to the sketch with weight 1.
void StatisticsCountMinSketch::update(const ColumnPtr & column)
{
    const size_t num_rows = column->size();
    for (size_t i = 0; i != num_rows; ++i)
    {
        /// NULLs are not counted.
        if (!column->isNullAt(i))
        {
            const auto value = column->getDataAt(i);
            sketch.update(value.data, value.size, 1);
        }
    }
}
/// Writes the sketch as a UInt64 byte count followed by the serialized sketch bytes.
void StatisticsCountMinSketch::serialize(WriteBuffer & buf)
{
    const Sketch::vector_bytes serialized = sketch.serialize();

    /// Length prefix first, so that deserialize() knows how many bytes to read back.
    const UInt64 num_bytes = static_cast<UInt64>(serialized.size());
    writeIntBinary(num_bytes, buf);

    const char * raw = reinterpret_cast<const char *>(serialized.data());
    buf.write(raw, serialized.size());
}
/// Reads a UInt64 byte count followed by that many bytes, and replaces the current sketch
/// with the one reconstructed from them.
void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
{
    UInt64 num_bytes;
    readIntBinary(num_bytes, buf);

    Sketch::vector_bytes raw;
    raw.resize(num_bytes); /// To avoid 'container-overflow' in AddressSanitizer checking

    char * destination = reinterpret_cast<char *>(raw.data());
    buf.readStrict(destination, num_bytes);

    sketch = Sketch::deserialize(raw.data(), num_bytes);
}
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type))
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
}
/// Factory function: builds a count-min-sketch statistics object for the given description and column type.
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
return std::make_shared<StatisticsCountMinSketch>(stat, data_type);
}
}
#endif

View File

@ -0,0 +1,39 @@
#pragma once
#include <Storages/Statistics/Statistics.h>
#include "config.h"
#if USE_DATASKETCHES
#include <count_min.hpp>
namespace DB
{
/// Column statistics backed by a DataSketches count-min sketch.
/// Supports equality estimation only (estimateEqual); the other estimation methods are not overridden here.
class StatisticsCountMinSketch : public IStatistics
{
public:
StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);
/// Estimated number of rows equal to `val`.
Float64 estimateEqual(const Field & val) const override;
/// Feeds the values of `column` into the sketch.
void update(const ColumnPtr & column) override;
/// Binary (de)serialization of the sketch.
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
private:
using Sketch = datasketches::count_min_sketch<UInt64>;
Sketch sketch;
/// Data type of the column; used to convert the looked-up value in estimateEqual().
DataTypePtr data_type;
};
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr);
}
#endif

View File

@ -1,11 +1,13 @@
#include <Storages/Statistics/StatisticsTDigest.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_STATISTICS;
extern const int ILLEGAL_STATISTICS;
extern const int LOGICAL_ERROR;
}
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
void StatisticsTDigest::update(const ColumnPtr & column)
{
size_t rows = column->size();
for (size_t row = 0; row < rows; ++row)
{
/// TODO: support more types.
Float64 value = column->getFloat64(row);
t_digest.add(value, 1);
Field field;
column->get(row, field);
if (field.isNull())
continue;
if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field))
t_digest.add(*field_as_float, 1);
}
}
@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
t_digest.deserialize(buf);
}
Float64 StatisticsTDigest::estimateLess(Float64 val) const
Float64 StatisticsTDigest::estimateLess(const Field & val) const
{
return t_digest.getCountLessThan(val);
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountLessThan(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}
Float64 StatisticsTDigest::estimateEqual(Float64 val) const
Float64 StatisticsTDigest::estimateEqual(const Field & val) const
{
return t_digest.getCountEqual(val);
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountEqual(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
}
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
{
return std::make_shared<StatisticsTDigest>(stat);
}

View File

@ -16,14 +16,14 @@ public:
void serialize(WriteBuffer & buf) override;
void deserialize(ReadBuffer & buf) override;
Float64 estimateLess(Float64 val) const override;
Float64 estimateEqual(Float64 val) const override;
Float64 estimateLess(const Field & val) const override;
Float64 estimateEqual(const Field & val) const override;
private:
QuantileTDigest<Float64> t_digest;
};
void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
}

View File

@ -1,6 +1,7 @@
#include <Storages/Statistics/StatisticsUniq.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
{
@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const
return column->getUInt(0);
}
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
}
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
{
return std::make_shared<StatisticsUniq>(stat, data_type);
}

View File

@ -27,7 +27,7 @@ private:
};
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
}

View File

@ -1,6 +1,10 @@
#include <gtest/gtest.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Interpreters/convertFieldToType.h>
#include <DataTypes/DataTypeFactory.h>
using namespace DB;
TEST(Statistics, TDigestLessThan)
{
@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan)
std::reverse(data.begin(), data.end());
test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001});
}

Some files were not shown because too many files have changed in this diff Show More