Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-10 01:25:21 +00:00.
Merge remote-tracking branch 'blessed/master' into groupArrayIntersect
This commit is contained in commit 0c0e2d0ffb.

File: .github/workflows/backport_branches.yml (vendored, 11 changed lines)
@@ -241,8 +241,9 @@ jobs:
       runner_type: stress-tester
       data: ${{ needs.RunConfig.outputs.data }}
   FinishCheck:
-    if: ${{ !failure() && !cancelled() }}
+    if: ${{ !cancelled() }}
     needs:
       - RunConfig
+      - Builds_Report
       - FunctionalStatelessTestAsan
       - FunctionalStatefulTestDebug
@@ -257,6 +258,7 @@ jobs:
         with:
           clear-repository: true
       - name: Finish label
+        if: ${{ !failure() }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           # update mergeable check
@@ -264,3 +266,10 @@ jobs:
           # update overall ci report
           python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
           python3 merge_pr.py
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
File: .github/workflows/master.yml (vendored, 35 changed lines)
@@ -121,34 +121,6 @@ jobs:
       runner_type: style-checker-aarch64
       data: ${{ needs.RunConfig.outputs.data }}

-  MarkReleaseReady:
-    if: ${{ !failure() && !cancelled() }}
-    needs: [RunConfig, Builds_1, Builds_2]
-    runs-on: [self-hosted, style-checker-aarch64]
-    steps:
-      - name: Debug
-        run: |
-          echo need with different filters
-          cat << 'EOF'
-          ${{ toJSON(needs) }}
-          ${{ toJSON(needs.*.result) }}
-          no failures ${{ !contains(needs.*.result, 'failure') }}
-          no skips ${{ !contains(needs.*.result, 'skipped') }}
-          no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
-          EOF
-      - name: Not ready
-        # fail the job to be able to restart it
-        if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }}
-        run: exit 1
-      - name: Check out repository code
-        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
-        uses: ClickHouse/checkout@v1
-      - name: Mark Commit Release Ready
-        if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
-        run: |
-          cd "$GITHUB_WORKSPACE/tests/ci"
-          python3 mark_release_ready.py
-
   FinishCheck:
     if: ${{ !cancelled() }}
     needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
@@ -160,3 +132,10 @@ jobs:
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
File: .github/workflows/merge_queue.yml (vendored, 10 changed lines)
@@ -93,7 +93,7 @@ jobs:
       data: ${{ needs.RunConfig.outputs.data }}

   CheckReadyForMerge:
-    if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
+    if: ${{ !cancelled() }}
     # Test_2 or Test_3 must not have jobs required for Mergeable check
     needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
     runs-on: [self-hosted, style-checker-aarch64]
@@ -101,6 +101,14 @@ jobs:
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
       - name: Check and set merge status
+        if: ${{ needs.StyleCheck.result == 'success' }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
File: .github/workflows/nightly.yml (vendored, 14 changed lines)
@@ -44,3 +44,17 @@ jobs:
     with:
       data: "${{ needs.RunConfig.outputs.data }}"
       set_latest: true
+  CheckWorkflow:
+    if: ${{ !cancelled() }}
+    needs: [RunConfig, BuildDockers]
+    runs-on: [self-hosted, style-checker-aarch64]
+    steps:
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
File: .github/workflows/pull_request.yml (vendored, 15 changed lines)
@@ -151,9 +151,10 @@ jobs:
       data: ${{ needs.RunConfig.outputs.data }}

   CheckReadyForMerge:
-    if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
-    # Test_2 or Test_3 must not have jobs required for Mergeable check
-    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1]
+    if: ${{ !cancelled() }}
+    # Test_2 or Test_3 do not have the jobs required for Mergeable check,
+    # however, set them as "needs" to get all checks results before the automatic merge occurs.
+    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3]
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Check out repository code
@@ -161,9 +162,17 @@ jobs:
         with:
           filter: tree:0
       - name: Check and set merge status
+        if: ${{ needs.StyleCheck.result == 'success' }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+          python3 ./tests/ci/ci_buddy.py --check-wf-status

   ################################# Stage Final #################################
   #
File: .github/workflows/release_branches.yml (vendored, 12 changed lines)
@@ -441,8 +441,9 @@ jobs:
       runner_type: stress-tester
       data: ${{ needs.RunConfig.outputs.data }}
   FinishCheck:
-    if: ${{ !failure() && !cancelled() }}
+    if: ${{ !cancelled() }}
     needs:
       - RunConfig
       - DockerServerImage
       - DockerKeeperImage
+      - Builds_Report
@@ -478,9 +479,18 @@ jobs:
         with:
           clear-repository: true
       - name: Finish label
+        if: ${{ !failure() }}
         run: |
           cd "$GITHUB_WORKSPACE/tests/ci"
           # update mergeable check
           python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
           # update overall ci report
           python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
+      - name: Check Workflow results
+        run: |
+          export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json"
+          cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
+          ${{ toJson(needs) }}
+          EOF
+
+          python3 ./tests/ci/ci_buddy.py --check-wf-status
@@ -14,3 +14,9 @@ rules:
   comments:
     min-spaces-from-content: 1
   document-start: disable
+  colons: disable
+  indentation: disable
+  line-length: disable
+  trailing-spaces: disable
+  truthy: disable
+  new-line-at-end-of-file: disable
@@ -2,11 +2,11 @@

 # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54488)
+SET(VERSION_REVISION 54489)
 SET(VERSION_MAJOR 24)
-SET(VERSION_MINOR 7)
+SET(VERSION_MINOR 8)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9)
-SET(VERSION_DESCRIBE v24.7.1.1-testing)
-SET(VERSION_STRING 24.7.1.1)
+SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af)
+SET(VERSION_DESCRIBE v24.8.1.1-testing)
+SET(VERSION_STRING 24.8.1.1)
 # end of autochange
@@ -9,6 +9,7 @@ set(DATASKETCHES_LIBRARY theta)
 add_library(_datasketches INTERFACE)
 target_include_directories(_datasketches SYSTEM BEFORE INTERFACE
     "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/common/include"
+    "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/count/include"
     "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/include")

 add_library(ch_contrib::datasketches ALIAS _datasketches)
File: contrib/libunwind (vendored submodule, 2 changed lines)
@@ -1 +1 @@
-Subproject commit 8f28e64d15819d2d096badd598c7d85bebddb1f2
+Subproject commit fe854449e24bedfa26e38465b84374312dbd587f
@@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

 RUN apt-get update --yes \
-    && env DEBIAN_FRONTEND=noninteractive apt-get install wget git default-jdk maven python3 --yes --no-install-recommends \
+    && env DEBIAN_FRONTEND=noninteractive apt-get install wget git python3 default-jdk maven --yes --no-install-recommends \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
@@ -191,8 +191,8 @@ else
         ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
         SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"

-    clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
-    clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+    clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
+    clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
     clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
     clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
 else
@@ -200,7 +200,7 @@ else
     clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
 fi
 clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
-clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0"
+clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
 fi

 clickhouse-client --query "SHOW TABLES FROM test"
@@ -209,9 +209,9 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat
     ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
     SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'"

-clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
-clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
-clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
+clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
+clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"

 clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
 clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
@@ -999,6 +999,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti

     [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains.

+- `count_min`
+
+    [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
+
 ## Column-level Settings {#column-level-settings}

 Certain MergeTree settings can be override at column level:
@@ -49,7 +49,7 @@ enum class QueryTreeNodeType : uint8_t
 /// Convert query tree node type to string
 const char * toString(QueryTreeNodeType type);

-/** Query tree is semantical representation of query.
+/** Query tree is a semantic representation of query.
  * Query tree node represent node in query tree.
  * IQueryTreeNode is base class for all query tree nodes.
  *
@@ -105,7 +105,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS
     auto new_query = query.clone();
     adjustCreateQueryForBackup(new_query, global_context);
     ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*new_query);
-    create.setUUID({});
+    create.resetUUIDs();
     create.if_not_exists = false;
     return new_query;
 };
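The one-line change above is the normalize-then-compare pattern: before comparing a restored CREATE query against the original, every UUID field is cleared, since UUIDs legitimately differ after a restore. A minimal standalone sketch of that idea, with toy types standing in for ASTCreateQuery (the assumption here, suggested by the CreateQueryUUIDs changes elsewhere in this commit, is that resetUUIDs() clears inner-target UUIDs as well, not just the top-level one that setUUID({}) reset):

// Sketch of normalize-then-compare; toy types, not the ClickHouse classes.
#include <cassert>
#include <string>

struct CreateDef
{
    std::string table_uuid;        // regenerated on every restore
    std::string inner_table_uuid;  // e.g. a materialized view's inner table
    std::string definition;        // columns, engine, ORDER BY, ...

    // Like create.resetUUIDs() above: clear every UUID-like field.
    void resetUUIDs() { table_uuid.clear(); inner_table_uuid.clear(); }
};

bool sameTableDef(CreateDef a, CreateDef b)
{
    a.resetUUIDs();
    b.resetUUIDs();
    return a.definition == b.definition;
}

int main()
{
    CreateDef original{"uuid-1", "uuid-2", "CREATE TABLE t (x UInt64) ENGINE = MergeTree"};
    CreateDef restored{"uuid-9", "uuid-8", "CREATE TABLE t (x UInt64) ENGINE = MergeTree"};
    assert(sameTableDef(original, restored));  // UUIDs differ, definitions match
}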
@@ -1,4 +1,5 @@
 #include <Backups/RestoreCoordinationLocal.h>
+#include <Parsers/ASTCreateQuery.h>
 #include <Parsers/formatAST.h>
 #include <Common/logger_useful.h>

@@ -67,7 +68,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
     auto it = create_query_uuids.find(query_str);
     if (it != create_query_uuids.end())
     {
-        create_query.setUUID(it->second);
+        it->second.copyToQuery(create_query);
         return true;
     }
     return false;
@@ -79,7 +80,8 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer
         return;
     }

-    auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true);
+    CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
+    new_uuids.copyToQuery(create_query);

     {
         std::lock_guard lock{mutex};
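Both coordinators now funnel UUID handling through the new CreateQueryUUIDs helper. A toy model of the round trip implied by the calls in this diff (generate, serialize, parse, re-apply); the real class lives in Parsers/CreateQueryUUIDs.h, and these signatures are inferred from usage here, not quoted from that header:

// Toy stand-ins for DB::ASTCreateQuery / DB::CreateQueryUUIDs.
#include <iostream>
#include <string>

struct Query { std::string uuid; };

struct QueryUUIDs
{
    std::string table_uuid;

    std::string toString() const { return table_uuid; }
    static QueryUUIDs fromString(const std::string & s) { return {s}; }
    void copyToQuery(Query & q) const { q.uuid = table_uuid; }
};

int main()
{
    QueryUUIDs generated{"3b1c6d2e"};                    // one replica generates
    std::string stored = generated.toString();           // stored in the local map or ZooKeeper

    Query create;
    QueryUUIDs::fromString(stored).copyToQuery(create);  // replayed on other replicas
    std::cout << create.uuid << '\n';                    // same UUIDs everywhere
}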
@@ -1,16 +1,17 @@
 #pragma once

 #include <Backups/IRestoreCoordination.h>
-#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/CreateQueryUUIDs.h>
 #include <Common/Logger.h>
 #include <mutex>
 #include <set>
 #include <unordered_set>

 namespace Poco { class Logger; }


 namespace DB
 {
+class ASTCreateQuery;


 /// Implementation of the IRestoreCoordination interface performing coordination in memory.
 class RestoreCoordinationLocal : public IRestoreCoordination
@@ -55,7 +56,7 @@ private:

     std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
     std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
-    std::unordered_map<String, ASTCreateQuery::UUIDs> create_query_uuids;
+    std::unordered_map<String, CreateQueryUUIDs> create_query_uuids;
     std::unordered_set<String /* root_zk_path */> acquired_data_in_keeper_map_tables;

     mutable std::mutex mutex;
@@ -3,6 +3,7 @@
 #include <Backups/RestoreCoordinationRemote.h>
 #include <Backups/BackupCoordinationStageSync.h>
 #include <Parsers/ASTCreateQuery.h>
+#include <Parsers/CreateQueryUUIDs.h>
 #include <Parsers/formatAST.h>
 #include <Functions/UserDefined/UserDefinedSQLObjectType.h>
 #include <Common/ZooKeeper/KeeperException.h>
@@ -269,7 +270,8 @@ bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String &
 void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query)
 {
     String query_str = serializeAST(create_query);
-    String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString();
+    CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true};
+    String new_uuids_str = new_uuids.toString();

     auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable");
     holder.retries_ctl.retryLoop(
@@ -281,11 +283,14 @@ void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_que
             Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent);

             if (res == Coordination::Error::ZOK)
+            {
+                new_uuids.copyToQuery(create_query);
                 return;
+            }

             if (res == Coordination::Error::ZNODEEXISTS)
             {
-                create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path)));
+                CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query);
                 return;
             }
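The remote path relies on ZooKeeper's create-if-absent semantics to make UUID assignment idempotent across replicas: whoever creates the node first wins, and every later caller adopts the stored value. An illustrative sketch with a toy in-memory store standing in for zk->tryCreate / zk->get:

// First-writer-wins coordination; toy KV store, not real ZooKeeper.
#include <iostream>
#include <map>
#include <string>

std::map<std::string, std::string> store;

enum class Error { ZOK, ZNODEEXISTS };

Error tryCreate(const std::string & path, const std::string & value)
{
    return store.emplace(path, value).second ? Error::ZOK : Error::ZNODEEXISTS;
}

std::string createOrAdopt(const std::string & path, const std::string & proposed)
{
    if (tryCreate(path, proposed) == Error::ZOK)
        return proposed;    // we won the race: our UUIDs become canonical
    return store.at(path);  // someone else won: adopt their UUIDs
}

int main()
{
    std::cout << createOrAdopt("/restore/t1/uuid", "uuid-A") << '\n';  // uuid-A
    std::cout << createOrAdopt("/restore/t1/uuid", "uuid-B") << '\n';  // still uuid-A
}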
@@ -543,7 +543,7 @@ if (TARGET ch_contrib::libpqxx)
 endif()

 if (TARGET ch_contrib::datasketches)
-    target_link_libraries (clickhouse_aggregate_functions PRIVATE ch_contrib::datasketches)
+    dbms_target_link_libraries(PUBLIC ch_contrib::datasketches)
 endif ()

 target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4)
@@ -11,6 +11,7 @@
 #include <base/cgroupsv2.h>
 #include <base/getMemoryAmount.h>
 #include <base/sleep.h>
+#include <fmt/ranges.h>

 #include <cstdint>
 #include <filesystem>
@@ -45,26 +46,33 @@ namespace
 /// kernel 5
 /// rss 15
 /// [...]
-uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
+using Metrics = std::map<std::string, uint64_t>;
+
+Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf)
 {
+    Metrics metrics;
     while (!buf.eof())
     {
         std::string current_key;
         readStringUntilWhitespace(current_key, buf);
-        if (current_key != key)
-        {
-            std::string dummy;
-            readStringUntilNewlineInto(dummy, buf);
-            buf.ignore();
-            continue;
-        }

         assertChar(' ', buf);

         uint64_t value = 0;
         readIntText(value, buf);
-        return value;
+        assertChar('\n', buf);
+
+        auto [_, inserted] = metrics.emplace(std::move(current_key), value);
+        chassert(inserted, "Duplicate keys in stat file");
     }
+    return metrics;
+}
+
+uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
+{
+    const auto all_metrics = readAllMetricsFromStatFile(buf);
+    if (const auto it = all_metrics.find(key); it != all_metrics.end())
+        return it->second;
+    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName());
+}
@@ -79,6 +87,13 @@ struct CgroupsV1Reader : ICgroupsReader
         return readMetricFromStatFile(buf, "rss");
     }

+    std::string dumpAllStats() override
+    {
+        std::lock_guard lock(mutex);
+        buf.rewind();
+        return fmt::format("{}", readAllMetricsFromStatFile(buf));
+    }
+
 private:
     std::mutex mutex;
     ReadBufferFromFile buf TSA_GUARDED_BY(mutex);
@@ -106,6 +121,13 @@ struct CgroupsV2Reader : ICgroupsReader
         return mem_usage;
     }

+    std::string dumpAllStats() override
+    {
+        std::lock_guard lock(mutex);
+        stat_buf.rewind();
+        return fmt::format("{}", readAllMetricsFromStatFile(stat_buf));
+    }
+
 private:
     std::mutex mutex;
     ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex);
@@ -178,10 +200,7 @@ CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait
 {
     const auto [cgroup_path, version] = getCgroupsPath();

-    if (version == CgroupsVersion::V2)
-        cgroup_reader = std::make_unique<CgroupsV2Reader>(cgroup_path);
-    else
-        cgroup_reader = std::make_unique<CgroupsV1Reader>(cgroup_path);
+    cgroup_reader = createCgroupsReader(version, cgroup_path);

     LOG_INFO(
         log,
@@ -234,7 +253,12 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint
 # endif
     /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them.
     uint64_t memory_usage = cgroup_reader->readMemoryUsage();
-    LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage));
+    LOG_TRACE(
+        log,
+        "Read current memory usage {} bytes ({}) from cgroups, full available stats: {}",
+        memory_usage,
+        ReadableSize(memory_usage),
+        cgroup_reader->dumpAllStats());
     MemoryTracker::setRSS(memory_usage, 0);

     LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage));
@@ -338,6 +362,13 @@ void CgroupsMemoryUsageObserver::runThread()
     }
 }

+std::unique_ptr<ICgroupsReader> createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const fs::path & cgroup_path)
+{
+    if (version == CgroupsMemoryUsageObserver::CgroupsVersion::V2)
+        return std::make_unique<CgroupsV2Reader>(cgroup_path);
+    else
+        return std::make_unique<CgroupsV1Reader>(cgroup_path);
+}
 }

 #endif
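The new dumpAllStats() leans on fmt's range formatting, which is why <fmt/ranges.h> was added: formatting a std::map directly yields the quoted-key rendering that the EXPECTED strings in the new unit test (below) assert against. A minimal standalone illustration:

// fmt with <fmt/ranges.h> can print a std::map directly.
#include <cstdint>
#include <map>
#include <string>
#include <fmt/ranges.h>

int main()
{
    std::map<std::string, uint64_t> metrics{{"rss", 2232029184ULL}, {"cache", 4673703936ULL}};
    // Prints: {"cache": 4673703936, "rss": 2232029184}
    // Same shape as the test's EXPECTED strings; std::map keeps keys sorted.
    fmt::print("{}\n", metrics);
}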
@@ -14,6 +14,8 @@ struct ICgroupsReader
     virtual ~ICgroupsReader() = default;

     virtual uint64_t readMemoryUsage() = 0;
+
+    virtual std::string dumpAllStats() = 0;
 };

 /// Does two things:
@@ -81,6 +83,9 @@ private:
     bool quit = false;
 };

+std::unique_ptr<ICgroupsReader>
+createCgroupsReader(CgroupsMemoryUsageObserver::CgroupsVersion version, const std::filesystem::path & cgroup_path);
+
 #else
 class CgroupsMemoryUsageObserver
 {
@@ -57,7 +57,8 @@ static struct InitFiu
     PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \
     PAUSEABLE(dummy_pausable_failpoint) \
     ONCE(execute_query_calling_empty_set_result_func_on_exception) \
-    ONCE(receive_timeout_on_table_status_response)
+    ONCE(receive_timeout_on_table_status_response) \
+    REGULAR(keepermap_fail_drop_data) \


 namespace FailPoints
File: src/Common/tests/gtest_cgroups_reader.cpp (new file, 178 lines)
@@ -0,0 +1,178 @@
#if defined(OS_LINUX)

#include <gtest/gtest.h>

#include <cstdint>
#include <filesystem>

#include <IO/WriteBufferFromFile.h>
#include <Common/CgroupsMemoryUsageObserver.h>
#include <Common/filesystemHelpers.h>

using namespace DB;


const std::string SAMPLE_FILE[2] = {
    R"(cache 4673703936
rss 2232029184
rss_huge 0
shmem 0
mapped_file 344678400
dirty 4730880
writeback 135168
swap 0
pgpgin 2038569918
pgpgout 2036883790
pgfault 2055373287
pgmajfault 0
inactive_anon 2156335104
active_anon 0
inactive_file 2841305088
active_file 1653915648
unevictable 256008192
hierarchical_memory_limit 8589934592
hierarchical_memsw_limit 8589934592
total_cache 4673703936
total_rss 2232029184
total_rss_huge 0
total_shmem 0
total_mapped_file 344678400
total_dirty 4730880
total_writeback 135168
total_swap 0
total_pgpgin 2038569918
total_pgpgout 2036883790
total_pgfault 2055373287
total_pgmajfault 0
total_inactive_anon 2156335104
total_active_anon 0
total_inactive_file 2841305088
total_active_file 1653915648
total_unevictable 256008192
)",
    R"(anon 10429399040
file 17410793472
kernel 1537789952
kernel_stack 3833856
pagetables 65441792
sec_pagetables 0
percpu 15232
sock 0
vmalloc 0
shmem 0
zswap 0
zswapped 0
file_mapped 344010752
file_dirty 2060857344
file_writeback 0
swapcached 0
anon_thp 0
file_thp 0
shmem_thp 0
inactive_anon 0
active_anon 10429370368
inactive_file 8693084160
active_file 8717561856
unevictable 0
slab_reclaimable 1460982504
slab_unreclaimable 5152864
slab 1466135368
workingset_refault_anon 0
workingset_refault_file 0
workingset_activate_anon 0
workingset_activate_file 0
workingset_restore_anon 0
workingset_restore_file 0
workingset_nodereclaim 0
pgscan 0
pgsteal 0
pgscan_kswapd 0
pgscan_direct 0
pgscan_khugepaged 0
pgsteal_kswapd 0
pgsteal_direct 0
pgsteal_khugepaged 0
pgfault 43026352
pgmajfault 36762
pgrefill 0
pgactivate 0
pgdeactivate 0
pglazyfree 259
pglazyfreed 0
zswpin 0
zswpout 0
thp_fault_alloc 0
thp_collapse_alloc 0
)"};

const std::string EXPECTED[2]
    = {"{\"active_anon\": 0, \"active_file\": 1653915648, \"cache\": 4673703936, \"dirty\": 4730880, \"hierarchical_memory_limit\": "
       "8589934592, \"hierarchical_memsw_limit\": 8589934592, \"inactive_anon\": 2156335104, \"inactive_file\": 2841305088, "
       "\"mapped_file\": 344678400, \"pgfault\": 2055373287, \"pgmajfault\": 0, \"pgpgin\": 2038569918, \"pgpgout\": 2036883790, \"rss\": "
       "2232029184, \"rss_huge\": 0, \"shmem\": 0, \"swap\": 0, \"total_active_anon\": 0, \"total_active_file\": 1653915648, "
       "\"total_cache\": 4673703936, \"total_dirty\": 4730880, \"total_inactive_anon\": 2156335104, \"total_inactive_file\": 2841305088, "
       "\"total_mapped_file\": 344678400, \"total_pgfault\": 2055373287, \"total_pgmajfault\": 0, \"total_pgpgin\": 2038569918, "
       "\"total_pgpgout\": 2036883790, \"total_rss\": 2232029184, \"total_rss_huge\": 0, \"total_shmem\": 0, \"total_swap\": 0, "
       "\"total_unevictable\": 256008192, \"total_writeback\": 135168, \"unevictable\": 256008192, \"writeback\": 135168}",
       "{\"active_anon\": 10429370368, \"active_file\": 8717561856, \"anon\": 10429399040, \"anon_thp\": 0, \"file\": 17410793472, "
       "\"file_dirty\": 2060857344, \"file_mapped\": 344010752, \"file_thp\": 0, \"file_writeback\": 0, \"inactive_anon\": 0, "
       "\"inactive_file\": 8693084160, \"kernel\": 1537789952, \"kernel_stack\": 3833856, \"pagetables\": 65441792, \"percpu\": 15232, "
       "\"pgactivate\": 0, \"pgdeactivate\": 0, \"pgfault\": 43026352, \"pglazyfree\": 259, \"pglazyfreed\": 0, \"pgmajfault\": 36762, "
       "\"pgrefill\": 0, \"pgscan\": 0, \"pgscan_direct\": 0, \"pgscan_khugepaged\": 0, \"pgscan_kswapd\": 0, \"pgsteal\": 0, "
       "\"pgsteal_direct\": 0, \"pgsteal_khugepaged\": 0, \"pgsteal_kswapd\": 0, \"sec_pagetables\": 0, \"shmem\": 0, \"shmem_thp\": 0, "
       "\"slab\": 1466135368, \"slab_reclaimable\": 1460982504, \"slab_unreclaimable\": 5152864, \"sock\": 0, \"swapcached\": 0, "
       "\"thp_collapse_alloc\": 0, \"thp_fault_alloc\": 0, \"unevictable\": 0, \"vmalloc\": 0, \"workingset_activate_anon\": 0, "
       "\"workingset_activate_file\": 0, \"workingset_nodereclaim\": 0, \"workingset_refault_anon\": 0, \"workingset_refault_file\": 0, "
       "\"workingset_restore_anon\": 0, \"workingset_restore_file\": 0, \"zswap\": 0, \"zswapped\": 0, \"zswpin\": 0, \"zswpout\": 0}"};


class CgroupsMemoryUsageObserverFixture : public ::testing::TestWithParam<CgroupsMemoryUsageObserver::CgroupsVersion>
{
    void SetUp() override
    {
        const uint8_t version = static_cast<uint8_t>(GetParam());
        tmp_dir = fmt::format("./test_cgroups_{}", magic_enum::enum_name(GetParam()));
        fs::create_directories(tmp_dir);

        auto stat_file = WriteBufferFromFile(tmp_dir + "/memory.stat");
        stat_file.write(SAMPLE_FILE[version].data(), SAMPLE_FILE[version].size());
        stat_file.sync();

        if (GetParam() == CgroupsMemoryUsageObserver::CgroupsVersion::V2)
        {
            auto current_file = WriteBufferFromFile(tmp_dir + "/memory.current");
            current_file.write("29645422592", 11);
            current_file.sync();
        }
    }

protected:
    std::string tmp_dir;
};


TEST_P(CgroupsMemoryUsageObserverFixture, ReadMemoryUsageTest)
{
    const auto version = GetParam();
    auto reader = createCgroupsReader(version, tmp_dir);
    ASSERT_EQ(
        reader->readMemoryUsage(),
        version == CgroupsMemoryUsageObserver::CgroupsVersion::V1 ? /* rss from memory.stat */ 2232029184
                                                                  : /* value from memory.current - inactive_file */ 20952338432);
}


TEST_P(CgroupsMemoryUsageObserverFixture, DumpAllStatsTest)
{
    const auto version = GetParam();
    auto reader = createCgroupsReader(version, tmp_dir);
    ASSERT_EQ(reader->dumpAllStats(), EXPECTED[static_cast<uint8_t>(version)]);
}


INSTANTIATE_TEST_SUITE_P(
    CgroupsMemoryUsageObserverTests,
    CgroupsMemoryUsageObserverFixture,
    ::testing::Values(CgroupsMemoryUsageObserver::CgroupsVersion::V1, CgroupsMemoryUsageObserver::CgroupsVersion::V2));

#endif
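Where the V2 expectation in ReadMemoryUsageTest comes from, using only numbers already present in the fixture: the V2 reader reports memory.current minus inactive_file (page cache that can be evicted), as the test's own comment states.

// Deriving the asserted V2 value from the fixture's numbers.
#include <cassert>
#include <cstdint>

int main()
{
    const uint64_t memory_current = 29645422592;  // written to memory.current in SetUp()
    const uint64_t inactive_file  = 8693084160;   // from the V2 memory.stat sample
    assert(memory_current - inactive_file == 20952338432);  // the asserted value
}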
@@ -33,7 +33,7 @@ size_t toMilliseconds(auto duration)
     return std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
 }

-const auto epsilon = 500us;
+const auto epsilon = 1ms;

 class ResolvePoolMock : public DB::HostResolver
 {
@@ -358,53 +358,59 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses

 TEST_F(ResolvePoolTest, BannedForConsiquenceFail)
 {
-    auto history = 5ms;
+    auto history = 10ms;
     auto resolver = make_resolver(toMilliseconds(history));

     auto failed_addr = resolver->resolve();
     ASSERT_TRUE(addresses.contains(*failed_addr));

-    auto start_at = now();
-
     failed_addr.setFail();
+    auto start_at = now();

     ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
     ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
     check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);

     sleep_until(start_at + history + epsilon);
+    start_at = now();

     resolver->update();
     ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
     ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));

     failed_addr.setFail();
+    start_at = now();

     check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);

     sleep_until(start_at + history + epsilon);
+    start_at = now();

     resolver->update();

+    // too much time has passed
+    if (now() > start_at + 2*history - epsilon)
+        return;
+
     ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
     ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));

     // ip still banned adter history_ms + update, because it was his second consiquent fail
-    check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
+    check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + 2*history - epsilon);
 }

 TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
 {
-    auto history = 5ms;
+    auto history = 10ms;
     auto resolver = make_resolver(toMilliseconds(history));

     auto failed_addr = resolver->resolve();
     ASSERT_TRUE(addresses.contains(*failed_addr));

-    auto start_at = now();
     failed_addr.setFail();
     failed_addr.setFail();
     failed_addr.setFail();
+    auto start_at = now();

     ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
     ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
@@ -413,6 +419,7 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
     sleep_until(start_at + history + epsilon);

     resolver->update();

+    // ip is cleared after just 1 history_ms interval.
     ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
     ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
@@ -383,7 +383,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
         LockMemoryExceptionInThread::removeUniqueLock();
     };

-    asio_opts.thread_pool_size_ = getNumberOfPhysicalCPUCores();
+    /// At least 16 threads for network communication in asio.
+    /// asio is async framework, so even with 1 thread it should be ok, but
+    /// still as safeguard it's better to have some redundant capacity here
+    asio_opts.thread_pool_size_ = std::max(16U, getNumberOfPhysicalCPUCores());

     if (state_manager->isSecure())
     {
@@ -125,23 +125,6 @@ DataTypePtr DataTypeFactory::getImpl(const String & family_name_param, const AST
 {
     String family_name = getAliasToOrName(family_name_param);

-    if (endsWith(family_name, "WithDictionary"))
-    {
-        ASTPtr low_cardinality_params = std::make_shared<ASTExpressionList>();
-        String param_name = family_name.substr(0, family_name.size() - strlen("WithDictionary"));
-        if (parameters)
-        {
-            auto func = std::make_shared<ASTFunction>();
-            func->name = param_name;
-            func->arguments = parameters;
-            low_cardinality_params->children.push_back(func);
-        }
-        else
-            low_cardinality_params->children.push_back(std::make_shared<ASTIdentifier>(param_name));
-
-        return getImpl<nullptr_on_error>("LowCardinality", low_cardinality_params);
-    }
-
     const auto * creator = findCreatorByName<nullptr_on_error>(family_name);
     if constexpr (nullptr_on_error)
     {
@@ -80,13 +80,20 @@ namespace
     /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
     void visitCreateQuery(const ASTCreateQuery & create)
     {
-        QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name};
-        if (!to_table.table.empty())
+        if (create.targets)
         {
-            /// TO target_table (for materialized views)
-            if (to_table.database.empty())
-                to_table.database = current_database;
-            dependencies.emplace(to_table);
+            for (const auto & target : create.targets->targets)
+            {
+                const auto & table_id = target.table_id;
+                if (!table_id.table_name.empty())
+                {
+                    /// TO target_table (for materialized views)
+                    QualifiedTableName target_name{table_id.database_name, table_id.table_name};
+                    if (target_name.database.empty())
+                        target_name.database = current_database;
+                    dependencies.emplace(target_name);
+                }
+            }
         }

         QualifiedTableName as_table{create.as_database, create.as_table};
@@ -86,12 +86,19 @@ namespace
         create.as_table = as_table_new.table;
     }

-    QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name};
-    if (!to_table.table.empty() && !to_table.database.empty())
+    if (create.targets)
     {
-        auto to_table_new = data.renaming_map.getNewTableName(to_table);
-        if (to_table_new != to_table)
-            create.to_table_id = StorageID{to_table_new.database, to_table_new.table};
+        for (auto & target : create.targets->targets)
+        {
+            auto & table_id = target.table_id;
+            if (!table_id.database_name.empty() && !table_id.table_name.empty())
+            {
+                QualifiedTableName target_name{table_id.database_name, table_id.table_name};
+                auto new_target_name = data.renaming_map.getNewTableName(target_name);
+                if (new_target_name != target_name)
+                    table_id = StorageID{new_target_name.database, new_target_name.table};
+            }
+        }
     }
 }
@@ -729,81 +729,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_

     if (auto * create = query->as<ASTCreateQuery>())
     {
-        bool replicated_table = create->storage && create->storage->engine &&
-            (startsWith(create->storage->engine->name, "Replicated") || startsWith(create->storage->engine->name, "Shared"));
-        if (!replicated_table || !create->storage->engine->arguments)
-            return;
+        if (create->storage)
+            checkTableEngine(*create, *create->storage, query_context);

-        ASTs & args_ref = create->storage->engine->arguments->children;
-        ASTs args = args_ref;
-        if (args.size() < 2)
-            return;
-
-        /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot.
-        bool has_expression_argument = args_ref[0]->as<ASTFunction>() || args_ref[1]->as<ASTFunction>();
-        if (has_expression_argument)
+        if (create->targets)
         {
-            try
-            {
-                args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context);
-                args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context);
-            }
-            catch (...) // NOLINT(bugprone-empty-catch)
-            {
-            }
+            for (const auto & inner_table_engine : create->targets->getInnerEngines())
+                checkTableEngine(*create, *inner_table_engine, query_context);
         }
-
-        ASTLiteral * arg1 = args[0]->as<ASTLiteral>();
-        ASTLiteral * arg2 = args[1]->as<ASTLiteral>();
-        if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String)
-            return;
-
-        String maybe_path = arg1->value.get<String>();
-        String maybe_replica = arg2->value.get<String>();
-
-        /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments.
-        /// Let's ensure that some macros are used.
-        /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas.
-
-        Macros::MacroExpansionInfo info;
-        info.table_id = {getDatabaseName(), create->getTable(), create->uuid};
-        info.shard = getShardName();
-        info.replica = getReplicaName();
-        query_context->getMacros()->expand(maybe_path, info);
-        bool maybe_shard_macros = info.expanded_other;
-        info.expanded_other = false;
-        query_context->getMacros()->expand(maybe_replica, info);
-        bool maybe_replica_macros = info.expanded_other;
-        bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros");
-
-        if (!enable_functional_tests_helper)
-        {
-            if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments)
-                LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments");
-            else
-                throw Exception(ErrorCodes::INCORRECT_QUERY,
-                                "It's not allowed to specify explicit zookeeper_path and replica_name "
-                                "for ReplicatedMergeTree arguments in Replicated database. If you really want to "
-                                "specify them explicitly, enable setting "
-                                "database_replicated_allow_replicated_engine_arguments.");
-        }
-
-        if (maybe_shard_macros && maybe_replica_macros)
-            return;
-
-        if (enable_functional_tests_helper && !has_expression_argument)
-        {
-            if (maybe_path.empty() || maybe_path.back() != '/')
-                maybe_path += '/';
-            args_ref[0]->as<ASTLiteral>()->value = maybe_path + "auto_{shard}";
-            args_ref[1]->as<ASTLiteral>()->value = maybe_replica + "auto_{replica}";
-            return;
-        }
-
-        throw Exception(ErrorCodes::INCORRECT_QUERY,
-                        "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. "
-                        "If you really want to specify it explicitly, then you should use some macros "
-                        "to distinguish different shards and replicas");
     }
 }
@@ -827,6 +760,85 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_
     }
 }

+void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const
+{
+    bool replicated_table = storage.engine &&
+        (startsWith(storage.engine->name, "Replicated") || startsWith(storage.engine->name, "Shared"));
+    if (!replicated_table || !storage.engine->arguments)
+        return;
+
+    ASTs & args_ref = storage.engine->arguments->children;
+    ASTs args = args_ref;
+    if (args.size() < 2)
+        return;
+
+    /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot.
+    bool has_expression_argument = args_ref[0]->as<ASTFunction>() || args_ref[1]->as<ASTFunction>();
+    if (has_expression_argument)
+    {
+        try
+        {
+            args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context);
+            args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context);
+        }
+        catch (...) // NOLINT(bugprone-empty-catch)
+        {
+        }
+    }
+
+    ASTLiteral * arg1 = args[0]->as<ASTLiteral>();
+    ASTLiteral * arg2 = args[1]->as<ASTLiteral>();
+    if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String)
+        return;
+
+    String maybe_path = arg1->value.get<String>();
+    String maybe_replica = arg2->value.get<String>();
+
+    /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments.
+    /// Let's ensure that some macros are used.
+    /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas.
+
+    Macros::MacroExpansionInfo info;
+    info.table_id = {getDatabaseName(), query.getTable(), query.uuid};
+    info.shard = getShardName();
+    info.replica = getReplicaName();
+    query_context->getMacros()->expand(maybe_path, info);
+    bool maybe_shard_macros = info.expanded_other;
+    info.expanded_other = false;
+    query_context->getMacros()->expand(maybe_replica, info);
+    bool maybe_replica_macros = info.expanded_other;
+    bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros");
+
+    if (!enable_functional_tests_helper)
+    {
+        if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments)
+            LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments");
+        else
+            throw Exception(ErrorCodes::INCORRECT_QUERY,
+                            "It's not allowed to specify explicit zookeeper_path and replica_name "
+                            "for ReplicatedMergeTree arguments in Replicated database. If you really want to "
+                            "specify them explicitly, enable setting "
+                            "database_replicated_allow_replicated_engine_arguments.");
+    }
+
+    if (maybe_shard_macros && maybe_replica_macros)
+        return;
+
+    if (enable_functional_tests_helper && !has_expression_argument)
+    {
+        if (maybe_path.empty() || maybe_path.back() != '/')
+            maybe_path += '/';
+        args_ref[0]->as<ASTLiteral>()->value = maybe_path + "auto_{shard}";
+        args_ref[1]->as<ASTLiteral>()->value = maybe_replica + "auto_{replica}";
+        return;
+    }
+
+    throw Exception(ErrorCodes::INCORRECT_QUERY,
+                    "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. "
+                    "If you really want to specify it explicitly, then you should use some macros "
+                    "to distinguish different shards and replicas");
+}
+
 BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags)
 {
     waitDatabaseStarted();
@@ -1312,11 +1324,9 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node
     if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query);

-    bool is_materialized_view_with_inner_table = create.is_materialized_view && create.to_table_id.empty();
-
     create.setDatabase(getDatabaseName());
     create.setTable(unescapeForFileName(node_name));
-    create.attach = is_materialized_view_with_inner_table;
+    create.attach = create.is_materialized_view_with_inner_table();

     return ast;
 }
@@ -107,6 +107,7 @@ private:
     void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);

     void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;
+    void checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const;

     void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr);
@@ -739,7 +739,8 @@ public:
     {
         NumberType value;

-        tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error);
+        if (!tryGetNumericValueFromJSONElement<JSONParser, NumberType>(value, element, convert_bool_to_integer, error))
+            return false;
         auto & col_vec = assert_cast<ColumnVector<NumberType> &>(dest);
         col_vec.insertValue(value);
         return true;
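The fix above addresses a classic unchecked-result bug: the try-style helper's bool was discarded, so a failed conversion still inserted an uninitialized value into the column. A toy reproduction of the pattern, with a stand-in parser rather than the ClickHouse helper:

// Ignoring a try-helper's return value leaves the output uninitialized.
#include <iostream>
#include <string>

static bool tryParseInt(const std::string & s, int & out)
{
    try { out = std::stoi(s); return true; }
    catch (...) { return false; }  // `out` is left untouched on failure
}

int main()
{
    int value;                   // intentionally uninitialized, as in the bug
    tryParseInt("oops", value);  // BUG: result ignored, value stays indeterminate

    int checked = 0;
    if (!tryParseInt("oops", checked))
        std::cout << "parse failed, nothing inserted\n";  // the fixed behavior
}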
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {

 struct AcoshName
 {
     static constexpr auto name = "acosh";
 };
 using FunctionAcosh = FunctionMathUnary<UnaryFunctionVectorized<AcoshName, acosh>>;

 }
@@ -6,6 +6,7 @@ namespace DB
 {

 using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval<AddMicrosecondsImpl>;

 REGISTER_FUNCTION(AddMicroseconds)
 {
     factory.registerFunction<FunctionAddMicroseconds>();
@@ -6,6 +6,7 @@ namespace DB
 {

 using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval<AddMillisecondsImpl>;

 REGISTER_FUNCTION(AddMilliseconds)
 {
     factory.registerFunction<FunctionAddMilliseconds>();
@@ -6,6 +6,7 @@ namespace DB
 {

 using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval<AddNanosecondsImpl>;

 REGISTER_FUNCTION(AddNanoseconds)
 {
     factory.registerFunction<FunctionAddNanoseconds>();
@@ -7,7 +7,6 @@

 namespace DB
 {

 namespace
 {

@@ -57,7 +57,7 @@ private:
     bool useDefaultImplementationForConstants() const override { return true; }
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }

-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
         const auto & column = arguments[0].column;
         const auto & column_char = arguments[1].column;
@@ -80,14 +80,13 @@ private:
         auto & dst_data = col_res->getChars();
         auto & dst_offsets = col_res->getOffsets();

-        const auto size = src_offsets.size();
-        dst_data.resize(src_data.size() + size);
-        dst_offsets.resize(size);
+        dst_data.resize(src_data.size() + input_rows_count);
+        dst_offsets.resize(input_rows_count);

         ColumnString::Offset src_offset{};
         ColumnString::Offset dst_offset{};

-        for (const auto i : collections::range(0, size))
+        for (size_t i = 0; i < input_rows_count; ++i)
         {
             const auto src_length = src_offsets[i] - src_offset;
             memcpySmallAllowReadWriteOverflow15(&dst_data[dst_offset], &src_data[src_offset], src_length);
@@ -45,9 +45,7 @@ struct AsciiImpl
         size_t size = data.size() / n;

         for (size_t i = 0; i < size; ++i)
-        {
             res[i] = doAscii(data, i * n, n);
-        }
     }

     [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {

 struct AsinhName
 {
     static constexpr auto name = "asinh";
 };
 using FunctionAsinh = FunctionMathUnary<UnaryFunctionVectorized<AsinhName, asinh>>;

 }
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {

 struct Atan2Name
 {
     static constexpr auto name = "atan2";
 };
 using FunctionAtan2 = FunctionMathBinaryFloat64<BinaryFunctionVectorized<Atan2Name, atan2>>;

 }
@@ -5,11 +5,12 @@ namespace DB
 {
 namespace
 {

 struct AtanhName
 {
     static constexpr auto name = "atanh";
 };
 using FunctionAtanh = FunctionMathUnary<UnaryFunctionVectorized<AtanhName, atanh>>;

 }
@@ -3,8 +3,10 @@

 namespace DB
 {

 REGISTER_FUNCTION(Base58Encode)
 {
     factory.registerFunction<FunctionBase58Conversion<Base58Encode>>();
 }

 }
@@ -5,6 +5,7 @@

 namespace DB
 {

 REGISTER_FUNCTION(Base64Decode)
 {
     FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: FROM_BASE64.)";
@@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Decode)
     /// MySQL compatibility alias.
     factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::Case::Insensitive);
 }

 }

 #endif
@@ -5,6 +5,7 @@

 namespace DB
 {

 REGISTER_FUNCTION(Base64Encode)
 {
     FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)";
@@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Encode)
     /// MySQL compatibility alias.
     factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::Case::Insensitive);
 }

 }

 #endif
@@ -5,6 +5,7 @@

 namespace DB
 {

 REGISTER_FUNCTION(Base64URLDecode)
 {
     FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
@@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLDecode)

     factory.registerFunction<FunctionBase64Conversion<Base64Decode<Base64Variant::URL>>>({description, syntax, arguments, returned_value, examples, categories});
 }

 }

 #endif
@ -5,6 +5,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
REGISTER_FUNCTION(Base64URLEncode)
|
||||
{
|
||||
FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)";
|
||||
@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLEncode)
|
||||
|
||||
factory.registerFunction<FunctionBase64Conversion<Base64Encode<Base64Variant::URL>>>({description, syntax, arguments, returned_value, examples, categories});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -67,11 +67,11 @@ public:
|
||||
const IColumn * column = arguments[arg_num].column.get();
|
||||
|
||||
if (arg_num == 0)
|
||||
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
|
||||
vec_res[row_num] = column->byteSizeAt(row_num);
|
||||
for (size_t row = 0; row < input_rows_count; ++row)
|
||||
vec_res[row] = column->byteSizeAt(row);
|
||||
else
|
||||
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
|
||||
vec_res[row_num] += column->byteSizeAt(row_num);
|
||||
for (size_t row = 0; row < input_rows_count; ++row)
|
||||
vec_res[row] += column->byteSizeAt(row);
|
||||
}
|
||||
|
||||
return result_col;
|
||||
|
@ -10,6 +10,7 @@ extern const int NOT_IMPLEMENTED;
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
requires std::is_integral_v<T>
|
||||
T byteSwap(T x)
|
||||
|
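For context on the requires-constrained template above: a minimal standalone sketch of the same pattern, assuming C++20. The name byteSwapSketch, the memcpy-based swap strategy, and the demo value are illustrative, not ClickHouse's implementation.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <type_traits>
#include <utility>

/// A byte-swap constrained to integral types, mirroring the
/// `template <typename T> requires std::is_integral_v<T>` shape above.
template <typename T>
requires std::is_integral_v<T>
T byteSwapSketch(T x)
{
    unsigned char bytes[sizeof(T)];
    std::memcpy(bytes, &x, sizeof(T));
    for (std::size_t i = 0; i < sizeof(T) / 2; ++i)
        std::swap(bytes[i], bytes[sizeof(T) - 1 - i]);
    T result;
    std::memcpy(&result, bytes, sizeof(T));
    return result;
}

int main()
{
    std::cout << std::hex << byteSwapSketch<uint32_t>(0x12345678) << '\n'; /// prints 78563412
}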
@ -98,8 +98,7 @@ public:

        /// Execute transform.
        ColumnsWithTypeAndName transform_args{args.front(), src_array_col, dst_array_col, args.back()};
        return FunctionFactory::instance().get("transform", context)->build(transform_args)
            ->execute(transform_args, result_type, input_rows_count);
        return FunctionFactory::instance().get("transform", context)->build(transform_args)->execute(transform_args, result_type, input_rows_count);
    }

private:

@ -88,7 +88,8 @@ private:

    static void convert(const String & from_charset, const String & to_charset,
        const ColumnString::Chars & from_chars, const ColumnString::Offsets & from_offsets,
        ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets)
        ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets,
        size_t input_rows_count)
    {
        auto converter_from = getConverter(from_charset);
        auto converter_to = getConverter(to_charset);

@ -96,12 +97,11 @@ private:
        ColumnString::Offset current_from_offset = 0;
        ColumnString::Offset current_to_offset = 0;

        size_t size = from_offsets.size();
        to_offsets.resize(size);
        to_offsets.resize(input_rows_count);

        PODArray<UChar> uchars;

        for (size_t i = 0; i < size; ++i)
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            size_t from_string_size = from_offsets[i] - current_from_offset - 1;

@ -184,7 +184,7 @@ public:
    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const ColumnWithTypeAndName & arg_from = arguments[0];
        const ColumnWithTypeAndName & arg_charset_from = arguments[1];

@ -204,7 +204,7 @@ public:
        if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(arg_from.column.get()))
        {
            auto col_to = ColumnString::create();
            convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets());
            convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets(), input_rows_count);
            return col_to;
        }
        else
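A recurring change in these hunks: executeImpl implementations stop ignoring their input_rows_count parameter and thread it down into helpers, instead of re-deriving the row count from offsets.size() or column sizes. A minimal sketch of the shape of that refactoring, with stand-in types rather than the real ClickHouse column classes:

#include <cstddef>
#include <vector>

using Offsets = std::vector<std::size_t>; /// stand-in for ColumnString::Offsets

/// Before: helpers computed `size_t size = from_offsets.size();` themselves.
/// After: the row count flows in from executeImpl's input_rows_count argument.
void convertSketch(const Offsets & from_offsets, Offsets & to_offsets, std::size_t input_rows_count)
{
    to_offsets.resize(input_rows_count);
    for (std::size_t i = 0; i < input_rows_count; ++i)
        to_offsets[i] = from_offsets[i]; /// per-row work would go here
}

int main()
{
    Offsets from{5, 11, 20}, to;
    convertSketch(from, to, from.size());
    return to.size() == 3 ? 0 : 1;
}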
@ -5,11 +5,12 @@ namespace DB
{
namespace
{
struct CoshName
{
    static constexpr auto name = "cosh";
};
using FunctionCosh = FunctionMathUnary<UnaryFunctionVectorized<CoshName, cosh>>;

struct CoshName
{
    static constexpr auto name = "cosh";
};
using FunctionCosh = FunctionMathUnary<UnaryFunctionVectorized<CoshName, cosh>>;

}

@ -13,8 +13,7 @@ struct NameCountSubstringsCaseInsensitiveUTF8
    static constexpr auto name = "countSubstringsCaseInsensitiveUTF8";
};

using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch<
    CountSubstringsImpl<NameCountSubstringsCaseInsensitiveUTF8, PositionCaseInsensitiveUTF8>>;
using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch<CountSubstringsImpl<NameCountSubstringsCaseInsensitiveUTF8, PositionCaseInsensitiveUTF8>>;

}

@ -109,14 +109,14 @@ public:
    ColumnPtr executeImpl(
        const ColumnsWithTypeAndName & arguments,
        const DataTypePtr & result_type,
        [[maybe_unused]] size_t input_rows_count) const override
        size_t input_rows_count) const override
    {
        ColumnPtr res;

        if (!((res = executeType<DataTypeDate>(arguments, result_type))
            || (res = executeType<DataTypeDate32>(arguments, result_type))
            || (res = executeType<DataTypeDateTime>(arguments, result_type))
            || (res = executeType<DataTypeDateTime64>(arguments, result_type))))
        if (!((res = executeType<DataTypeDate>(arguments, result_type, input_rows_count))
            || (res = executeType<DataTypeDate32>(arguments, result_type, input_rows_count))
            || (res = executeType<DataTypeDateTime>(arguments, result_type, input_rows_count))
            || (res = executeType<DataTypeDateTime64>(arguments, result_type, input_rows_count))))
            throw Exception(
                ErrorCodes::ILLEGAL_COLUMN,
                "Illegal column {} of function {}, must be Date or DateTime.",

@ -127,7 +127,7 @@ public:
    }

    template <typename DataType>
    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const
    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
    {
        auto * times = checkAndGetColumn<typename DataType::ColumnType>(arguments[1].column.get());
        if (!times)

@ -144,7 +144,7 @@ public:
        String date_part = date_part_column->getValue<String>();

        const DateLUTImpl * time_zone_tmp;
        if (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
        if constexpr (std::is_same_v<DataType, DataTypeDateTime64> || std::is_same_v<DataType, DataTypeDateTime>)
            time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 1);
        else
            time_zone_tmp = &DateLUT::instance();

@ -175,7 +175,7 @@ public:
        using TimeType = DateTypeToTimeType<DataType>;
        callOnDatePartWriter<TimeType>(date_part, [&](const auto & writer)
        {
            for (size_t i = 0; i < times_data.size(); ++i)
            for (size_t i = 0; i < input_rows_count; ++i)
            {
                if constexpr (std::is_same_v<DataType, DataTypeDateTime64>)
                {

@ -7,18 +7,20 @@ namespace DB
{
namespace
{
struct DegreesName
{
    static constexpr auto name = "degrees";
};

Float64 degrees(Float64 r)
{
    Float64 degrees = r * (180 / M_PI);
    return degrees;
}
struct DegreesName
{
    static constexpr auto name = "degrees";
};

Float64 degrees(Float64 r)
{
    Float64 degrees = r * (180 / M_PI);
    return degrees;
}

using FunctionDegrees = FunctionMathUnary<UnaryFunctionVectorized<DegreesName, degrees>>;

using FunctionDegrees = FunctionMathUnary<UnaryFunctionVectorized<DegreesName, degrees>>;
}

REGISTER_FUNCTION(Degrees)

@ -91,7 +91,7 @@ public:

        auto col_res = ColumnVector<UInt64>::create(col_str->size());
        auto & data = col_res->getData();
        for (size_t i = 0; i < col_str->size(); ++i)
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            auto disk_name = col_str->getDataAt(i).toString();
            if (auto it = disk_map.find(disk_name); it != disk_map.end())

@ -848,7 +848,7 @@ public:
        return std::make_shared<DataTypeString>();
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        ColumnPtr res;
        if constexpr (support_integer == SupportInteger::Yes)

@ -862,17 +862,17 @@ public:
            if (!castType(arguments[0].type.get(), [&](const auto & type)
            {
                using FromDataType = std::decay_t<decltype(type)>;
                if (!(res = executeType<FromDataType>(arguments, result_type)))
                if (!(res = executeType<FromDataType>(arguments, result_type, input_rows_count)))
                    throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                        "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64.",
                        arguments[0].column->getName(), getName());
                return true;
            }))
            {
                if (!((res = executeType<DataTypeDate>(arguments, result_type))
                    || (res = executeType<DataTypeDate32>(arguments, result_type))
                    || (res = executeType<DataTypeDateTime>(arguments, result_type))
                    || (res = executeType<DataTypeDateTime64>(arguments, result_type))))
                if (!((res = executeType<DataTypeDate>(arguments, result_type, input_rows_count))
                    || (res = executeType<DataTypeDate32>(arguments, result_type, input_rows_count))
                    || (res = executeType<DataTypeDateTime>(arguments, result_type, input_rows_count))
                    || (res = executeType<DataTypeDateTime64>(arguments, result_type, input_rows_count))))
                    throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                        "Illegal column {} of function {}, must be Integer or DateTime.",
                        arguments[0].column->getName(), getName());

@ -881,10 +881,10 @@ public:
        }
        else
        {
            if (!((res = executeType<DataTypeDate>(arguments, result_type))
                || (res = executeType<DataTypeDate32>(arguments, result_type))
                || (res = executeType<DataTypeDateTime>(arguments, result_type))
                || (res = executeType<DataTypeDateTime64>(arguments, result_type))))
            if (!((res = executeType<DataTypeDate>(arguments, result_type, input_rows_count))
                || (res = executeType<DataTypeDate32>(arguments, result_type, input_rows_count))
                || (res = executeType<DataTypeDateTime>(arguments, result_type, input_rows_count))
                || (res = executeType<DataTypeDateTime64>(arguments, result_type, input_rows_count))))
                throw Exception(ErrorCodes::ILLEGAL_COLUMN,
                    "Illegal column {} of function {}, must be Date or DateTime.",
                    arguments[0].column->getName(), getName());

@ -894,7 +894,7 @@ public:
    }

    template <typename DataType>
    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const
    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
    {
        auto non_const_datetime = arguments[0].column->convertToFullColumnIfConst();
        auto * times = checkAndGetColumn<typename DataType::ColumnType>(non_const_datetime.get());

@ -955,13 +955,11 @@ public:
        else
            time_zone = &DateLUT::instance();

        const auto & vec = times->getData();

        auto col_res = ColumnString::create();
        auto & res_data = col_res->getChars();
        auto & res_offsets = col_res->getOffsets();
        res_data.resize(vec.size() * (out_template_size + 1));
        res_offsets.resize(vec.size());
        res_data.resize(input_rows_count * (out_template_size + 1));
        res_offsets.resize(input_rows_count);

        if constexpr (format_syntax == FormatSyntax::MySQL)
        {

@ -990,9 +988,11 @@ public:
            }
        }

        const auto & vec = times->getData();

        auto * begin = reinterpret_cast<char *>(res_data.data());
        auto * pos = begin;
        for (size_t i = 0; i < vec.size(); ++i)
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            if (!const_time_zone_column && arguments.size() > 2)
            {
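Note the allocation math in the formatDateTime hunk above: every formatted row occupies at most out_template_size bytes plus one trailing zero byte, so the whole output buffer can be sized in one shot from input_rows_count. A toy restatement, illustrative only:

#include <cstddef>
#include <vector>

/// One formatted row takes at most `out_template_size` bytes plus a
/// terminating zero byte, so the block's buffer is preallocated up front.
std::vector<char> allocateFormatBuffer(std::size_t input_rows_count, std::size_t out_template_size)
{
    return std::vector<char>(input_rows_count * (out_template_size + 1));
}

int main()
{
    auto buf = allocateFormatBuffer(4, 19); /// e.g. "YYYY-MM-DD hh:mm:ss" is 19 bytes
    return buf.size() == 4 * 20 ? 0 : 1;
}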
@ -75,7 +75,7 @@ public:
        if (const ColumnString * col_query_string = checkAndGetColumn<ColumnString>(col_query.get()))
        {
            auto col_res = ColumnString::create();
            formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map);
            formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map, input_rows_count);

            if (error_handling == ErrorHandling::Null)
                return ColumnNullable::create(std::move(col_res), std::move(col_null_map));

@ -92,16 +92,16 @@ private:
        const ColumnString::Offsets & offsets,
        ColumnString::Chars & res_data,
        ColumnString::Offsets & res_offsets,
        ColumnUInt8::MutablePtr & res_null_map) const
        ColumnUInt8::MutablePtr & res_null_map,
        size_t input_rows_count) const
    {
        const size_t size = offsets.size();
        res_offsets.resize(size);
        res_offsets.resize(input_rows_count);
        res_data.resize(data.size());

        size_t prev_offset = 0;
        size_t res_data_size = 0;

        for (size_t i = 0; i < size; ++i)
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            const char * begin = reinterpret_cast<const char *>(&data[prev_offset]);
            const char * end = begin + offsets[i] - prev_offset - 1;

@ -55,19 +55,19 @@ public:

    bool useDefaultImplementationForConstants() const override { return true; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        ColumnPtr res;
        if (!((res = executeType<UInt8>(arguments))
            || (res = executeType<UInt16>(arguments))
            || (res = executeType<UInt32>(arguments))
            || (res = executeType<UInt64>(arguments))
            || (res = executeType<Int8>(arguments))
            || (res = executeType<Int16>(arguments))
            || (res = executeType<Int32>(arguments))
            || (res = executeType<Int64>(arguments))
            || (res = executeType<Float32>(arguments))
            || (res = executeType<Float64>(arguments))))
        if (!((res = executeType<UInt8>(arguments, input_rows_count))
            || (res = executeType<UInt16>(arguments, input_rows_count))
            || (res = executeType<UInt32>(arguments, input_rows_count))
            || (res = executeType<UInt64>(arguments, input_rows_count))
            || (res = executeType<Int8>(arguments, input_rows_count))
            || (res = executeType<Int16>(arguments, input_rows_count))
            || (res = executeType<Int32>(arguments, input_rows_count))
            || (res = executeType<Int64>(arguments, input_rows_count))
            || (res = executeType<Float32>(arguments, input_rows_count))
            || (res = executeType<Float64>(arguments, input_rows_count))))
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                arguments[0].column->getName(), getName());

@ -76,7 +76,7 @@ public:

private:
    template <typename T>
    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
    {
        if (const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get()))
        {

@ -85,13 +85,12 @@ private:
            const typename ColumnVector<T>::Container & vec_from = col_from->getData();
            ColumnString::Chars & data_to = col_to->getChars();
            ColumnString::Offsets & offsets_to = col_to->getOffsets();
            size_t size = vec_from.size();
            data_to.resize(size * 2);
            offsets_to.resize(size);
            data_to.resize(input_rows_count * 2);
            offsets_to.resize(input_rows_count);

            WriteBufferFromVector<ColumnString::Chars> buf_to(data_to);

            for (size_t i = 0; i < size; ++i)
            for (size_t i = 0; i < input_rows_count; ++i)
            {
                Impl::format(static_cast<double>(vec_from[i]), buf_to);
                writeChar(0, buf_to);

@ -51,21 +51,19 @@ public:
    }

    template <typename ColumnTypeEncoded>
    bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) const
    bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column, size_t input_rows_count) const
    {
        const auto * encoded = checkAndGetColumn<ColumnTypeEncoded>(encoded_column);
        if (!encoded)
            return false;

        const size_t count = encoded->size();

        auto latitude = ColumnFloat64::create(count);
        auto longitude = ColumnFloat64::create(count);
        auto latitude = ColumnFloat64::create(input_rows_count);
        auto longitude = ColumnFloat64::create(input_rows_count);

        ColumnFloat64::Container & lon_data = longitude->getData();
        ColumnFloat64::Container & lat_data = latitude->getData();

        for (size_t i = 0; i < count; ++i)
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            std::string_view encoded_string = encoded->getDataAt(i).toView();
            geohashDecode(encoded_string.data(), encoded_string.size(), &lon_data[i], &lat_data[i]);

@ -79,13 +77,13 @@ public:
        return true;
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const IColumn * encoded = arguments[0].column.get();
        ColumnPtr res_column;

        if (tryExecute<ColumnString>(encoded, res_column) ||
            tryExecute<ColumnFixedString>(encoded, res_column))
        if (tryExecute<ColumnString>(encoded, res_column, input_rows_count) ||
            tryExecute<ColumnFixedString>(encoded, res_column, input_rows_count))
            return res_column;

        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument type:{} of argument of function {}",

@ -53,7 +53,7 @@ public:
        return std::make_shared<DataTypeString>();
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const IColumn * longitude = arguments[0].column.get();
        const IColumn * latitude = arguments[1].column.get();

@ -65,26 +65,24 @@ public:
            precision = arguments[2].column;

        ColumnPtr res_column;
        vector(longitude, latitude, precision.get(), res_column);
        vector(longitude, latitude, precision.get(), res_column, input_rows_count);
        return res_column;
    }

private:
    void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const
    void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result, size_t input_rows_count) const
    {
        auto col_str = ColumnString::create();
        ColumnString::Chars & out_vec = col_str->getChars();
        ColumnString::Offsets & out_offsets = col_str->getOffsets();

        const size_t size = lat_column->size();

        out_offsets.resize(size);
        out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1));
        out_offsets.resize(input_rows_count);
        out_vec.resize(input_rows_count * (GEOHASH_MAX_TEXT_LENGTH + 1));

        char * begin = reinterpret_cast<char *>(out_vec.data());
        char * pos = begin;

        for (size_t i = 0; i < size; ++i)
        for (size_t i = 0; i < input_rows_count; ++i)
        {
            const Float64 longitude_value = lon_column->getFloat64(i);
            const Float64 latitude_value = lat_column->getFloat64(i);

@ -138,8 +138,7 @@ namespace
        }
    }

    ColumnPtr executeImpl(
        const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        std::call_once(once, [&] { initialize(arguments, result_type); });

@ -17,13 +17,19 @@
namespace DB
{

IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const DB::ASTPtr& query_ptr_,
    const DB::ContextMutablePtr& context_, const DB::SelectQueryOptions& options_)
    : query_ptr(query_ptr_)
    , context(context_)
    , options(options_)
    , max_streams(context->getSettingsRef().max_threads)
IInterpreterUnionOrSelectQuery::IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_,
    const ContextMutablePtr & context_, const SelectQueryOptions & options_)
    : query_ptr(query_ptr_)
    , context(context_)
    , options(options_)
    , max_streams(context->getSettingsRef().max_threads)
{
    /// FIXME All code here will work with the old analyzer, however for views over Distributed tables
    /// it's possible that new analyzer will be enabled in ::getQueryProcessingStage method
    /// of the underlying storage when all other parts of infrastructure are not ready for it
    /// (built with old analyzer).
    context->setSetting("allow_experimental_analyzer", false);

    if (options.shard_num)
        context->addSpecialScalar(
            "_shard_num",

@ -949,7 +949,7 @@ namespace
        throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines");
}

void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine)
void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine)
{
    if (engine == DefaultTableEngine::None)
        throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query");

@ -969,9 +969,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
    if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view)
        return;

    if (create.is_materialized_view && create.to_table_id)
        return;

    if (create.temporary)
    {
        /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting.

@ -986,22 +983,44 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
        }

        if (!create.storage->engine)
        {
            setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value);
        }

        checkTemporaryTableEngineName(create.storage->engine->name);
        return;
    }

    if (create.is_materialized_view)
    {
        /// A materialized view with an external target doesn't need a table engine.
        if (create.is_materialized_view_with_external_target())
            return;

        if (auto to_engine = create.getTargetInnerEngine(ViewTarget::To))
        {
            /// This materialized view already has a storage definition.
            if (!to_engine->engine)
            {
                /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one.
                setDefaultTableEngine(*to_engine, getContext()->getSettingsRef().default_table_engine.value);
            }
            return;
        }
    }

    if (create.storage)
    {
        /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one.
        /// This table already has a storage definition.
        if (!create.storage->engine)
        {
            /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one.
            setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value);
        }
        return;
    }

    /// We'll try to extract a storage definition from clause `AS`:
    /// CREATE TABLE table_name AS other_table_name
    std::shared_ptr<ASTStorage> storage_def;
    if (!create.as_table.empty())
    {
        /// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table.

@ -1017,12 +1036,14 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
        if (as_create.is_ordinary_view)
            throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name);

        if (as_create.is_materialized_view && as_create.to_table_id)
        if (as_create.is_materialized_view_with_external_target())
        {
            throw Exception(
                ErrorCodes::INCORRECT_QUERY,
                "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS `{}`\" instead",
                "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS {}\" instead",
                qualified_name,
                as_create.to_table_id.getQualifiedName());
                as_create.getTargetTableID(ViewTarget::To).getFullTableName());
        }

        if (as_create.is_live_view)
            throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name);

@ -1033,18 +1054,37 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
        if (as_create.is_dictionary)
            throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name);

        if (as_create.storage)
            create.set(create.storage, as_create.storage->ptr());
        if (as_create.is_materialized_view)
        {
            storage_def = as_create.getTargetInnerEngine(ViewTarget::To);
        }
        else if (as_create.as_table_function)
        {
            create.set(create.as_table_function, as_create.as_table_function->ptr());
            return;
        }
        else if (as_create.storage)
        {
            storage_def = typeid_cast<std::shared_ptr<ASTStorage>>(as_create.storage->ptr());
        }
        else
        {
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug.");

            return;
        }
    }

    create.set(create.storage, std::make_shared<ASTStorage>());
    setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value);
    if (!storage_def)
    {
        /// Set ENGINE by default.
        storage_def = std::make_shared<ASTStorage>();
        setDefaultTableEngine(*storage_def, getContext()->getSettingsRef().default_table_engine.value);
    }

    /// Use the found table engine to modify the create query.
    if (create.is_materialized_view)
        create.setTargetInnerEngine(ViewTarget::To, storage_def);
    else
        create.set(create.storage, storage_def);
}

void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const

@ -1086,11 +1126,11 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
                kind_upper, create.table);
        }

        create.generateRandomUUID();
        create.generateRandomUUIDs();
    }
    else
    {
        bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil;
        bool has_uuid = (create.uuid != UUIDHelpers::Nil) || create.hasInnerUUIDs();
        if (has_uuid && !is_on_cluster && !internal)
        {
            /// We don't show the following error message either

@ -1105,8 +1145,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
        /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either
        /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or
        /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts.
        create.uuid = UUIDHelpers::Nil;
        create.to_inner_uuid = UUIDHelpers::Nil;
        create.resetUUIDs();
    }
}

@ -1130,6 +1169,14 @@ void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create,
    DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies);
}

bool isReplicated(const ASTStorage & storage)
{
    if (!storage.engine)
        return false;

    const auto & storage_name = storage.engine->name;
    return storage_name.starts_with("Replicated") || storage_name.starts_with("Shared");
}

}

BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)

@ -1246,8 +1293,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)

    if (!create.temporary && !create.database)
        create.setDatabase(current_database);
    if (create.to_table_id && create.to_table_id.database_name.empty())
        create.to_table_id.database_name = current_database;

    if (create.targets)
        create.targets->setCurrentDatabase(current_database);

    if (create.select && create.isView())
    {

@ -1281,12 +1329,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
    TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode);

    /// Check type compatible for materialized dest table and select columns
    if (create.select && create.is_materialized_view && create.to_table_id && mode <= LoadingStrictnessLevel::CREATE)
    if (create.is_materialized_view_with_external_target() && create.select && mode <= LoadingStrictnessLevel::CREATE)
    {
        if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(
            {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid},
            getContext()
        ))
        if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(create.getTargetTableID(ViewTarget::To), getContext()))
        {
            Block input_block;

@ -1332,11 +1377,17 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
    if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
    {
        bool is_storage_replicated = false;
        if (create.storage && create.storage->engine)

        if (create.storage && isReplicated(*create.storage))
            is_storage_replicated = true;

        if (create.targets)
        {
            const auto & storage_name = create.storage->engine->name;
            if (storage_name.starts_with("Replicated") || storage_name.starts_with("Shared"))
                is_storage_replicated = true;
            for (const auto & inner_table_engine : create.targets->getInnerEngines())
            {
                if (isReplicated(*inner_table_engine))
                    is_storage_replicated = true;
            }
        }

        const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated;

@ -1795,7 +1846,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont

    /// For CREATE query generate UUID on initiator, so it will be the same on all hosts.
    /// It will be ignored if database does not support UUIDs.
    create.generateRandomUUID();
    create.generateRandomUUIDs();

    /// For cross-replication cluster we cannot use UUID in replica path.
    String cluster_name_expanded = local_context->getMacros()->expand(cluster_name);

@ -1917,8 +1968,15 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const
        }
    }

    if (create.to_table_id)
        required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, create.to_table_id.database_name, create.to_table_id.table_name);
    if (create.targets)
    {
        for (const auto & target : create.targets->targets)
        {
            const auto & target_id = target.table_id;
            if (target_id)
                required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, target_id.database_name, target_id.table_name);
        }
    }

    if (create.storage && create.storage->engine)
        required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name);
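The new isReplicated helper above centralizes a check that was previously inlined in createTable. A standalone restatement with the ASTStorage dependency reduced to a plain engine-name string, assuming C++20 starts_with; names here are illustrative:

#include <string>

/// Engine names like "ReplicatedMergeTree" or "SharedMergeTree" mark
/// replicated storage; everything else does not.
bool isReplicatedEngineName(const std::string & storage_name)
{
    return storage_name.starts_with("Replicated") || storage_name.starts_with("Shared");
}

int main()
{
    bool a = isReplicatedEngineName("ReplicatedMergeTree"); /// true
    bool b = isReplicatedEngineName("MergeTree");           /// false
    return (a && !b) ? 0 : 1;
}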
@ -75,7 +75,6 @@

#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageDummy.h>
#include <Storages/StorageMerge.h>
#include <Storages/StorageValues.h>
#include <Storages/StorageView.h>

@ -214,11 +213,11 @@ InterpreterSelectQuery::InterpreterSelectQuery(
{}

InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const ContextPtr & context_,
    Pipe input_pipe_,
    const SelectQueryOptions & options_)
    : InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery())
    const ASTPtr & query_ptr_,
    const ContextPtr & context_,
    Pipe input_pipe_,
    const SelectQueryOptions & options_)
    : InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery())
{}

InterpreterSelectQuery::InterpreterSelectQuery(

@ -227,18 +226,15 @@ InterpreterSelectQuery::InterpreterSelectQuery(
    const StoragePtr & storage_,
    const StorageMetadataPtr & metadata_snapshot_,
    const SelectQueryOptions & options_)
    : InterpreterSelectQuery(
        query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
{
}
    : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_)
{}

InterpreterSelectQuery::InterpreterSelectQuery(
    const ASTPtr & query_ptr_,
    const ContextPtr & context_,
    const SelectQueryOptions & options_,
    PreparedSetsPtr prepared_sets_)
    : InterpreterSelectQuery(
        query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
    : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, {}, {}, prepared_sets_)
{}

InterpreterSelectQuery::~InterpreterSelectQuery() = default;

@ -26,7 +26,6 @@ class Logger;
namespace DB
{

class SubqueryForSet;
class InterpreterSelectWithUnionQuery;
class Context;
class QueryPlan;

@ -94,7 +94,8 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl()
    {
        auto & create = create_query->as<ASTCreateQuery &>();
        create.uuid = UUIDHelpers::Nil;
        create.to_inner_uuid = UUIDHelpers::Nil;
        if (create.targets)
            create.targets->resetInnerUUIDs();
    }

    MutableColumnPtr column = ColumnString::create();

@ -545,7 +545,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
        catch (Exception & e)
        {
            if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE)
                throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get<String>(), type.getName());
                throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get<String>(), type.getName());

            e.addMessage(fmt::format("while converting '{}' to {}", src.get<String>(), type.getName()));
            throw;

@ -147,7 +147,7 @@ INSTANTIATE_TEST_SUITE_P(
            DecimalField(DateTime64(123 * Day * 1'000'000), 6)
        }
    })
);
);

INSTANTIATE_TEST_SUITE_P(
    DateTimeToDateTime64,

@ -179,3 +179,84 @@ INSTANTIATE_TEST_SUITE_P(
        },
    })
);

INSTANTIATE_TEST_SUITE_P(
    StringToNumber,
    ConvertFieldToTypeTest,
    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
        {
            "String",
            Field("1"),
            "Int8",
            Field(1)
        },
        {
            "String",
            Field("256"),
            "Int8",
            Field()
        },
        {
            "String",
            Field("not a number"),
            "Int8",
            {}
        },
        {
            "String",
            Field("1.1"),
            "Int8",
            {} /// we can not convert '1.1' to Int8
        },
        {
            "String",
            Field("1.1"),
            "Float64",
            Field(1.1)
        },
    })
);

INSTANTIATE_TEST_SUITE_P(
    NumberToString,
    ConvertFieldToTypeTest,
    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
        {
            "Int8",
            Field(1),
            "String",
            Field("1")
        },
        {
            "Int8",
            Field(-1),
            "String",
            Field("-1")
        },
        {
            "Float64",
            Field(1.1),
            "String",
            Field("1.1")
        },
    })
);

INSTANTIATE_TEST_SUITE_P(
    StringToDate,
    ConvertFieldToTypeTest,
    ::testing::ValuesIn(std::initializer_list<ConvertFieldToTypeTestParams>{
        {
            "String",
            Field("2024-07-12"),
            "Date",
            Field(static_cast<UInt16>(19916))
        },
        {
            "String",
            Field("not a date"),
            "Date",
            {}
        },
    })
);

@ -2,6 +2,8 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/CreateQueryUUIDs.h>
#include <Common/quoteString.h>
#include <Interpreters/StorageID.h>
#include <IO/Operators.h>

@ -240,12 +242,12 @@ ASTPtr ASTCreateQuery::clone() const
        res->set(res->columns_list, columns_list->clone());
    if (storage)
        res->set(res->storage, storage->clone());
    if (inner_storage)
        res->set(res->inner_storage, inner_storage->clone());
    if (select)
        res->set(res->select, select->clone());
    if (table_overrides)
        res->set(res->table_overrides, table_overrides->clone());
    if (targets)
        res->set(res->targets, targets->clone());

    if (dictionary)
    {

@ -398,20 +400,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat
        refresh_strategy->formatImpl(settings, state, frame);
    }

    if (to_table_id)
    if (auto to_table_id = getTargetTableID(ViewTarget::To))
    {
        assert((is_materialized_view || is_window_view) && to_inner_uuid == UUIDHelpers::Nil);
        settings.ostr
            << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "")
            << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "")
            << backQuoteIfNeed(to_table_id.table_name);
        settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO)
            << (settings.hilite ? hilite_none : "") << " "
            << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "")
            << backQuoteIfNeed(to_table_id.table_name);
    }

    if (to_inner_uuid != UUIDHelpers::Nil)
    if (auto to_inner_uuid = getTargetInnerUUID(ViewTarget::To); to_inner_uuid != UUIDHelpers::Nil)
    {
        assert(is_materialized_view && !to_table_id);
        settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO INNER UUID " << (settings.hilite ? hilite_none : "")
            << quoteString(toString(to_inner_uuid));
        settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO_INNER_UUID)
            << (settings.hilite ? hilite_none : "") << " " << quoteString(toString(to_inner_uuid));
    }

    bool should_add_empty = is_create_empty;

@ -471,14 +471,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat

    frame.expression_list_always_start_on_new_line = false;

    if (inner_storage)
    if (storage)
        storage->formatImpl(settings, state, frame);

    if (auto inner_storage = getTargetInnerEngine(ViewTarget::Inner))
    {
        settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : "");
        settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::INNER) << (settings.hilite ? hilite_none : "");
        inner_storage->formatImpl(settings, state, frame);
    }

    if (storage)
        storage->formatImpl(settings, state, frame);
    if (auto to_storage = getTargetInnerEngine(ViewTarget::To))
        to_storage->formatImpl(settings, state, frame);

    if (dictionary)
        dictionary->formatImpl(settings, state, frame);

@ -538,48 +541,57 @@ bool ASTCreateQuery::isParameterizedView() const
}


ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query)
    : uuid(query.uuid)
    , to_inner_uuid(query.to_inner_uuid)
void ASTCreateQuery::generateRandomUUIDs()
{
    CreateQueryUUIDs{*this, /* generate_random= */ true}.copyToQuery(*this);
}

String ASTCreateQuery::UUIDs::toString() const
void ASTCreateQuery::resetUUIDs()
{
    WriteBufferFromOwnString out;
    out << "{" << uuid << "," << to_inner_uuid << "}";
    return out.str();
    CreateQueryUUIDs{}.copyToQuery(*this);
}

ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str)

StorageID ASTCreateQuery::getTargetTableID(ViewTarget::Kind target_kind) const
{
    ReadBufferFromString in{str};
    ASTCreateQuery::UUIDs res;
    in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}";
    return res;
    if (targets)
        return targets->getTableID(target_kind);
    return StorageID::createEmpty();
}

ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID(bool always_generate_new_uuid)
bool ASTCreateQuery::hasTargetTableID(ViewTarget::Kind target_kind) const
{
    if (always_generate_new_uuid)
        setUUID({});

    if (uuid == UUIDHelpers::Nil)
        uuid = UUIDHelpers::generateV4();

    /// If destination table (to_table_id) is not specified for materialized view,
    /// then MV will create inner table. We should generate UUID of inner table here.
    bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id;
    if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil))
        to_inner_uuid = UUIDHelpers::generateV4();

    return UUIDs{*this};
    if (targets)
        return targets->hasTableID(target_kind);
    return false;
}

void ASTCreateQuery::setUUID(const UUIDs & uuids)
UUID ASTCreateQuery::getTargetInnerUUID(ViewTarget::Kind target_kind) const
{
    uuid = uuids.uuid;
    to_inner_uuid = uuids.to_inner_uuid;
    if (targets)
        return targets->getInnerUUID(target_kind);
    return UUIDHelpers::Nil;
}

bool ASTCreateQuery::hasInnerUUIDs() const
{
    if (targets)
        return targets->hasInnerUUIDs();
    return false;
}

std::shared_ptr<ASTStorage> ASTCreateQuery::getTargetInnerEngine(ViewTarget::Kind target_kind) const
{
    if (targets)
        return targets->getInnerEngine(target_kind);
    return nullptr;
}

void ASTCreateQuery::setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def)
{
    if (!targets)
        set(targets, std::make_shared<ASTViewTargets>());
    targets->setInnerEngine(target_kind, storage_def);
}

}
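generateRandomUUIDs and resetUUIDs above both delegate to CreateQueryUUIDs, whose full definition is not part of this diff; only the two-argument constructor and copyToQuery appear here. A mock of just that surface, to show the calling convention, with all types stubbed and values arbitrary:

#include <cstdint>

struct MockQuery { uint64_t uuid = 0; uint64_t inner_uuid = 0; }; /// stand-in for ASTCreateQuery

struct MockCreateQueryUUIDs /// stand-in for CreateQueryUUIDs; real API assumed, not verified
{
    uint64_t uuid = 0;
    uint64_t inner_uuid = 0;

    MockCreateQueryUUIDs() = default;
    MockCreateQueryUUIDs(const MockQuery & q, bool generate_random)
        : uuid(generate_random ? 42 : q.uuid), inner_uuid(generate_random ? 43 : q.inner_uuid) {}

    void copyToQuery(MockQuery & q) const { q.uuid = uuid; q.inner_uuid = inner_uuid; }
};

int main()
{
    MockQuery q;
    MockCreateQueryUUIDs{q, /* generate_random= */ true}.copyToQuery(q); /// like generateRandomUUIDs()
    MockCreateQueryUUIDs{}.copyToQuery(q);                               /// like resetUUIDs()
    return q.uuid == 0 ? 0 : 1;
}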
@ -5,6 +5,7 @@
#include <Parsers/ASTDictionary.h>
#include <Parsers/ASTDictionaryAttributeDeclaration.h>
#include <Parsers/ASTTableOverrides.h>
#include <Parsers/ASTViewTargets.h>
#include <Parsers/ASTSQLSecurity.h>
#include <Parsers/ASTRefreshStrategy.h>
#include <Interpreters/StorageID.h>

@ -15,6 +16,7 @@ namespace DB
class ASTFunction;
class ASTSetQuery;
class ASTSelectWithUnionQuery;
struct CreateQueryUUIDs;


class ASTStorage : public IAST

@ -101,17 +103,15 @@ public:
    bool has_uuid{false}; // CREATE TABLE x UUID '...'

    ASTColumns * columns_list = nullptr;

    StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table.
    UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table
    ASTStorage * inner_storage = nullptr; /// For window view with inner table
    ASTStorage * storage = nullptr;

    ASTPtr watermark_function;
    ASTPtr lateness_function;
    String as_database;
    String as_table;
    IAST * as_table_function = nullptr;
    ASTSelectWithUnionQuery * select = nullptr;
    ASTViewTargets * targets = nullptr;
    IAST * comment = nullptr;
    ASTPtr sql_security = nullptr;

@ -153,17 +153,26 @@ public:

    QueryKind getQueryKind() const override { return QueryKind::Create; }

    struct UUIDs
    {
        UUID uuid = UUIDHelpers::Nil;
        UUID to_inner_uuid = UUIDHelpers::Nil;
        UUIDs() = default;
        explicit UUIDs(const ASTCreateQuery & query);
        String toString() const;
        static UUIDs fromString(const String & str);
    };
    UUIDs generateRandomUUID(bool always_generate_new_uuid = false);
    void setUUID(const UUIDs & uuids);
    /// Generates a random UUID for this create query if it's not specified already.
    /// The function also generates random UUIDs for inner target tables if this create query implies that
    /// (for example, if it's a `CREATE MATERIALIZED VIEW` query with an inner storage).
    void generateRandomUUIDs();

    /// Removes UUID from this create query.
    /// The function also removes UUIDs for inner target tables from this create query (see also generateRandomUUID()).
    void resetUUIDs();

    /// Returns information about a target table.
    /// If that information isn't specified in this create query (or even not allowed) then the function returns an empty value.
    StorageID getTargetTableID(ViewTarget::Kind target_kind) const;
    bool hasTargetTableID(ViewTarget::Kind target_kind) const;
    UUID getTargetInnerUUID(ViewTarget::Kind target_kind) const;
    bool hasInnerUUIDs() const;
    std::shared_ptr<ASTStorage> getTargetInnerEngine(ViewTarget::Kind target_kind) const;
    void setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def);

    bool is_materialized_view_with_external_target() const { return is_materialized_view && hasTargetTableID(ViewTarget::To); }
    bool is_materialized_view_with_inner_table() const { return is_materialized_view && !hasTargetTableID(ViewTarget::To); }

protected:
    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;

@ -171,8 +180,8 @@ protected:
    void forEachPointerToChild(std::function<void(void**)> f) override
    {
        f(reinterpret_cast<void **>(&columns_list));
        f(reinterpret_cast<void **>(&inner_storage));
        f(reinterpret_cast<void **>(&storage));
        f(reinterpret_cast<void **>(&targets));
        f(reinterpret_cast<void **>(&as_table_function));
        f(reinterpret_cast<void **>(&select));
        f(reinterpret_cast<void **>(&comment));

@ -329,19 +329,23 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format

        const auto * literal = arguments->children[0]->as<ASTLiteral>();
        const auto * function = arguments->children[0]->as<ASTFunction>();
        const auto * subquery = arguments->children[0]->as<ASTSubquery>();
        bool is_tuple = literal && literal->value.getType() == Field::Types::Tuple;
        // do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))`
        /// Do not add parentheses for tuple literal, otherwise extra parens will be added `-((3, 7, 3), 1)` -> `-(((3, 7, 3), 1))`
        bool literal_need_parens = literal && !is_tuple;

        // negate always requires parentheses, otherwise -(-1) will be printed as --1
        bool inside_parens = name == "negate" && (literal_need_parens || (function && function->name == "negate"));
        /// Negate always requires parentheses, otherwise -(-1) will be printed as --1
        /// Also extra parentheses are needed for subqueries, because NOT can be parsed as a function:
        /// not(SELECT 1) cannot be parsed, while not((SELECT 1)) can.
        bool inside_parens = (name == "negate" && (literal_need_parens || (function && function->name == "negate")))
            || (subquery && name == "not");

        /// We DO need parentheses around a single literal
        /// For example, SELECT (NOT 0) + (NOT 0) cannot be transformed into SELECT NOT 0 + NOT 0, since
        /// this is equal to SELECT NOT (0 + NOT 0)
        bool outside_parens = frame.need_parens && !inside_parens;

        // do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1)))
        /// Do not add extra parentheses for functions inside negate, i.e. -(-toUInt64(-(1)))
        if (inside_parens)
            nested_need_parens.need_parens = false;
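The parenthesisation rule the hunk above extends can be restated as a small predicate; the boolean inputs here stand in for the AST inspection the real code performs, so this is a sketch of the logic, not the actual formatter:

#include <string>

/// `negate` needs parens around non-tuple literals and nested negates
/// (so -(-1) does not print as --1), and `not` needs them around subquery
/// arguments (not(SELECT 1) cannot be re-parsed, not((SELECT 1)) can).
bool needsInsideParens(const std::string & name, bool literal_need_parens,
                       bool arg_is_negate, bool arg_is_subquery)
{
    return (name == "negate" && (literal_need_parens || arg_is_negate))
        || (arg_is_subquery && name == "not");
}

int main()
{
    bool a = needsInsideParens("negate", true, false, false); /// true: -(-1) case
    bool b = needsInsideParens("not", false, false, true);    /// true: NOT (SELECT 1) case
    return (a && b) ? 0 : 1;
}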
300 src/Parsers/ASTViewTargets.cpp Normal file

@ -0,0 +1,300 @@
#include <Parsers/ASTViewTargets.h>

#include <Parsers/ASTCreateQuery.h>
#include <Parsers/CommonParsers.h>
#include <IO/WriteHelpers.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
}


std::string_view toString(ViewTarget::Kind kind)
{
    switch (kind)
    {
        case ViewTarget::To: return "to";
        case ViewTarget::Inner: return "inner";
    }
    throw Exception(ErrorCodes::LOGICAL_ERROR, "{} doesn't support kind {}", __FUNCTION__, kind);
}

void parseFromString(ViewTarget::Kind & out, std::string_view str)
{
    for (auto kind : magic_enum::enum_values<ViewTarget::Kind>())
    {
        if (toString(kind) == str)
        {
            out = kind;
            return;
        }
    }
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: Unexpected string {}", __FUNCTION__, str);
}


std::vector<ViewTarget::Kind> ASTViewTargets::getKinds() const
{
    std::vector<ViewTarget::Kind> kinds;
    kinds.reserve(targets.size());
    for (const auto & target : targets)
        kinds.push_back(target.kind);
    return kinds;
}


void ASTViewTargets::setTableID(ViewTarget::Kind kind, const StorageID & table_id_)
{
    for (auto & target : targets)
    {
        if (target.kind == kind)
        {
            target.table_id = table_id_;
            return;
        }
    }
    if (table_id_)
        targets.emplace_back(kind).table_id = table_id_;
}

StorageID ASTViewTargets::getTableID(ViewTarget::Kind kind) const
{
    if (const auto * target = tryGetTarget(kind))
        return target->table_id;
    return StorageID::createEmpty();
}

bool ASTViewTargets::hasTableID(ViewTarget::Kind kind) const
{
    if (const auto * target = tryGetTarget(kind))
        return !target->table_id.empty();
    return false;
}

void ASTViewTargets::setCurrentDatabase(const String & current_database)
{
    for (auto & target : targets)
    {
        auto & table_id = target.table_id;
        if (!table_id.table_name.empty() && table_id.database_name.empty())
            table_id.database_name = current_database;
    }
}

void ASTViewTargets::setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_)
{
    for (auto & target : targets)
    {
        if (target.kind == kind)
        {
            target.inner_uuid = inner_uuid_;
            return;
        }
    }
    if (inner_uuid_ != UUIDHelpers::Nil)
        targets.emplace_back(kind).inner_uuid = inner_uuid_;
}

UUID ASTViewTargets::getInnerUUID(ViewTarget::Kind kind) const
{
    if (const auto * target = tryGetTarget(kind))
        return target->inner_uuid;
    return UUIDHelpers::Nil;
}

bool ASTViewTargets::hasInnerUUID(ViewTarget::Kind kind) const
{
    return getInnerUUID(kind) != UUIDHelpers::Nil;
}

void ASTViewTargets::resetInnerUUIDs()
{
    for (auto & target : targets)
        target.inner_uuid = UUIDHelpers::Nil;
}

bool ASTViewTargets::hasInnerUUIDs() const
{
    for (const auto & target : targets)
    {
        if (target.inner_uuid != UUIDHelpers::Nil)
            return true;
    }
    return false;
}

void ASTViewTargets::setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def)
{
    auto new_inner_engine = typeid_cast<std::shared_ptr<ASTStorage>>(storage_def);
    if (!new_inner_engine && storage_def)
        throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Bad cast from type {} to ASTStorage", storage_def->getID());

    for (auto & target : targets)
    {
        if (target.kind == kind)
        {
            if (target.inner_engine == new_inner_engine)
                return;
            if (new_inner_engine)
                children.push_back(new_inner_engine);
            if (target.inner_engine)
                std::erase(children, target.inner_engine);
            target.inner_engine = new_inner_engine;
            return;
        }
    }

    if (new_inner_engine)
    {
        targets.emplace_back(kind).inner_engine = new_inner_engine;
        children.push_back(new_inner_engine);
    }
}

std::shared_ptr<ASTStorage> ASTViewTargets::getInnerEngine(ViewTarget::Kind kind) const
{
    if (const auto * target = tryGetTarget(kind))
        return target->inner_engine;
    return nullptr;
}

std::vector<std::shared_ptr<ASTStorage>> ASTViewTargets::getInnerEngines() const
{
    std::vector<std::shared_ptr<ASTStorage>> res;
    res.reserve(targets.size());
    for (const auto & target : targets)
    {
        if (target.inner_engine)
            res.push_back(target.inner_engine);
    }
    return res;
}

const ViewTarget * ASTViewTargets::tryGetTarget(ViewTarget::Kind kind) const
{
    for (const auto & target : targets)
    {
        if (target.kind == kind)
            return &target;
    }
    return nullptr;
}

ASTPtr ASTViewTargets::clone() const
{
    auto res = std::make_shared<ASTViewTargets>(*this);
    res->children.clear();
    for (auto & target : res->targets)
    {
        if (target.inner_engine)
        {
            target.inner_engine = typeid_cast<std::shared_ptr<ASTStorage>>(target.inner_engine->clone());
            res->children.push_back(target.inner_engine);
        }
    }
    return res;
}

void ASTViewTargets::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
    for (const auto & target : targets)
        formatTarget(target, s, state, frame);
}

void ASTViewTargets::formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
    for (const auto & target : targets)
    {
        if (target.kind == kind)
            formatTarget(target, s, state, frame);
    }
}

void ASTViewTargets::formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame)
{
    if (target.table_id)
    {
        auto keyword = getKeywordForTableID(target.kind);
        if (!keyword)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "No keyword for table name of kind {}", toString(target.kind));
        s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword)
            << (s.hilite ? hilite_none : "") << " "
            << (!target.table_id.database_name.empty() ? backQuoteIfNeed(target.table_id.database_name) + "." : "")
            << backQuoteIfNeed(target.table_id.table_name);
    }

    if (target.inner_uuid != UUIDHelpers::Nil)
    {
        auto keyword = getKeywordForInnerUUID(target.kind);
        if (!keyword)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for inner UUID of kind {}", toString(target.kind));
        s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword)
            << (s.hilite ? hilite_none : "") << " " << quoteString(toString(target.inner_uuid));
    }

    if (target.inner_engine)
    {
        auto keyword = getKeywordForInnerStorage(target.kind);
        if (!keyword)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for table engine of kind {}", toString(target.kind));
        s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) << (s.hilite ? hilite_none : "");
        target.inner_engine->formatImpl(s, state, frame);
    }
}

std::optional<Keyword> ASTViewTargets::getKeywordForTableID(ViewTarget::Kind kind)
{
    switch (kind)
    {
        case ViewTarget::To: return Keyword::TO; /// TO mydb.mydata
        case ViewTarget::Inner: return std::nullopt;
    }
    UNREACHABLE();
}

std::optional<Keyword> ASTViewTargets::getKeywordForInnerStorage(ViewTarget::Kind kind)
{
    switch (kind)
    {
        case ViewTarget::To: return std::nullopt; /// ENGINE = MergeTree()
        case ViewTarget::Inner: return Keyword::INNER; /// INNER ENGINE = MergeTree()
    }
    UNREACHABLE();
}

std::optional<Keyword> ASTViewTargets::getKeywordForInnerUUID(ViewTarget::Kind kind)
{
    switch (kind)
    {
        case ViewTarget::To: return Keyword::TO_INNER_UUID; /// TO INNER UUID 'XXX'
        case ViewTarget::Inner: return std::nullopt;
    }
    UNREACHABLE();
|
||||
}
|
||||
|
||||
void ASTViewTargets::forEachPointerToChild(std::function<void(void**)> f)
|
||||
{
|
||||
for (auto & target : targets)
|
||||
{
|
||||
if (target.inner_engine)
|
||||
{
|
||||
ASTStorage * new_inner_engine = target.inner_engine.get();
|
||||
f(reinterpret_cast<void **>(&new_inner_engine));
|
||||
if (new_inner_engine != target.inner_engine.get())
|
||||
{
|
||||
if (new_inner_engine)
|
||||
target.inner_engine = typeid_cast<std::shared_ptr<ASTStorage>>(new_inner_engine->ptr());
|
||||
else
|
||||
target.inner_engine.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
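The getters above are deliberately total: a missing target kind yields an empty StorageID, a Nil UUID, or a null engine rather than an error. A minimal usage sketch of that API (illustration only, not part of this commit; the database/table names and the use of chassert are assumptions):

#include <Parsers/ASTViewTargets.h>

void exampleViewTargetsUsage()
{
    using namespace DB;

    auto targets = std::make_shared<ASTViewTargets>();

    /// Represents the "TO mydb.dst" clause of a CREATE MATERIALIZED VIEW (hypothetical names).
    targets->setTableID(ViewTarget::To, StorageID{"mydb", "dst"});

    chassert(targets->hasTableID(ViewTarget::To));
    chassert(!targets->hasInnerUUID(ViewTarget::To));      /// Nil for absent kinds
    chassert(!targets->getInnerEngine(ViewTarget::Inner)); /// nullptr for absent kinds

    /// An unqualified table name would pick up the current database here.
    targets->setCurrentDatabase("default");
}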
115
src/Parsers/ASTViewTargets.h
Normal file
@ -0,0 +1,115 @@
#pragma once

#include <Parsers/IAST.h>
#include <Interpreters/StorageID.h>


namespace DB
{
class ASTStorage;
enum class Keyword : size_t;

/// Information about target tables (external or inner) of a materialized view or a window view.
/// See ASTViewTargets for more details.
struct ViewTarget
{
    enum Kind
    {
        /// If `kind == ViewTarget::To` then `ViewTarget` contains information about the "TO" table of a materialized view or a window view:
        ///     CREATE MATERIALIZED VIEW db.mv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ...
        /// or
        ///     CREATE WINDOW VIEW db.wv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ...
        To,

        /// If `kind == ViewTarget::Inner` then `ViewTarget` contains information about the "INNER" table of a window view:
        ///     CREATE WINDOW VIEW db.wv_name {INNER ENGINE inner_engine} AS SELECT ...
        Inner,
    };

    Kind kind = To;

    /// StorageID of the target table, if it's not inner.
    /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ...
    StorageID table_id = StorageID::createEmpty();

    /// UUID of the target table, if it's inner.
    /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like
    /// CREATE MATERIALIZED VIEW ... TO INNER UUID ...
    UUID inner_uuid = UUIDHelpers::Nil;

    /// Table engine of the target table, if it's inner.
    /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ...
    std::shared_ptr<ASTStorage> inner_engine;
};

/// Converts ViewTarget::Kind to a string.
std::string_view toString(ViewTarget::Kind kind);
void parseFromString(ViewTarget::Kind & out, std::string_view str);


/// Information about all target tables (external or inner) of a view.
///
/// For example, for a materialized view:
///     CREATE MATERIALIZED VIEW db.mv_name [TO [db.]to_target | ENGINE to_engine] AS SELECT ...
/// this class contains information about the "TO" table: its name and database (if it's external), its UUID and engine (if it's inner).
///
/// For a window view:
///     CREATE WINDOW VIEW db.wv_name [TO [db.]to_target | ENGINE to_engine] [INNER ENGINE inner_engine] AS SELECT ...
/// this class contains information about both the "TO" table and the "INNER" table.
class ASTViewTargets : public IAST
{
public:
    std::vector<ViewTarget> targets;

    /// Sets the StorageID of the target table, if it's not inner.
    /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ...
    void setTableID(ViewTarget::Kind kind, const StorageID & table_id_);
    StorageID getTableID(ViewTarget::Kind kind) const;
    bool hasTableID(ViewTarget::Kind kind) const;

    /// Replaces an empty database in the StorageID of the target table with a specified database.
    void setCurrentDatabase(const String & current_database);

    /// Sets the UUID of the target table, if it's inner.
    /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like
    /// CREATE MATERIALIZED VIEW ... TO INNER UUID ...
    void setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_);
    UUID getInnerUUID(ViewTarget::Kind kind) const;
    bool hasInnerUUID(ViewTarget::Kind kind) const;

    void resetInnerUUIDs();
    bool hasInnerUUIDs() const;

    /// Sets the table engine of the target table, if it's inner.
    /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ...
    void setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def);
    std::shared_ptr<ASTStorage> getInnerEngine(ViewTarget::Kind kind) const;
    std::vector<std::shared_ptr<ASTStorage>> getInnerEngines() const;

    /// Returns a list of all kinds of views in this ASTViewTargets.
    std::vector<ViewTarget::Kind> getKinds() const;

    /// Returns information about a target table.
    /// The function returns null if such target doesn't exist.
    const ViewTarget * tryGetTarget(ViewTarget::Kind kind) const;

    String getID(char) const override { return "ViewTargets"; }

    ASTPtr clone() const override;

    void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;

    /// Formats information only about a specific target table.
    void formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const;
    static void formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame);

    /// Helper functions for class ParserViewTargets. Returns a prefix keyword matching a specified target kind.
    static std::optional<Keyword> getKeywordForTableID(ViewTarget::Kind kind);
    static std::optional<Keyword> getKeywordForInnerUUID(ViewTarget::Kind kind);
    static std::optional<Keyword> getKeywordForInnerStorage(ViewTarget::Kind kind);

protected:
    void forEachPointerToChild(std::function<void(void**)> f) override;
};

}
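Condensing the header's own examples, the common CREATE forms map onto ViewTarget entries as follows (a sketch with hypothetical names, not text from the commit):

/// CREATE MATERIALIZED VIEW db.mv TO db.dst AS SELECT ...
///     -> one target: {kind = To, table_id = db.dst}
/// CREATE MATERIALIZED VIEW db.mv ENGINE = MergeTree ORDER BY k AS SELECT ...
///     -> one target: {kind = To, inner_engine = "MergeTree ORDER BY k"}
/// CREATE MATERIALIZED VIEW db.mv TO INNER UUID '...' AS SELECT ...
///     -> one target: {kind = To, inner_uuid = '...'}
/// CREATE WINDOW VIEW db.wv TO db.dst INNER ENGINE Memory AS SELECT ...
///     -> two targets: {kind = To, table_id = db.dst} and {kind = Inner, inner_engine = "Memory"}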
168
src/Parsers/CreateQueryUUIDs.cpp
Normal file
@ -0,0 +1,168 @@
#include <Parsers/CreateQueryUUIDs.h>

#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>


namespace DB
{

CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random, bool force_random)
{
    if (!generate_random || !force_random)
    {
        uuid = query.uuid;
        if (query.targets)
        {
            for (const auto & target : query.targets->targets)
                setTargetInnerUUID(target.kind, target.inner_uuid);
        }
    }

    if (generate_random)
    {
        if (uuid == UUIDHelpers::Nil)
            uuid = UUIDHelpers::generateV4();

        /// For an ATTACH query we should never generate UUIDs for its inner target tables
        /// because for an ATTACH query those inner target tables probably already exist and can be accessible by names.
        /// If we generate random UUIDs for already existing tables then those UUIDs will not be correct making those inner target tables inaccessible.
        /// Thus it's not safe for example to replace
        /// "ATTACH MATERIALIZED VIEW mv AS SELECT a FROM b" with
        /// "ATTACH MATERIALIZED VIEW mv TO INNER UUID "XXXX" AS SELECT a FROM b"
        /// This replacement is safe only for CREATE queries when inner target tables don't exist yet.
        if (!query.attach)
        {
            auto generate_target_uuid = [&](ViewTarget::Kind target_kind)
            {
                if ((query.getTargetInnerUUID(target_kind) == UUIDHelpers::Nil) && query.getTargetTableID(target_kind).empty())
                    setTargetInnerUUID(target_kind, UUIDHelpers::generateV4());
            };

            /// If destination table (to_table_id) is not specified for materialized view,
            /// then MV will create inner table. We should generate UUID of inner table here.
            if (query.is_materialized_view)
                generate_target_uuid(ViewTarget::To);
        }
    }
}

bool CreateQueryUUIDs::empty() const
{
    if (uuid != UUIDHelpers::Nil)
        return false;
    for (const auto & [_, inner_uuid] : targets_inner_uuids)
    {
        if (inner_uuid != UUIDHelpers::Nil)
            return false;
    }
    return true;
}

String CreateQueryUUIDs::toString() const
{
    WriteBufferFromOwnString out;
    out << "{";
    bool need_comma = false;
    auto add_name_and_uuid_to_string = [&](std::string_view name_, const UUID & uuid_)
    {
        if (std::exchange(need_comma, true))
            out << ", ";
        out << "\"" << name_ << "\": \"" << uuid_ << "\"";
    };
    if (uuid != UUIDHelpers::Nil)
        add_name_and_uuid_to_string("uuid", uuid);
    for (const auto & [kind, inner_uuid] : targets_inner_uuids)
    {
        if (inner_uuid != UUIDHelpers::Nil)
            add_name_and_uuid_to_string(::DB::toString(kind), inner_uuid);
    }
    out << "}";
    return out.str();
}

CreateQueryUUIDs CreateQueryUUIDs::fromString(const String & str)
{
    ReadBufferFromString in{str};
    CreateQueryUUIDs res;
    skipWhitespaceIfAny(in);
    in >> "{";
    skipWhitespaceIfAny(in);
    char c;
    while (in.peek(c) && c != '}')
    {
        String name;
        String value;
        readDoubleQuotedString(name, in);
        skipWhitespaceIfAny(in);
        in >> ":";
        skipWhitespaceIfAny(in);
        readDoubleQuotedString(value, in);
        skipWhitespaceIfAny(in);
        if (name == "uuid")
        {
            res.uuid = parse<UUID>(value);
        }
        else
        {
            ViewTarget::Kind kind;
            parseFromString(kind, name);
            res.setTargetInnerUUID(kind, parse<UUID>(value));
        }
        if (in.peek(c) && c == ',')
        {
            in.ignore(1);
            skipWhitespaceIfAny(in);
        }
    }
    in >> "}";
    return res;
}

void CreateQueryUUIDs::setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid)
{
    for (auto & pair : targets_inner_uuids)
    {
        if (pair.first == kind)
        {
            pair.second = new_inner_uuid;
            return;
        }
    }
    if (new_inner_uuid != UUIDHelpers::Nil)
        targets_inner_uuids.emplace_back(kind, new_inner_uuid);
}

UUID CreateQueryUUIDs::getTargetInnerUUID(ViewTarget::Kind kind) const
{
    for (const auto & pair : targets_inner_uuids)
    {
        if (pair.first == kind)
            return pair.second;
    }
    return UUIDHelpers::Nil;
}

void CreateQueryUUIDs::copyToQuery(ASTCreateQuery & query) const
{
    query.uuid = uuid;

    if (query.targets)
        query.targets->resetInnerUUIDs();

    if (!targets_inner_uuids.empty())
    {
        if (!query.targets)
            query.set(query.targets, std::make_shared<ASTViewTargets>());

        for (const auto & [kind, inner_uuid] : targets_inner_uuids)
        {
            if (inner_uuid != UUIDHelpers::Nil)
                query.targets->setInnerUUID(kind, inner_uuid);
        }
    }
}

}
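The toString()/fromString() pair above defines a small JSON-like wire format. A round-trip sketch (illustration only; the exact key spelling for a target kind and the use of chassert are assumptions):

DB::CreateQueryUUIDs uuids;
uuids.uuid = DB::UUIDHelpers::generateV4();
uuids.setTargetInnerUUID(DB::ViewTarget::To, DB::UUIDHelpers::generateV4());

/// toString() produces something like (UUIDs shortened for readability):
///   {"uuid": "b6a9...", "To": "4c1d..."}
/// and fromString() parses exactly that shape back:
DB::CreateQueryUUIDs parsed = DB::CreateQueryUUIDs::fromString(uuids.toString());
chassert(parsed.uuid == uuids.uuid);
chassert(parsed.getTargetInnerUUID(DB::ViewTarget::To) == uuids.getTargetInnerUUID(DB::ViewTarget::To));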
40
src/Parsers/CreateQueryUUIDs.h
Normal file
@ -0,0 +1,40 @@
#pragma once

#include <Parsers/ASTViewTargets.h>


namespace DB
{
class ASTCreateQuery;

/// The UUID of a table or a database defined with a CREATE QUERY along with the UUIDs of its inner targets.
struct CreateQueryUUIDs
{
    CreateQueryUUIDs() = default;

    /// Collect UUIDs from ASTCreateQuery.
    /// Parameters:
    /// `generate_random` - if it's true then UUIDs not specified in the query will be generated randomly;
    /// `force_random` - if it's true then all UUIDs (even those specified in the query) will be (re)generated randomly.
    explicit CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random = false, bool force_random = false);

    bool empty() const;
    explicit operator bool() const { return !empty(); }

    String toString() const;
    static CreateQueryUUIDs fromString(const String & str);

    void setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid);
    UUID getTargetInnerUUID(ViewTarget::Kind kind) const;

    /// Copies UUIDs to ASTCreateQuery.
    void copyToQuery(ASTCreateQuery & query) const;

    /// UUID of the table.
    UUID uuid = UUIDHelpers::Nil;

    /// UUIDs of its target table (or tables).
    std::vector<std::pair<ViewTarget::Kind, UUID>> targets_inner_uuids;
};

}
@ -9,7 +9,7 @@ namespace DB
{


/** The SELECT subquery is in parenthesis.
/** The SELECT subquery, in parentheses.
  */
class ParserSubquery : public IParserBase
{
@ -22,6 +22,7 @@
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ParserRefreshStrategy.h>
#include <Parsers/ParserViewTargets.h>
#include <Common/typeid_cast.h>
#include <Parsers/ASTColumnDeclaration.h>

@ -693,7 +694,8 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe

    ASTPtr table;
    ASTPtr columns_list;
    ASTPtr storage;
    std::shared_ptr<ASTStorage> storage;
    ASTPtr targets;
    ASTPtr as_database;
    ASTPtr as_table;
    ASTPtr as_table_function;
@ -773,6 +775,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
        return true;
    }

    auto parse_storage = [&]
    {
        chassert(!storage);
        ASTPtr ast;
        if (!storage_p.parse(pos, ast, expected))
            return false;

        storage = typeid_cast<std::shared_ptr<ASTStorage>>(ast);
        return true;
    };

    auto need_parse_as_select = [&is_create_empty, &pos, &expected]()
    {
        if (ParserKeyword{Keyword::EMPTY_AS}.ignore(pos, expected))
@ -798,7 +811,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
        if (!s_rparen.ignore(pos, expected))
            return false;

        auto storage_parse_result = storage_p.parse(pos, storage, expected);
        auto storage_parse_result = parse_storage();

        if ((storage_parse_result || is_temporary) && need_parse_as_select())
        {
@ -820,7 +833,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
      */
    else
    {
        storage_p.parse(pos, storage, expected);
        parse_storage();

        /// CREATE|ATTACH TABLE ... AS ...
        if (need_parse_as_select())
@ -843,7 +856,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe

            /// Optional - ENGINE can be specified.
            if (!storage)
                storage_p.parse(pos, storage, expected);
                parse_storage();
        }
    }
}
@ -904,6 +917,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
    query->set(query->targets, targets);
    query->is_create_empty = is_create_empty;

    if (from_path)
@ -977,6 +991,13 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
        return false;
    }

    std::shared_ptr<ASTViewTargets> targets;
    if (to_table)
    {
        targets = std::make_shared<ASTViewTargets>();
        targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
    }

    /// Optional - a list of columns can be specified. It must fully comply with SELECT.
    if (s_lparen.ignore(pos, expected))
    {
@ -1017,14 +1038,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
    if (query->table)
        query->children.push_back(query->table);

    if (to_table)
        query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();

    query->set(query->columns_list, columns_list);

    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
    query->set(query->targets, targets);

    if (comment)
        query->set(query->comment, comment);
@ -1139,6 +1158,18 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
        storage_p.parse(pos, storage, expected);
    }

    std::shared_ptr<ASTViewTargets> targets;
    if (to_table || storage || inner_storage)
    {
        targets = std::make_shared<ASTViewTargets>();
        if (to_table)
            targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
        if (storage)
            targets->setInnerEngine(ViewTarget::To, storage);
        if (inner_storage)
            targets->setInnerEngine(ViewTarget::Inner, inner_storage);
    }

    // WATERMARK
    if (s_watermark.ignore(pos, expected))
    {
@ -1195,12 +1226,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
    if (query->table)
        query->children.push_back(query->table);

    if (to_table)
        query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();

    query->set(query->columns_list, columns_list);
    query->set(query->storage, storage);
    query->set(query->inner_storage, inner_storage);

    query->is_watermark_strictly_ascending = is_watermark_strictly_ascending;
    query->is_watermark_ascending = is_watermark_ascending;
    query->is_watermark_bounded = is_watermark_bounded;
@ -1213,6 +1240,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
    query->set(query->targets, targets);

    return true;
}
@ -1436,6 +1464,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e
    return true;
}


bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_create(Keyword::CREATE);
@ -1622,13 +1651,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
    if (query->table)
        query->children.push_back(query->table);

    if (to_table)
        query->to_table_id = to_table->as<ASTTableIdentifier>()->getTableId();
    if (to_inner_uuid)
        query->to_inner_uuid = parseFromString<UUID>(to_inner_uuid->as<ASTLiteral>()->value.get<String>());

    query->set(query->columns_list, columns_list);
    query->set(query->storage, storage);

    if (refresh_strategy)
        query->set(query->refresh_strategy, refresh_strategy);
    if (comment)
@ -1639,29 +1663,41 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
    if (query->columns_list && query->columns_list->primary_key)
    {
        /// If engine is not set will use default one
        if (!query->storage)
            query->set(query->storage, std::make_shared<ASTStorage>());
        else if (query->storage->primary_key)
        if (!storage)
            storage = std::make_shared<ASTStorage>();
        auto & storage_ref = typeid_cast<ASTStorage &>(*storage);
        if (storage_ref.primary_key)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");

        query->storage->primary_key = query->columns_list->primary_key;

        storage_ref.primary_key = query->columns_list->primary_key;
    }

    if (query->columns_list && (query->columns_list->primary_key_from_columns))
    {
        /// If engine is not set will use default one
        if (!query->storage)
            query->set(query->storage, std::make_shared<ASTStorage>());
        else if (query->storage->primary_key)
        if (!storage)
            storage = std::make_shared<ASTStorage>();
        auto & storage_ref = typeid_cast<ASTStorage &>(*storage);
        if (storage_ref.primary_key)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed.");
        storage_ref.primary_key = query->columns_list->primary_key_from_columns;
    }

        query->storage->primary_key = query->columns_list->primary_key_from_columns;
    std::shared_ptr<ASTViewTargets> targets;
    if (to_table || to_inner_uuid || storage)
    {
        targets = std::make_shared<ASTViewTargets>();
        if (to_table)
            targets->setTableID(ViewTarget::To, to_table->as<ASTTableIdentifier>()->getTableId());
        if (to_inner_uuid)
            targets->setInnerUUID(ViewTarget::To, parseFromString<UUID>(to_inner_uuid->as<ASTLiteral>()->value.safeGet<String>()));
        if (storage)
            targets->setInnerEngine(ViewTarget::To, storage);
    }

    tryGetIdentifierNameInto(as_database, query->as_database);
    tryGetIdentifierNameInto(as_table, query->as_table);
    query->set(query->select, select);
    query->set(query->targets, targets);

    return true;
}
@ -11,15 +11,12 @@
namespace DB
{


bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserKeyword s_describe(Keyword::DESCRIBE);
    ParserKeyword s_desc(Keyword::DESC);
    ParserKeyword s_table(Keyword::TABLE);
    ParserKeyword s_settings(Keyword::SETTINGS);
    ParserToken s_dot(TokenType::Dot);
    ParserIdentifier name_p;
    ParserSetQuery parser_settings(true);

    ASTPtr database;
@ -53,5 +50,4 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
    return true;
}


}
88
src/Parsers/ParserViewTargets.cpp
Normal file
@ -0,0 +1,88 @@
#include <Parsers/ParserViewTargets.h>

#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTViewTargets.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ParserCreateQuery.h>
#include <IO/ReadHelpers.h>


namespace DB
{

ParserViewTargets::ParserViewTargets()
{
    for (auto kind : magic_enum::enum_values<ViewTarget::Kind>())
        accept_kinds.push_back(kind);
}

bool ParserViewTargets::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    ParserStringLiteral literal_p;
    ParserStorage storage_p{ParserStorage::TABLE_ENGINE};
    ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true);

    std::shared_ptr<ASTViewTargets> res;

    auto result = [&] -> ASTViewTargets &
    {
        if (!res)
            res = std::make_shared<ASTViewTargets>();
        return *res;
    };

    for (;;)
    {
        auto start = pos;
        for (auto kind : accept_kinds)
        {
            auto current = pos;

            auto keyword = ASTViewTargets::getKeywordForInnerUUID(kind);
            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
            {
                ASTPtr ast;
                if (literal_p.parse(pos, ast, expected))
                {
                    result().setInnerUUID(kind, parseFromString<UUID>(ast->as<ASTLiteral>()->value.safeGet<String>()));
                    break;
                }
            }
            pos = current;

            keyword = ASTViewTargets::getKeywordForInnerStorage(kind);
            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
            {
                ASTPtr ast;
                if (storage_p.parse(pos, ast, expected))
                {
                    result().setInnerEngine(kind, ast);
                    break;
                }
            }
            pos = current;

            keyword = ASTViewTargets::getKeywordForTableID(kind);
            if (keyword && ParserKeyword{*keyword}.ignore(pos, expected))
            {
                ASTPtr ast;
                if (table_name_p.parse(pos, ast, expected))
                {
                    result().setTableID(kind, ast->as<ASTTableIdentifier>()->getTableId());
                    break;
                }
            }
            pos = current;
        }
        if (pos == start)
            break;
    }

    if (!res || res->targets.empty())
        return false;

    node = res;
    return true;
}

}
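The parse loop above is greedy and order-insensitive: it keeps retrying every accepted kind until a full pass over the keywords makes no progress. A sketch of clause sequences it would accept (hypothetical names; not text from the commit):

/// All of these produce an ASTViewTargets carrying the same kinds of information:
///   ... TO mydb.dst ...
///   ... TO INNER UUID '123e4567-e89b-12d3-a456-426614174000' ENGINE = MergeTree ORDER BY k ...
///   ... ENGINE = MergeTree ORDER BY k TO mydb.dst ...
/// Parsing stops at the first token that matches no accepted keyword, which is
/// what makes the trailing "if (pos == start) break;" terminate the loop.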
29
src/Parsers/ParserViewTargets.h
Normal file
@ -0,0 +1,29 @@
#pragma once

#include <Parsers/IParserBase.h>
#include <Parsers/ASTViewTargets.h>


namespace DB
{

/// Parses information about target tables (external or inner) of a materialized view or a window view.
/// The function parses one or multiple parts of a CREATE query looking like this:
///     TO db.table_name
///     TO INNER UUID 'XXX'
///     {ENGINE / INNER ENGINE} TableEngine(arguments) [ORDER BY ...] [SETTINGS ...]
/// Returns ASTViewTargets if succeeded.
class ParserViewTargets : public IParserBase
{
public:
    ParserViewTargets();
    explicit ParserViewTargets(const std::vector<ViewTarget::Kind> & accept_kinds_) : accept_kinds(accept_kinds_) { }

protected:
    const char * getName() const override { return "ViewTargets"; }
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;

    std::vector<ViewTarget::Kind> accept_kinds;
};

}
@ -304,7 +304,7 @@ void RefreshTask::refreshTask()
            {
                PreformattedMessage message = getCurrentExceptionMessageAndPattern(true);
                auto text = message.text;
                message.text = fmt::format("Refresh failed: {}", message.text);
                message.text = fmt::format("Refresh view {} failed: {}", view->getStorageID().getFullTableName(), message.text);
                LOG_ERROR(log, message);
                exception = text;
            }
@ -16,6 +16,7 @@
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/CurrentMetrics.h>
#include <Common/NetException.h>
#include <Common/randomDelay.h>
@ -224,14 +225,18 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
    }
    catch (const Exception & e)
    {
        if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM)
        if (e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM
            && !isRetryableException(std::current_exception()))
        {
            report_broken_part();
        }

        throw;
    }
    catch (...)
    {
        report_broken_part();
        if (!isRetryableException(std::current_exception()))
            report_broken_part();
        throw;
    }
}
@ -499,8 +499,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
        {
            auto stats = part->loadStatistics();
            /// TODO: We only have one stats file for every part.
            result.addRows(part->rows_count);
            for (const auto & stat : stats)
                result.merge(part->info.getPartNameV1(), part->rows_count, stat);
                result.merge(part->info.getPartNameV1(), stat);
        }
        catch (...)
        {
@ -515,8 +516,9 @@ ConditionSelectivityEstimator MergeTreeData::getConditionSelectivityEstimatorByP
            if (!partition_pruner.canBePruned(*part))
            {
                auto stats = part->loadStatistics();
                result.addRows(part->rows_count);
                for (const auto & stat : stats)
                    result.merge(part->info.getPartNameV1(), part->rows_count, stat);
                    result.merge(part->info.getPartNameV1(), stat);
            }
        }
        catch (...)
@ -1144,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
    auto metadata_snapshot = getInMemoryMetadataPtr();
    auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});

    auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr);
    auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false);
    if (!filter_dag)
        return {};
@ -44,10 +44,12 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(
    const String & index_name_,
    const Block & index_sample_block_,
    size_t max_rows_,
    MutableColumns && mutable_columns_)
    MutableColumns && mutable_columns_,
    std::vector<Range> && set_hyperrectangle_)
    : index_name(index_name_)
    , max_rows(max_rows_)
    , block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_)))
    , set_hyperrectangle(std::move(set_hyperrectangle_))
{
}

@ -106,6 +108,10 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd
    settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
    settings.position_independent_encoding = false;

    set_hyperrectangle.clear();
    Field min_val;
    Field max_val;

    for (size_t i = 0; i < num_columns; ++i)
    {
        auto & elem = block.getByPosition(i);
@ -116,6 +122,13 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd

        serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr);
        serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr);

        if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(elem.column.get()))
            column_nullable->getExtremesNullLast(min_val, max_val);
        else
            elem.column->getExtremes(min_val, max_val);

        set_hyperrectangle.emplace_back(min_val, true, max_val, true);
    }
}

@ -182,10 +195,29 @@ void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size

    if (has_new_data)
    {
        FieldRef field_min;
        FieldRef field_max;
        for (size_t i = 0; i < columns.size(); ++i)
        {
            auto filtered_column = block.getByName(index_columns[i]).column->filter(filter, block.rows());
            columns[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size());

            if (const auto * column_nullable = typeid_cast<const ColumnNullable *>(filtered_column.get()))
                column_nullable->getExtremesNullLast(field_min, field_max);
            else
                filtered_column->getExtremes(field_min, field_max);

            if (set_hyperrectangle.size() <= i)
            {
                set_hyperrectangle.emplace_back(field_min, true, field_max, true);
            }
            else
            {
                set_hyperrectangle[i].left
                    = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].left, field_min) ? set_hyperrectangle[i].left : field_min;
                set_hyperrectangle[i].right
                    = applyVisitor(FieldVisitorAccurateLess(), set_hyperrectangle[i].right, field_max) ? field_max : set_hyperrectangle[i].right;
            }
        }
    }

@ -221,7 +253,7 @@ bool MergeTreeIndexAggregatorSet::buildFilter(

MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
{
    auto granule = std::make_shared<MergeTreeIndexGranuleSet>(index_name, index_sample_block, max_rows, std::move(columns));
    auto granule = std::make_shared<MergeTreeIndexGranuleSet>(index_name, index_sample_block, max_rows, std::move(columns), std::move(set_hyperrectangle));

    switch (data.type)
    {
@ -240,17 +272,22 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
    return granule;
}

KeyCondition buildCondition(const IndexDescription & index, const ActionsDAGPtr & filter_actions_dag, ContextPtr context)
{
    return KeyCondition{filter_actions_dag, context, index.column_names, index.expression};
}

MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
    const String & index_name_,
    const Block & index_sample_block,
    size_t max_rows_,
    const ActionsDAGPtr & filter_dag,
    ContextPtr context)
    : index_name(index_name_)
    ContextPtr context,
    const IndexDescription & index_description)
    : index_name(index_description.name)
    , max_rows(max_rows_)
    , index_data_types(index_description.data_types)
    , condition(buildCondition(index_description, filter_dag, context))
{
    for (const auto & name : index_sample_block.getNames())
    for (const auto & name : index_description.sample_block.getNames())
        if (!key_columns.contains(name))
            key_columns.insert(name);

@ -293,6 +330,9 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
    if (size == 0 || (max_rows != 0 && size > max_rows))
        return true;

    if (!condition.checkInHyperrectangle(granule.set_hyperrectangle, index_data_types).can_be_true)
        return false;

    Block result = granule.block;
    actions->execute(result);

@ -546,7 +586,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator(const Merge
MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition(
    const ActionsDAGPtr & filter_actions_dag, ContextPtr context) const
{
    return std::make_shared<MergeTreeIndexConditionSet>(index.name, index.sample_block, max_rows, filter_actions_dag, context);
    return std::make_shared<MergeTreeIndexConditionSet>(max_rows, filter_actions_dag, context, index);
}

MergeTreeIndexPtr setIndexCreator(const IndexDescription & index)
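In effect the set index now keeps a per-column min/max hyperrectangle alongside the set itself, so KeyCondition can reject a granule before the set condition is evaluated. A sketch of the behavior with hypothetical values (illustration only, not text from the commit):

/// For an index granule over column "x" whose observed values were {3, 5, 9},
/// the stored hyperrectangle is the range [3, 9]. A filter like "x = 100" is
/// then rejected by checkInHyperrectangle in mayBeTrueOnGranule without
/// executing the set condition at all, while "x = 5" falls through to the
/// exact per-row evaluation over granule.block.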
|
@ -22,7 +22,8 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule
|
||||
const String & index_name_,
|
||||
const Block & index_sample_block_,
|
||||
size_t max_rows_,
|
||||
MutableColumns && columns_);
|
||||
MutableColumns && columns_,
|
||||
std::vector<Range> && set_hyperrectangle_);
|
||||
|
||||
void serializeBinary(WriteBuffer & ostr) const override;
|
||||
void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override;
|
||||
@ -36,6 +37,7 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule
|
||||
const size_t max_rows;
|
||||
|
||||
Block block;
|
||||
std::vector<Range> set_hyperrectangle;
|
||||
};
|
||||
|
||||
|
||||
@ -73,6 +75,7 @@ private:
|
||||
ClearableSetVariants data;
|
||||
Sizes key_sizes;
|
||||
MutableColumns columns;
|
||||
std::vector<Range> set_hyperrectangle;
|
||||
};
|
||||
|
||||
|
||||
@ -80,11 +83,10 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition
|
||||
{
|
||||
public:
|
||||
MergeTreeIndexConditionSet(
|
||||
const String & index_name_,
|
||||
const Block & index_sample_block,
|
||||
size_t max_rows_,
|
||||
const ActionsDAGPtr & filter_dag,
|
||||
ContextPtr context);
|
||||
ContextPtr context,
|
||||
const IndexDescription & index_description);
|
||||
|
||||
bool alwaysUnknownOrTrue() const override;
|
||||
|
||||
@ -119,6 +121,9 @@ private:
|
||||
std::unordered_set<String> key_columns;
|
||||
ExpressionActionsPtr actions;
|
||||
String actions_output_column_name;
|
||||
|
||||
DataTypes index_data_types;
|
||||
KeyCondition condition;
|
||||
};
|
||||
|
||||
|
||||
|
@ -15,16 +15,11 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Common/logger_useful.h>
#include <Processors/Merges/Algorithms/MergeTreePartLevelInfo.h>
#include <Storages/MergeTree/checkDataPart.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int MEMORY_LIMIT_EXCEEDED;
}


/// Lightweight (in terms of logic) stream for reading single part from
/// MergeTree, used for merges and mutations.
///
@ -281,7 +276,7 @@ try
catch (...)
{
    /// Suspicion of the broken part. A part is added to the queue for verification.
    if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
    if (!isRetryableException(std::current_exception()))
        storage.reportBrokenPart(data_part);
    throw;
}
@ -36,11 +36,13 @@ namespace ErrorCodes
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int CANNOT_MUNMAP;
    extern const int CANNOT_MREMAP;
    extern const int CANNOT_SCHEDULE_TASK;
    extern const int UNEXPECTED_FILE_IN_DATA_PART;
    extern const int NO_FILE_IN_DATA_PART;
    extern const int NETWORK_ERROR;
    extern const int SOCKET_TIMEOUT;
    extern const int BROKEN_PROJECTION;
    extern const int ABORTED;
}


@ -85,7 +87,9 @@ bool isRetryableException(std::exception_ptr exception_ptr)
    {
        return isNotEnoughMemoryErrorCode(e.code())
            || e.code() == ErrorCodes::NETWORK_ERROR
            || e.code() == ErrorCodes::SOCKET_TIMEOUT;
            || e.code() == ErrorCodes::SOCKET_TIMEOUT
            || e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK
            || e.code() == ErrorCodes::ABORTED;
    }
    catch (const Poco::Net::NetException &)
    {
@ -329,16 +333,21 @@ static IMergeTreeDataPart::Checksums checkDataPart(
        projections_on_disk.erase(projection_file);
    }

    if (throw_on_broken_projection && !broken_projections_message.empty())
    if (throw_on_broken_projection)
    {
        throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
    }
        if (!broken_projections_message.empty())
        {
            throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message);
        }

    if (require_checksums && !projections_on_disk.empty())
    {
        throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
            "Found unexpected projection directories: {}",
            fmt::join(projections_on_disk, ","));
        /// This one is actually not broken, just redundant files on disk which
        /// MergeTree will never use.
        if (require_checksums && !projections_on_disk.empty())
        {
            throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART,
                "Found unexpected projection directories: {}",
                fmt::join(projections_on_disk, ","));
        }
    }

    if (is_cancelled())
@ -163,7 +163,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
        {
            for (const auto & object_info : read_keys)
            {
                if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName()))
                auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName());
                /// Use this format only if we have a schema reader for it.
                if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
                {
                    format = format_from_file_name;
                    break;
@ -221,7 +223,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
        {
            for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it)
            {
                if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()))
                auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName());
                /// Use this format only if we have a schema reader for it.
                if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
                {
                    format = format_from_file_name;
                    break;
@ -16,7 +16,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par
    part_statistics[part_name] = stats;
}

Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const
{
    if (part_statistics.empty())
        return default_normal_cond_factor * rows;
@ -30,16 +30,19 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(
    return result * rows / part_rows;
}

Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreater(const Field & val, Float64 rows) const
{
    return rows - estimateLess(val, rows);
}

Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(Float64 val, Float64 rows) const
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const
{
    if (part_statistics.empty())
    {
        if (val < - threshold || val > threshold)
        auto float_val = StatisticsUtils::tryConvertToFloat64(val);
        if (!float_val)
            return default_unknown_cond_factor * rows;
        else if (float_val.value() < - threshold || float_val.value() > threshold)
            return default_normal_cond_factor * rows;
        else
            return default_good_cond_factor * rows;
@ -87,7 +90,7 @@ static std::pair<String, Int32> tryToExtractSingleColumn(const RPNBuilderTreeNod
    return result;
}

std::pair<String, Float64> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
std::pair<String, Field> ConditionSelectivityEstimator::extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const
{
    if (!node.isFunction())
        return {};
@ -123,48 +126,35 @@
    DataTypePtr output_type;
    if (!constant_node->tryGetConstant(output_value, output_type))
        return {};

    const auto type = output_value.getType();
    Float64 value;
    if (type == Field::Types::Int64)
        value = output_value.get<Int64>();
    else if (type == Field::Types::UInt64)
        value = output_value.get<UInt64>();
    else if (type == Field::Types::Float64)
        value = output_value.get<Float64>();
    else
        return {};
    return std::make_pair(function_name, value);
    return std::make_pair(function_name, output_value);
}

Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode & node) const
{
    auto result = tryToExtractSingleColumn(node);
    if (result.second != 1)
    {
        return default_unknown_cond_factor;
    }
        return default_unknown_cond_factor * total_rows;

    String col = result.first;
    auto it = column_estimators.find(col);

    /// If the estimator for the column is not found or there is no data at all,
    /// we use dummy estimation.
    bool dummy = total_rows == 0;
    bool dummy = false;
    ColumnSelectivityEstimator estimator;
    if (it != column_estimators.end())
    {
        estimator = it->second;
    }
    else
    {
        dummy = true;
    }

    auto [op, val] = extractBinaryOp(node, col);

    if (op == "equals")
    {
        if (dummy)
        {
            if (val < - threshold || val > threshold)
            auto float_val = StatisticsUtils::tryConvertToFloat64(val);
            if (!float_val || (float_val < - threshold || float_val > threshold))
                return default_normal_cond_factor * total_rows;
            else
                return default_good_cond_factor * total_rows;
@ -187,13 +177,8 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode
    return default_unknown_cond_factor * total_rows;
}

void ConditionSelectivityEstimator::merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat)
void ConditionSelectivityEstimator::merge(String part_name, ColumnStatisticsPtr column_stat)
{
    if (!part_names.contains(part_name))
    {
        total_rows += part_rows;
        part_names.insert(part_name);
    }
    if (column_stat != nullptr)
        column_estimators[column_stat->columnName()].merge(part_name, column_stat);
}
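A worked example of the constant factors used above (the factor values come from the class itself; the row count is hypothetical):

/// With total_rows = 1'000'000:
///   - "col = 5" on a column with no collected statistics:
///       dummy path, |5| > threshold (2)  ->  0.5 * 1'000'000 = 500'000 rows
///   - "col = 1" on the same column (|1| <= threshold)
///                                        ->  0.1 * 1'000'000 = 100'000 rows
///   - a condition touching more than one column (unsupported shape)
///                                        ->  1.0 * 1'000'000 = 1'000'000 rows
/// Columns that do have statistics go through
/// ColumnSelectivityEstimator::estimateLess/Greater/Equal instead.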
@ -1,6 +1,7 @@
#pragma once

#include <Storages/Statistics/Statistics.h>
#include <Core/Field.h>

namespace DB
{
@ -10,6 +11,14 @@ class RPNBuilderTreeNode;
/// It estimates the selectivity of a condition.
class ConditionSelectivityEstimator
{
public:
    /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
    /// Right now we only support simple condition like col = val / col < val
    Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;

    void merge(String part_name, ColumnStatisticsPtr column_stat);
    void addRows(UInt64 part_rows) { total_rows += part_rows; }

private:
    friend class ColumnStatistics;
    struct ColumnSelectivityEstimator
@ -20,13 +29,15 @@ private:

        void merge(String part_name, ColumnStatisticsPtr stats);

        Float64 estimateLess(Float64 val, Float64 rows) const;
        Float64 estimateLess(const Field & val, Float64 rows) const;

        Float64 estimateGreater(Float64 val, Float64 rows) const;
        Float64 estimateGreater(const Field & val, Float64 rows) const;

        Float64 estimateEqual(Float64 val, Float64 rows) const;
        Float64 estimateEqual(const Field & val, Float64 rows) const;
    };

    std::pair<String, Field> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;

    static constexpr auto default_good_cond_factor = 0.1;
    static constexpr auto default_normal_cond_factor = 0.5;
    static constexpr auto default_unknown_cond_factor = 1.0;
@ -35,16 +46,7 @@ private:
    static constexpr auto threshold = 2;

    UInt64 total_rows = 0;
    std::set<String> part_names;
    std::map<String, ColumnSelectivityEstimator> column_estimators;
    std::pair<String, Float64> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;

public:
    /// TODO: Support the condition consists of CNF/DNF like (cond1 and cond2) or (cond3) ...
    /// Right now we only support simple condition like col = val / col < val
    Float64 estimateRowCount(const RPNBuilderTreeNode & node) const;

    void merge(String part_name, UInt64 part_rows, ColumnStatisticsPtr column_stat);
};

}
@ -1,15 +1,18 @@
|
||||
#include <Storages/Statistics/Statistics.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
|
||||
#include <Storages/Statistics/StatisticsCountMinSketch.h>
|
||||
#include <Storages/Statistics/StatisticsTDigest.h>
|
||||
#include <Storages/Statistics/StatisticsUniq.h>
|
||||
#include <Storages/StatisticsDescription.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
|
||||
#include "config.h" /// USE_DATASKETCHES
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -24,6 +27,36 @@ enum StatisticsFileVersion : UInt16
|
||||
V0 = 0,
|
||||
};
|
||||
|
||||
std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & field)
|
||||
{
|
||||
switch (field.getType())
|
||||
{
|
||||
case Field::Types::Int64:
|
||||
return field.get<Int64>();
|
||||
case Field::Types::UInt64:
|
||||
return field.get<UInt64>();
|
||||
case Field::Types::Float64:
|
||||
return field.get<Float64>();
|
||||
case Field::Types::Int128:
|
||||
return field.get<Int128>();
|
||||
case Field::Types::UInt128:
|
||||
return field.get<UInt128>();
|
||||
case Field::Types::Int256:
|
||||
return field.get<Int256>();
|
||||
case Field::Types::UInt256:
|
||||
return field.get<UInt256>();
|
||||
default:
|
||||
return {};
|
||||
}
src/Storages/Statistics/Statistics.cpp

+}
+
+std::optional<String> StatisticsUtils::tryConvertToString(const DB::Field & field)
+{
+    if (field.getType() == Field::Types::String)
+        return field.get<String>();
+    return {};
+}

 IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
     : stat(stat_)
 {
@@ -46,12 +79,12 @@ UInt64 IStatistics::estimateCardinality() const
     throw Exception(ErrorCodes::LOGICAL_ERROR, "Cardinality estimation is not implemented for this type of statistics");
 }

-Float64 IStatistics::estimateEqual(Float64 /*val*/) const
+Float64 IStatistics::estimateEqual(const Field & /*val*/) const
 {
     throw Exception(ErrorCodes::LOGICAL_ERROR, "Equality estimation is not implemented for this type of statistics");
 }

-Float64 IStatistics::estimateLess(Float64 /*val*/) const
+Float64 IStatistics::estimateLess(const Field & /*val*/) const
 {
     throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
 }
@@ -66,27 +99,32 @@ Float64 IStatistics::estimateLess(Float64 /*val*/) const
 /// For that reason, all estimations are performed in a central place (here), and we don't simply pass the predicate to the first statistics
 /// object that supports it natively.

-Float64 ColumnStatistics::estimateLess(Float64 val) const
+Float64 ColumnStatistics::estimateLess(const Field & val) const
 {
     if (stats.contains(StatisticsType::TDigest))
         return stats.at(StatisticsType::TDigest)->estimateLess(val);
     return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
 }

-Float64 ColumnStatistics::estimateGreater(Float64 val) const
+Float64 ColumnStatistics::estimateGreater(const Field & val) const
 {
     return rows - estimateLess(val);
 }

-Float64 ColumnStatistics::estimateEqual(Float64 val) const
+Float64 ColumnStatistics::estimateEqual(const Field & val) const
 {
-    if (stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
+    auto float_val = StatisticsUtils::tryConvertToFloat64(val);
+    if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
     {
         /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
         if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
             return stats.at(StatisticsType::TDigest)->estimateEqual(val);
     }
-    if (val < - ConditionSelectivityEstimator::threshold || val > ConditionSelectivityEstimator::threshold)
+#if USE_DATASKETCHES
+    if (stats.contains(StatisticsType::CountMinSketch))
+        return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val);
+#endif
+    if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold))
         return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
     else
         return rows * ConditionSelectivityEstimator::default_good_cond_factor;
@@ -166,11 +204,16 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va

 MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
 {
-    registerValidator(StatisticsType::TDigest, TDigestValidator);
-    registerCreator(StatisticsType::TDigest, TDigestCreator);
+    registerValidator(StatisticsType::TDigest, tdigestValidator);
+    registerCreator(StatisticsType::TDigest, tdigestCreator);

-    registerValidator(StatisticsType::Uniq, UniqValidator);
-    registerCreator(StatisticsType::Uniq, UniqCreator);
+    registerValidator(StatisticsType::Uniq, uniqValidator);
+    registerCreator(StatisticsType::Uniq, uniqCreator);
+
+#if USE_DATASKETCHES
+    registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator);
+    registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator);
+#endif
 }

 MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
@@ -197,7 +240,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri
 {
     auto it = creators.find(type);
     if (it == creators.end())
-        throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq'", type);
+        throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest', 'uniq' and 'count_min'", type);
     auto stat_ptr = (it->second)(desc, stats.data_type);
     column_stat->stats[type] = stat_ptr;
 }
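The commit only declares StatisticsUtils::tryConvertToFloat64 in the header below; its body falls outside the visible hunks. For orientation, here is a minimal sketch of what such a helper has to do, assuming it accepts only Field types that widen to Float64; this is illustrative and not the commit's actual code.

/// Illustrative sketch only, not the commit's implementation: map numeric
/// Field variants onto Float64 and reject everything else with std::nullopt.
std::optional<Float64> tryConvertToFloat64Sketch(const Field & field)
{
    switch (field.getType())
    {
        case Field::Types::Int64:
            return static_cast<Float64>(field.get<Int64>());
        case Field::Types::UInt64:
            return static_cast<Float64>(field.get<UInt64>());
        case Field::Types::Float64:
            return field.get<Float64>();
        default:
            return {}; /// strings, arrays, tuples, ... are not convertible
    }
}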
src/Storages/Statistics/Statistics.h

@@ -1,6 +1,7 @@
 #pragma once

 #include <Core/Block.h>
+#include <Core/Field.h>
 #include <IO/ReadBuffer.h>
 #include <IO/WriteBuffer.h>
 #include <Storages/StatisticsDescription.h>
@@ -13,6 +14,14 @@ namespace DB
 constexpr auto STATS_FILE_PREFIX = "statistics_";
 constexpr auto STATS_FILE_SUFFIX = ".stats";

+
+struct StatisticsUtils
+{
+    /// Returns std::nullopt if the input Field cannot be converted to a concrete value
+    static std::optional<Float64> tryConvertToFloat64(const Field & field);
+    static std::optional<String> tryConvertToString(const Field & field);
+};
+
 /// Statistics describe properties of the values in the column,
 /// e.g. how many unique values exist,
 /// what are the N most frequent values,
@@ -34,8 +43,8 @@ public:

     /// Per-value estimations.
     /// Throws if the statistics object is not able to do a meaningful estimation.
-    virtual Float64 estimateEqual(Float64 val) const; /// cardinality of val in the column
-    virtual Float64 estimateLess(Float64 val) const; /// summarized cardinality of values < val in the column
+    virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column
+    virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column

 protected:
     SingleStatisticsDescription stat;
@@ -58,9 +67,9 @@ public:

     void update(const ColumnPtr & column);

-    Float64 estimateLess(Float64 val) const;
-    Float64 estimateGreater(Float64 val) const;
-    Float64 estimateEqual(Float64 val) const;
+    Float64 estimateLess(const Field & val) const;
+    Float64 estimateGreater(const Field & val) const;
+    Float64 estimateEqual(const Field & val) const;

 private:
     friend class MergeTreeStatisticsFactory;
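The practical effect of the Float64 to Field migration in this header is that predicate constants keep their original type until a concrete statistics object decides whether it can handle them. A hypothetical call site, where column_stats stands in for a ColumnStatistics object obtained from the factory:

/// Hypothetical call site; column_stats is assumed to come from
/// MergeTreeStatisticsFactory for a column with tdigest + count_min statistics.
Field numeric_pred = Float64(42.0); /// handled by TDigest via tryConvertToFloat64
Field string_pred = String("foo");  /// only count_min can estimate this one

Float64 less_than = column_stats->estimateLess(numeric_pred);
Float64 equal_num = column_stats->estimateEqual(numeric_pred);
Float64 equal_str = column_stats->estimateEqual(string_pred);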
src/Storages/Statistics/StatisticsCountMinSketch.cpp (new file, 102 lines)

@@ -0,0 +1,102 @@
+#include <Storages/Statistics/StatisticsCountMinSketch.h>
+
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Interpreters/convertFieldToType.h>
+
+#if USE_DATASKETCHES
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+    extern const int ILLEGAL_STATISTICS;
+}
+
+/// Constants chosen based on rolling dice.
+/// These values provide:
+/// 1. an error tolerance of 0.1% (ε = 0.001), and
+/// 2. a confidence level of 99.9% (δ = 0.001).
+/// The resulting sketch size is 152 KB.
+static constexpr auto num_hashes = 7uz;
+static constexpr auto num_buckets = 2718uz;
+
+StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
+    : IStatistics(stat_)
+    , sketch(num_hashes, num_buckets)
+    , data_type(data_type_)
+{
+}
+
+Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const
+{
+    /// Try to convert the field to data_type, e.g. converting a string to a proper data type such as a number, date, datetime, IPv4 or Decimal.
+    /// Returns a null Field if val is outside the range of data_type.
+    ///
+    /// For example, if data_type is Int32:
+    /// 1. For 1.0, 1, '1', return Field(1)
+    /// 2. For 1.1, max_value_int64, return null
+    Field val_converted = convertFieldToType(val, *data_type);
+    if (val_converted.isNull())
+        return 0;
+
+    if (data_type->isValueRepresentedByNumber())
+        return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory());
+
+    if (isStringOrFixedString(data_type))
+        return sketch.get_estimate(val.get<String>());
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimating values of data type {}", data_type->getName());
+}
+
+void StatisticsCountMinSketch::update(const ColumnPtr & column)
+{
+    for (size_t row = 0; row < column->size(); ++row)
+    {
+        if (column->isNullAt(row))
+            continue;
+        auto data = column->getDataAt(row);
+        sketch.update(data.data, data.size, 1);
+    }
+}
+
+void StatisticsCountMinSketch::serialize(WriteBuffer & buf)
+{
+    Sketch::vector_bytes bytes = sketch.serialize();
+    writeIntBinary(static_cast<UInt64>(bytes.size()), buf);
+    buf.write(reinterpret_cast<const char *>(bytes.data()), bytes.size());
+}
+
+void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
+{
+    UInt64 size;
+    readIntBinary(size, buf);
+
+    Sketch::vector_bytes bytes;
+    bytes.resize(size); /// To avoid 'container-overflow' reports from AddressSanitizer
+    buf.readStrict(reinterpret_cast<char *>(bytes.data()), size);
+
+    sketch = Sketch::deserialize(bytes.data(), size);
+}
+
+
+void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
+{
+    data_type = removeNullable(data_type);
+    data_type = removeLowCardinalityAndNullable(data_type);
+    if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type))
+        throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' do not support type {}", data_type->getName());
+}
+
+StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
+{
+    return std::make_shared<StatisticsCountMinSketch>(stat, data_type);
+}
+
+}
+
+#endif
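The constants in this file can be sanity-checked against the usual count-min sizing rules (the standard bounds, not something stated in the commit): with UInt64 counters,

w \approx e / \varepsilon = e \cdot 1000 \approx 2718 \quad \text{(buckets per hash row)}
d = \lceil \ln(1/\delta) \rceil = \lceil \ln 1000 \rceil = 7 \quad \text{(hash rows)}
\text{size} = w \cdot d \cdot 8~\text{bytes} = 2718 \cdot 7 \cdot 8 = 152208~\text{bytes} \approx 152~\text{KB}

which matches the 152 KB figure in the comment above.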
src/Storages/Statistics/StatisticsCountMinSketch.h (new file, 39 lines)

@@ -0,0 +1,39 @@
+#pragma once
+
+#include <Storages/Statistics/Statistics.h>
+
+#include "config.h"
+
+#if USE_DATASKETCHES
+
+#include <count_min.hpp>
+
+namespace DB
+{
+
+class StatisticsCountMinSketch : public IStatistics
+{
+public:
+    StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);
+
+    Float64 estimateEqual(const Field & val) const override;
+
+    void update(const ColumnPtr & column) override;
+
+    void serialize(WriteBuffer & buf) override;
+    void deserialize(ReadBuffer & buf) override;
+
+private:
+    using Sketch = datasketches::count_min_sketch<UInt64>;
+    Sketch sketch;
+
+    DataTypePtr data_type;
+};
+
+
+void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr);
+
+}
+
+#endif
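Outside of ClickHouse, the serialization contract used above can be exercised directly against the datasketches library. A self-contained round-trip sketch using only the calls that appear in the .cpp (constructor, raw-byte update, serialize, deserialize, get_estimate):

#include <count_min.hpp>
#include <cassert>
#include <string>

int main()
{
    using Sketch = datasketches::count_min_sketch<uint64_t>;

    /// Same shape as the statistics object: 7 hash rows, 2718 buckets.
    Sketch sketch(7, 2718);

    std::string key = "clickhouse";
    sketch.update(key.data(), key.size(), 1); /// raw-bytes overload, as in update()
    sketch.update(key.data(), key.size(), 1);

    /// Round-trip through bytes, mirroring serialize()/deserialize() above.
    auto bytes = sketch.serialize();
    Sketch restored = Sketch::deserialize(bytes.data(), bytes.size());

    /// Count-min never underestimates, so the estimate is at least 2.
    assert(restored.get_estimate(key.data(), key.size()) >= 2);
    return 0;
}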
src/Storages/Statistics/StatisticsTDigest.cpp

@@ -1,11 +1,13 @@
 #include <Storages/Statistics/StatisticsTDigest.h>
 #include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>

 namespace DB
 {
 namespace ErrorCodes
 {
     extern const int ILLEGAL_STATISTICS;
+    extern const int LOGICAL_ERROR;
 }

 StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
@@ -16,12 +18,16 @@ StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
 void StatisticsTDigest::update(const ColumnPtr & column)
 {
     size_t rows = column->size();

     for (size_t row = 0; row < rows; ++row)
     {
-        /// TODO: support more types.
-        Float64 value = column->getFloat64(row);
-        t_digest.add(value, 1);
+        Field field;
+        column->get(row, field);
+
+        if (field.isNull())
+            continue;
+
+        if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field))
+            t_digest.add(*field_as_float, 1);
     }
 }

@@ -35,24 +41,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
     t_digest.deserialize(buf);
 }

-Float64 StatisticsTDigest::estimateLess(Float64 val) const
+Float64 StatisticsTDigest::estimateLess(const Field & val) const
 {
-    return t_digest.getCountLessThan(val);
+    auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
+    if (val_as_float)
+        return t_digest.getCountLessThan(*val_as_float);
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating a value of type {}", val.getTypeName());
 }

-Float64 StatisticsTDigest::estimateEqual(Float64 val) const
+Float64 StatisticsTDigest::estimateEqual(const Field & val) const
 {
-    return t_digest.getCountEqual(val);
+    auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
+    if (val_as_float)
+        return t_digest.getCountEqual(*val_as_float);
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating a value of type {}", val.getTypeName());
 }

-void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
+void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
 {
     data_type = removeNullable(data_type);
+    data_type = removeLowCardinalityAndNullable(data_type);
     if (!data_type->isValueRepresentedByNumber())
         throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
 }

-StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
+StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
 {
     return std::make_shared<StatisticsTDigest>(stat);
 }
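Both estimators above reduce to rank queries on the digest. A minimal sketch using only the QuantileTDigest calls that appear in this file (add, getCountLessThan, getCountEqual); the counts are approximate by construction:

/// Minimal sketch; uses only the QuantileTDigest calls seen in this file.
QuantileTDigest<Float64> digest;
for (Float64 v : {10.0, 20.0, 20.0, 30.0})
    digest.add(v, 1);

Float64 below = digest.getCountLessThan(25.0); /// ~3 (10, 20, 20)
Float64 equal = digest.getCountEqual(20.0);    /// ~2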
src/Storages/Statistics/StatisticsTDigest.h

@@ -16,14 +16,14 @@ public:
     void serialize(WriteBuffer & buf) override;
     void deserialize(ReadBuffer & buf) override;

-    Float64 estimateLess(Float64 val) const override;
-    Float64 estimateEqual(Float64 val) const override;
+    Float64 estimateLess(const Field & val) const override;
+    Float64 estimateEqual(const Field & val) const override;

 private:
     QuantileTDigest<Float64> t_digest;
 };

-void TDigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
-StatisticsPtr TDigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
+void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);

 }
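This header shows the full surface a statistics type exposes: the Field-based estimator overrides plus a free validator/creator pair for the factory. A hypothetical skeleton of an additional type, with every name invented for illustration:

/// Hypothetical skeleton; none of these names exist in the commit.
class StatisticsExample : public IStatistics
{
public:
    explicit StatisticsExample(const SingleStatisticsDescription & stat_)
        : IStatistics(stat_)
    {
    }

    Float64 estimateEqual(const Field & /*val*/) const override { return 0.0; }
    Float64 estimateLess(const Field & /*val*/) const override { return 0.0; }

    void update(const ColumnPtr & /*column*/) override {}
    void serialize(WriteBuffer & /*buf*/) override {}
    void deserialize(ReadBuffer & /*buf*/) override {}
};

void exampleValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr exampleCreator(const SingleStatisticsDescription & stat, DataTypePtr);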
@ -1,6 +1,7 @@
|
||||
#include <Storages/Statistics/StatisticsUniq.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -51,14 +52,15 @@ UInt64 StatisticsUniq::estimateCardinality() const
|
||||
return column->getUInt(0);
|
||||
}
|
||||
|
||||
void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
|
||||
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
|
||||
{
|
||||
data_type = removeNullable(data_type);
|
||||
data_type = removeLowCardinalityAndNullable(data_type);
|
||||
if (!data_type->isValueRepresentedByNumber())
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
|
||||
}
|
||||
|
||||
StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
|
||||
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
|
||||
{
|
||||
return std::make_shared<StatisticsUniq>(stat, data_type);
|
||||
}
|
||||
|
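The extra removeLowCardinalityAndNullable call means wrapped column types are unwrapped before the numeric check. A small illustration, assuming the helper strips both wrappers as its name suggests:

/// Assumes removeLowCardinalityAndNullable strips both wrappers, as its name suggests.
DataTypePtr t = std::make_shared<DataTypeLowCardinality>(
    std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>()));

t = removeLowCardinalityAndNullable(t);
/// t is now plain UInt64, so isValueRepresentedByNumber() passes and 'uniq'
/// statistics can be declared on a LowCardinality(Nullable(UInt64)) column.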
src/Storages/Statistics/StatisticsUniq.h

@@ -27,7 +27,7 @@ private:

 };

-void UniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
-StatisticsPtr UniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
+void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
+StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);

 }
@@ -1,6 +1,10 @@
 #include <gtest/gtest.h>

 #include <Storages/Statistics/StatisticsTDigest.h>
+#include <Interpreters/convertFieldToType.h>
+#include <DataTypes/DataTypeFactory.h>

 using namespace DB;

 TEST(Statistics, TDigestLessThan)
 {
@@ -39,6 +43,4 @@ TEST(Statistics, TDigestLessThan)

     std::reverse(data.begin(), data.end());
     test_less_than(data, {-1, 1e9, 50000.0, 3000.0, 30.0}, {0, 100000, 50000, 3000, 30}, {0, 0, 0.001, 0.001, 0.001});
-
-
 }
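A natural companion test for the new conversion helper would pin down which Field types it accepts; sketched here as an assumption about the helper's behaviour, not taken from the commit:

/// Sketch only; the EXPECTs encode assumed behaviour of tryConvertToFloat64.
TEST(Statistics, TryConvertToFloat64Sketch)
{
    EXPECT_EQ(StatisticsUtils::tryConvertToFloat64(Field(UInt64(42))), 42.0);
    EXPECT_EQ(StatisticsUtils::tryConvertToFloat64(Field(Float64(1.5))), 1.5);
    EXPECT_FALSE(StatisticsUtils::tryConvertToFloat64(Field(String("abc"))).has_value());
}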
Some files were not shown because too many files have changed in this diff.